From: Julian Seward Date: Tue, 4 Apr 2006 15:12:44 +0000 (+0000) Subject: Cleanup/restructure m_debuginfo, as described in X-Git-Tag: svn/VALGRIND_3_2_0~136 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=27b95f28a53b701fbab5f28cbe2a0846c46cb3d7;p=thirdparty%2Fvalgrind.git Cleanup/restructure m_debuginfo, as described in coregrind/m_debuginfo/README.txt. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5823 --- diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am index 4c830d2703..4727318dd6 100644 --- a/coregrind/Makefile.am +++ b/coregrind/Makefile.am @@ -114,8 +114,10 @@ noinst_HEADERS = \ vki_unistd-ppc64-linux.h\ vki_unistd-x86-linux.h \ m_coredump/priv_elf.h \ - m_debuginfo/priv_symtab.h \ - m_debuginfo/priv_symtypes.h \ + m_debuginfo/priv_storage.h \ + m_debuginfo/priv_readstabs.h \ + m_debuginfo/priv_readdwarf.h \ + m_debuginfo/priv_readelf.h \ m_demangle/ansidecl.h \ m_demangle/dyn-string.h \ m_demangle/demangle.h \ @@ -165,10 +167,11 @@ COREGRIND_SOURCES_COMMON = \ m_transtab.c \ m_ume.c \ m_aspacemgr/aspacemgr.c \ - m_debuginfo/dwarf.c \ - m_debuginfo/stabs.c \ - m_debuginfo/symtab.c \ - m_debuginfo/symtypes.c \ + m_debuginfo/storage.c \ + m_debuginfo/readdwarf.c \ + m_debuginfo/readstabs.c \ + m_debuginfo/readelf.c \ + m_debuginfo/debuginfo.c \ m_demangle/cp-demangle.c \ m_demangle/cplus-dem.c \ m_demangle/demangle.c \ @@ -301,3 +304,7 @@ all-local: rm -f $(inplacedir)/$$p/$$n; \ ln -f -s ../../$(subdir)/$$f $(inplacedir)/$$p/$$n; \ done + +EXTRA_DIST = \ + m_debuginfo/UNUSED_STABS.txt \ + m_debuginfo/README.txt diff --git a/coregrind/m_debuginfo/README.txt b/coregrind/m_debuginfo/README.txt new file mode 100644 index 0000000000..ea96ba9569 --- /dev/null +++ b/coregrind/m_debuginfo/README.txt @@ -0,0 +1,57 @@ + +On 4 Apr 06, the debuginfo reader (m_debuginfo) was majorly cleaned up +and restructured. It has been a bit of a tangle for a while. 
The new +structure looks like this: + + debuginfo.c + + readelf.c + + readdwarf.c readstabs.c + + storage.c + +Each .c can only call those below it on the page. + +storage.c contains the SegInfo structure and stuff for +maintaining/searching arrays of symbols, line-numbers, and Dwarf CF +info records. + +readdwarf.c and readstabs.c parse the relevant kind of info and +call storage.c to store the results. + +readelf.c reads ELF format, hands syms directly to storage.c, +then delegates to readdwarf.c/readstabs.c for debug info. All +straightforward. + +debuginfo.c is the top-level file, and is quite small. + +There are 3 goals to this: + +(1) Generally tidy up something which needs tidying up + +(2) Introduce more modularity, so as to make it easier to add + readers for other formats, if needed + +(3) Simplify the stabs reader. + +The rationale for (1) and (2) is obvious. + +Re (3), the stabs reader has for a good year contained a sophisticated +and impressive parser for stabs strings, with the aim of recording in +detail the types of variables (I think) (Jeremy's work). Unfortunately +that has caused various segfaults reading stabs info in the past few months +(#77869, #117936, #119914, #120345 and another to do with deeply nested +template types). + +The worst thing is that it is the stabs type reader that is crashing, +not the stabs line-number reader, but the type info is only used by +Helgrind, which is looking pretty dead at the moment. So I have lifted +out the type-reader code and put it in UNUSED_STABS.txt for safe +storage, just leaving the line-number reader in place. + +If Helgrind ever does come back to life we will need to reinstate the +type storage/reader stuff but with DWARF as its primary target. +Placing the existing stabs type-reader in hibernation improves +stability whilst retaining the development effort/expertise that went +into it for possible future reinstatement. 
diff --git a/coregrind/m_debuginfo/symtypes.c b/coregrind/m_debuginfo/UNUSED_STABS.txt similarity index 66% rename from coregrind/m_debuginfo/symtypes.c rename to coregrind/m_debuginfo/UNUSED_STABS.txt index 323815353f..3879026151 100644 --- a/coregrind/m_debuginfo/symtypes.c +++ b/coregrind/m_debuginfo/UNUSED_STABS.txt @@ -1076,3 +1076,408 @@ Char *VG_(describe_addr)(ThreadId tid, Addr addr) /*--------------------------------------------------------------------*/ /*--- end ---*/ /*--------------------------------------------------------------------*/ + +/*--------------------------------------------------------------------*/ +/*--- Header for symbol table stuff. priv_symtab.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2005 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __PRIV_SYMTAB_H +#define __PRIV_SYMTAB_H + +/* A structure to hold an ELF symbol (very crudely). 
*/ +typedef + struct { + Addr addr; /* lowest address of entity */ + UInt size; /* size in bytes */ + Char *name; /* name */ + Addr tocptr; /* ppc64-linux only: value that R2 should have */ + } + RiSym; + +/* Line count at which overflow happens, due to line numbers being stored as + * shorts in `struct nlist' in a.out.h. */ +#define LINENO_OVERFLOW (1 << (sizeof(short) * 8)) + +#define LINENO_BITS 20 +#define LOC_SIZE_BITS (32 - LINENO_BITS) +#define MAX_LINENO ((1 << LINENO_BITS) - 1) + +/* Unlikely to have any lines with instruction ranges > 4096 bytes */ +#define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1) + +/* Number used to detect line number overflows; if one line is 60000-odd + * smaller than the previous, it was probably an overflow. + */ +#define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000) + +/* A structure to hold addr-to-source info for a single line. There can be a + * lot of these, hence the dense packing. */ +typedef + struct { + /* Word 1 */ + Addr addr; /* lowest address for this line */ + /* Word 2 */ + UShort size:LOC_SIZE_BITS; /* byte size; we catch overflows of this */ + UInt lineno:LINENO_BITS; /* source line number, or zero */ + /* Word 3 */ + Char* filename; /* source filename */ + /* Word 4 */ + Char* dirname; /* source directory name */ + } + RiLoc; + + +/* A structure to hold a set of variables in a particular scope */ +typedef struct _Scope Scope; /* a set of symbols in one scope */ +typedef struct _Sym Sym; /* a single symbol */ +typedef struct _ScopeRange ScopeRange; /* a range of code addresses a scope covers */ + +typedef enum { + SyESPrel, /* on the stack (relative to ESP) */ + SyEBPrel, /* on the stack (relative to EBP) */ + SyReg, /* in a register */ + SyType, /* a type definition */ + SyStatic, /* a static variable */ + SyGlobal, /* a global variable (XXX any different to static + in an outer scope?) 
*/ +} SyKind; + +struct _Sym { + SymType *type; /* type */ + Char *name; /* name */ + SyKind kind; /* kind of symbol */ + + /* a value, depending on kind */ + union { + OffT offset; /* offset on stack (-ve -> ebp; +ve -> esp) */ + Int regno; /* register number */ + Addr addr; /* static or global address */ + } u; +}; + +struct _Scope { + Scope *outer; /* outer (containing) scope */ + UInt nsyms; /* number of symbols in this scope */ + UInt depth; /* depth of scope */ + Sym *syms; /* the symbols */ +}; + +/* A structure to map a scope to a range of code addresses; scopes may + be broken into multiple ranges (before and after a nested scope) */ +struct _ScopeRange { + Addr addr; /* start address of this scope */ + Int size; /* length of scope */ + Scope *scope; /* symbols in scope */ +}; + +#define STRCHUNKSIZE (64*1024) + + +/* A structure to summarise CFI summary info for the code address + range [base .. base+len-1]. In short, if you know (sp,fp,ip) at + some point and ip is in the range [base .. base+len-1], it tells + you how to calculate (sp,fp) for the caller of the current + frame and also ra, the return address of the current frame. 
+ + First off, calculate CFA, the Canonical Frame Address, thusly: + + cfa = if cfa_sprel then sp+cfa_off else fp+cfa_off + + Once that is done, the previous frame's sp/fp values and this + frame's ra value can be calculated like this: + + old_sp/fp/ra + = case sp/fp/ra_how of + CFIR_UNKNOWN -> we don't know, sorry + CFIR_SAME -> same as it was before (sp/fp only) + CFIR_CFAREL -> cfa + sp/fp/ra_off + CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off ) +*/ + +#define CFIR_UNKNOWN ((UChar)0) +#define CFIR_SAME ((UChar)1) +#define CFIR_CFAREL ((UChar)2) +#define CFIR_MEMCFAREL ((UChar)3) + +typedef + struct { + Addr base; + UInt len; + Bool cfa_sprel; + UChar ra_how; /* a CFIR_ value */ + UChar sp_how; /* a CFIR_ value */ + UChar fp_how; /* a CFIR_ value */ + Int cfa_off; + Int ra_off; + Int sp_off; + Int fp_off; + } + CfiSI; + +extern void ML_(ppCfiSI) ( CfiSI* ); + + +/* A structure which contains information pertaining to one mapped + text segment. This type is exported only abstractly - in + pub_tool_debuginfo.h. */ +struct _SegInfo { + struct _SegInfo* next; /* list of SegInfos */ + + Int ref; + + /* Description of the mapped segment. */ + Addr start; + UInt size; + Char* filename; /* in mallocville */ + OffT foffset; + Char* soname; + + /* An expandable array of symbols. */ + RiSym* symtab; + UInt symtab_used; + UInt symtab_size; + /* An expandable array of locations. */ + RiLoc* loctab; + UInt loctab_used; + UInt loctab_size; + /* An expandable array of scope ranges. */ + ScopeRange *scopetab; + UInt scopetab_used; + UInt scopetab_size; + /* An expandable array of CFI summary info records. Also includes + summary address bounds, showing the min and max address covered + by any of the records, as an aid to fast searching. */ + CfiSI* cfisi; + UInt cfisi_used; + UInt cfisi_size; + Addr cfisi_minaddr; + Addr cfisi_maxaddr; + + /* Expandable arrays of characters -- the string table. 
+ Pointers into this are stable (the arrays are not reallocated) + */ + struct strchunk { + UInt strtab_used; + struct strchunk *next; + Char strtab[STRCHUNKSIZE]; + } *strchunks; + + /* offset is what we need to add to symbol table entries + to get the real location of that symbol in memory. + */ + OffT offset; + + /* Bounds of data, BSS, PLT, GOT and OPD (for ppc64-linux) so that + tools can see what section an address is in. In the running image! */ + Addr plt_start_vma; + UInt plt_size; + Addr got_start_vma; + UInt got_size; + Addr opd_start_vma; + UInt opd_size; + Addr data_start_vma; + UInt data_size; + Addr bss_start_vma; + UInt bss_size; + + /* data used by stabs parser */ + struct _StabTypeTab *stab_typetab; +}; + +extern +Char *ML_(addStr) ( SegInfo* si, Char* str, Int len ); + +extern +void ML_(addScopeInfo) ( SegInfo* si, Addr this, Addr next, Scope *scope); + +extern +void ML_(addLineInfo) ( SegInfo* si, + Char* filename, + Char* dirname, /* NULL is allowable */ + Addr this, Addr next, Int lineno, Int entry); + +extern +void ML_(addCfiSI) ( SegInfo* si, CfiSI* cfisi ); + +/* Non-fatal -- use vg_panic if terminal. 
*/ +extern +void ML_(symerr) ( Char* msg ); + +/* -------------------- + Stabs reader + -------------------- */ +extern +void ML_(read_debuginfo_stabs) ( SegInfo* si, + UChar* stabC, Int stab_sz, + UChar* stabstr, Int stabstr_sz ); + +/* -------------------- + DWARF2 reader + -------------------- */ +extern +void ML_(read_debuginfo_dwarf2) + ( SegInfo* si, + UChar* debuginfo, Int debug_info_sz, /* .debug_info */ + UChar* debugabbrev, /* .debug_abbrev */ + UChar* debugline, Int debug_line_sz, /* .debug_line */ + UChar* debugstr ); + +/* -------------------- + DWARF1 reader + -------------------- */ +extern +void ML_(read_debuginfo_dwarf1) ( SegInfo* si, + UChar* dwarf1d, Int dwarf1d_sz, + UChar* dwarf1l, Int dwarf1l_sz ); + +/* -------------------- + CFI reader + -------------------- */ +extern +void ML_(read_callframe_info_dwarf2) + ( /*OUT*/SegInfo* si, UChar* ehframe, Int ehframe_sz, Addr ehframe_addr ); + + +#endif // __PRIV_SYMTAB_H + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ + +/*--------------------------------------------------------------------*/ +/*--- Intra-Valgrind interfaces for symtypes.c. priv_symtypes.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2005 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __PRIV_SYMTYPES_H +#define __PRIV_SYMTYPES_H + +/* Let's try to make these opaque */ +typedef struct _SymType SymType; + +/* ------------------------------------------------------------ + Constructors for various SymType nodes + ------------------------------------------------------------ */ + +/* Find the basetype for a given type: that is, if type is a typedef, + return the typedef'd type. If resolve is true, it will resolve + unresolved symbols. If type is not a typedef, then this just + returns type. +*/ +SymType *ML_(st_basetype)(SymType *type, Bool resolve); + +void ML_(st_setname)(SymType *ty, Char *name); + +typedef void (SymResolver)(SymType *, void *); + +/* Create an unresolved type */ +SymType *ML_(st_mkunresolved)(SymType *, SymResolver *resolve, void *data); + +/* update an unresolved type's data */ +void ML_(st_unresolved_setdata)(SymType *, SymResolver *resolve, void *data); + +Bool ML_(st_isresolved)(SymType *); +UInt ML_(st_sizeof)(SymType *); + +/* Unknown type (unparsable) */ +SymType *ML_(st_mkunknown)(SymType *); + +SymType *ML_(st_mkvoid)(SymType *); + +SymType *ML_(st_mkint)(SymType *, UInt size, Bool isSigned); +SymType *ML_(st_mkbool)(SymType *, UInt size); +SymType *ML_(st_mkchar)(SymType *, Bool isSigned); +SymType *ML_(st_mkfloat)(SymType *, UInt size); +SymType *ML_(st_mkdouble)(SymType *, UInt size); + +SymType *ML_(st_mkpointer)(SymType *, SymType *); +SymType *ML_(st_mkrange)(SymType *, SymType *, Int min, Int max); + +SymType *ML_(st_mkstruct)(SymType *, UInt size, UInt nfields); +SymType *ML_(st_mkunion)(SymType *, UInt size, UInt nfields); +void ML_(st_addfield)(SymType *, Char *name, SymType *, UInt off, UInt size); + +SymType 
*ML_(st_mkenum)(SymType *, UInt ntags); +SymType *ML_(st_addtag)(SymType *, Char *name, Int val); + +SymType *ML_(st_mkarray)(SymType *, SymType *idxtype, SymType *artype); + +SymType *ML_(st_mktypedef)(SymType *, Char *name, SymType *type); + +Bool ML_(st_isstruct)(SymType *); +Bool ML_(st_isunion)(SymType *); +Bool ML_(st_isenum)(SymType *); + +/* ------------------------------------------------------------ + Interface with symtab.c + ------------------------------------------------------------ */ + +/* Typed value */ +typedef struct _Variable Variable; + +struct _Variable { + Char *name; /* name */ + SymType *type; /* type of value */ + Addr valuep; /* pointer to value */ + UInt size; /* size of value */ + UInt distance; /* "distance" from site of interest */ + Variable *next; + Variable *container; +}; + +Variable *ML_(get_scope_variables)(ThreadId tid); + +#endif // __PRIV_SYMTYPES_H + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c new file mode 100644 index 0000000000..d4d34f10e2 --- /dev/null +++ b/coregrind/m_debuginfo/debuginfo.c @@ -0,0 +1,935 @@ + +/*--------------------------------------------------------------------*/ +/*--- Top level management of symbols and debugging information. ---*/ +/*--- debuginfo.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2006 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. +*/ + +#include "pub_core_basics.h" +#include "pub_core_threadstate.h" +#include "pub_core_debuginfo.h" /* self */ +#include "pub_core_demangle.h" +#include "pub_core_libcbase.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcprint.h" +#include "pub_core_mallocfree.h" +#include "pub_core_options.h" +#include "pub_core_redir.h" // VG_(redir_notify_{new,delete}_SegInfo) +#include "pub_core_aspacemgr.h" +#include "priv_storage.h" +#include "priv_readdwarf.h" +#include "priv_readstabs.h" +#include "priv_readelf.h" + + +/*------------------------------------------------------------*/ +/*--- Root structure ---*/ +/*------------------------------------------------------------*/ + +/* The root structure for the entire symbol table system. It is a + linked list of SegInfos. Note that this entire mechanism assumes + that what we read from /proc/self/maps doesn't contain overlapping + address ranges, and as a result the SegInfos in this list describe + disjoint address ranges. +*/ +static SegInfo* segInfo_list = NULL; + + +/*------------------------------------------------------------*/ +/*--- Notification (acquire/discard) helpers ---*/ +/*------------------------------------------------------------*/ + +/* Allocate and zero out a new SegInfo record. 
*/ +static +SegInfo* alloc_SegInfo(Addr start, SizeT size, OffT foffset, + const HChar* filename) +{ + SegInfo* si = VG_(arena_calloc)(VG_AR_SYMTAB, 1, sizeof(SegInfo)); + + si->start = start; + si->size = size; + si->foffset = foffset; + si->filename = VG_(arena_strdup)(VG_AR_SYMTAB, filename); + + // Everything else -- pointers, sizes, arrays -- is zeroed by calloc. + return si; +} + + +/* Free a SegInfo, and also all the stuff hanging off it. */ +static void free_SegInfo ( SegInfo* si ) +{ + struct strchunk *chunk, *next; + vg_assert(si != NULL); + if (si->filename) VG_(arena_free)(VG_AR_SYMTAB, si->filename); + if (si->symtab) VG_(arena_free)(VG_AR_SYMTAB, si->symtab); + if (si->loctab) VG_(arena_free)(VG_AR_SYMTAB, si->loctab); + if (si->cfsi) VG_(arena_free)(VG_AR_SYMTAB, si->cfsi); + + for (chunk = si->strchunks; chunk != NULL; chunk = next) { + next = chunk->next; + VG_(arena_free)(VG_AR_SYMTAB, chunk); + } + VG_(arena_free)(VG_AR_SYMTAB, si); +} + + +/* 'si' is a member of segInfo_list. Find it, remove it from the + list, notify m_redir that this has happened, and free all storage + reachable from it. +*/ +static void discard_SegInfo ( SegInfo* si ) +{ + SegInfo** prev_next_ptr = &segInfo_list; + SegInfo* curr = segInfo_list; + + while (curr) { + if (curr == si) { + // Found it; remove from list and free it. + if (VG_(clo_verbosity) > 1 || VG_(clo_trace_redir)) + VG_(message)(Vg_DebugMsg, + "Discarding syms at %p-%p in %s due to munmap()", + si->start, si->start + si->size, + curr->filename ? curr->filename : (UChar*)"???"); + vg_assert(*prev_next_ptr == curr); + *prev_next_ptr = curr->next; + VG_(redir_notify_delete_SegInfo)( curr ); + free_SegInfo(curr); + return; + } + prev_next_ptr = &curr->next; + curr = curr->next; + } + + // Not found. +} + + +/* Repeatedly scan segInfo_list, looking for segInfos intersecting + [start,start+length), and call discard_SegInfo to get rid of them. + This modifies the list, hence the multiple iterations. 
+ JRS 20060401: I don't understand that last sentence. */ +static void discard_syms_in_range ( Addr start, SizeT length ) +{ + Bool found; + SegInfo* curr; + + while (True) { + found = False; + + curr = segInfo_list; + while (True) { + if (curr == NULL) + break; + if (start+length-1 < curr->start + || curr->start+curr->size-1 < start) { + /* no overlap */ + } else { + found = True; + break; + } + curr = curr->next; + } + + if (!found) break; + discard_SegInfo( curr ); + } +} + + +/* Create a new SegInfo with the specific address/length/vma offset, + then snarf whatever info we can from the given filename into it. */ +static +SegInfo* acquire_syms_for_range( Addr seg_addr, SizeT seg_len, + OffT seg_offset, const Char* seg_filename) +{ + SegInfo* si = alloc_SegInfo(seg_addr, seg_len, seg_offset, seg_filename); + + if (! ML_(read_elf_debug_info) ( si )) { + // Something went wrong (eg. bad ELF file). + free_SegInfo( si ); + si = NULL; + + } else { + // Prepend si to segInfo_list + si->next = segInfo_list; + segInfo_list = si; + + ML_(canonicaliseTables) ( si ); + + /* notify m_redir about it */ + VG_(redir_notify_new_SegInfo)( si ); + } + + return si; +} + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- TOP LEVEL: NOTIFICATION (ACQUIRE/DISCARD INFO) ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +/* The debug info system is driven by notifications that a text + segment has been mapped in, or unmapped. When that happens it + tries to acquire/discard whatever info is available for the + corresponding object. This section contains the notification + handlers. */ + +/* Notify the debuginfo system about a new mapping. This is the way + new debug information gets loaded. If allow_SkFileV is True, it + will try load debug info if the mapping at 'a' belongs to Valgrind; + whereas normally (False) it will not do that. 
This allows us to + carefully control when the thing will read symbols from the + Valgrind executable itself. */ + +void VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV ) +{ + NSegment* seg; + HChar* filename; + Bool ok; + + /* If this mapping is at the beginning of a file, isn't part of + Valgrind, is at least readable and seems to contain an object + file, then try reading symbols from it. + + Getting this heuristic right is critical. On x86-linux, objects + are typically mapped twice: + + 1b8fb000-1b8ff000 r-xp 00000000 08:02 4471477 vgpreload_memcheck.so + 1b8ff000-1b900000 rw-p 00004000 08:02 4471477 vgpreload_memcheck.so + + whereas ppc32-linux mysteriously does this: + + 118a6000-118ad000 r-xp 00000000 08:05 14209428 vgpreload_memcheck.so + 118ad000-118b6000 ---p 00007000 08:05 14209428 vgpreload_memcheck.so + 118b6000-118bd000 rwxp 00000000 08:05 14209428 vgpreload_memcheck.so + + The third mapping should not be considered to have executable + code in. Therefore a test which works for both is: r and x and + NOT w. Reading symbols from the rwx segment -- which overlaps + the r-x segment in the file -- causes the redirection mechanism + to redirect to addresses in that third segment, which is wrong + and causes crashes. + + ------ + JRS 28 Dec 05: unfortunately icc 8.1 on x86 has been seen to + produce executables with a single rwx segment rather than a + (r-x,rw-) pair. That means the rules have to be modified thusly: + + x86-linux: consider if r and x + all others: consider if r and x and NOT w + */ +# if defined(VGP_x86_linux) + Bool require_no_W = False; +# else + Bool require_no_W = True; +# endif + + seg = VG_(am_find_nsegment)(a); + vg_assert(seg); + + filename = VG_(am_get_filename)( seg ); + if (!filename) + return; + + filename = VG_(arena_strdup)( VG_AR_SYMTAB, filename ); + + ok = (seg->kind == SkFileC || (seg->kind == SkFileV && allow_SkFileV)) + && seg->offset == 0 + && seg->fnIdx != -1 + && seg->hasR + && seg->hasX + && (require_no_W ? 
(!seg->hasW) : True) + && ML_(is_elf_object_file)( (const void*)seg->start ); + + if (!ok) { + VG_(arena_free)(VG_AR_SYMTAB, filename); + return; + } + + /* Dump any info previously associated with the range. */ + discard_syms_in_range( seg->start, seg->end + 1 - seg->start ); + + /* .. and acquire new info. */ + acquire_syms_for_range( seg->start, seg->end + 1 - seg->start, + seg->offset, filename ); + + /* acquire_syms_for_range makes its own copy of filename, so is + safe to free it. */ + VG_(arena_free)(VG_AR_SYMTAB, filename); +} + + +/* Unmap is simpler - throw away any SegInfos intersecting + [a, a+len). */ +void VG_(di_notify_munmap)( Addr a, SizeT len ) +{ + discard_syms_in_range(a, len); +} + + +/* Uh, this doesn't do anything at all. IIRC glibc (or ld.so, I don't + remember) does a bunch of mprotects on itself, and if we follow + through here, it causes the debug info for that object to get + discarded. */ +void VG_(di_notify_mprotect)( Addr a, SizeT len, UInt prot ) +{ + Bool exe_ok = toBool(prot & VKI_PROT_EXEC); +# if defined(VGP_x86_linux) + exe_ok = exe_ok || toBool(prot & VKI_PROT_READ); +# endif + if (0 && !exe_ok) + discard_syms_in_range(a, len); +} + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- TOP LEVEL: QUERYING EXISTING DEBUG INFO ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +/*------------------------------------------------------------*/ +/*--- Use of symbol table & location info to create ---*/ +/*--- plausible-looking stack dumps. ---*/ +/*------------------------------------------------------------*/ + +/* Search all symtabs that we know about to locate ptr. If found, set + *psi to the relevant SegInfo, and *symno to the symtab entry number + within that. If not found, *psi is set to NULL. 
*/ +static void search_all_symtabs ( Addr ptr, /*OUT*/SegInfo** psi, + /*OUT*/Int* symno, + Bool match_anywhere_in_fun ) +{ + Int sno; + SegInfo* si; + + for (si = segInfo_list; si != NULL; si = si->next) { + if (si->start <= ptr && ptr < si->start+si->size) { + sno = ML_(search_one_symtab) ( si, ptr, match_anywhere_in_fun ); + if (sno == -1) goto not_found; + *symno = sno; + *psi = si; + return; + } + } + not_found: + *psi = NULL; +} + + +/* Search all loctabs that we know about to locate ptr. If found, set + *psi to the relevant SegInfo, and *locno to the loctab entry number + within that. If not found, *psi is set to NULL. +*/ +static void search_all_loctabs ( Addr ptr, /*OUT*/SegInfo** psi, + /*OUT*/Int* locno ) +{ + Int lno; + SegInfo* si; + + for (si = segInfo_list; si != NULL; si = si->next) { + if (si->start <= ptr && ptr < si->start+si->size) { + lno = ML_(search_one_loctab) ( si, ptr ); + if (lno == -1) goto not_found; + *locno = lno; + *psi = si; + return; + } + } + not_found: + *psi = NULL; +} + + +/* The whole point of this whole big deal: map a code address to a + plausible symbol name. Returns False if no idea; otherwise True. + Caller supplies buf and nbuf. If demangle is False, don't do + demangling, regardless of VG_(clo_demangle) -- probably because the + call has come from VG_(get_fnname_nodemangle)(). 
*/ +static +Bool get_fnname ( Bool demangle, Addr a, Char* buf, Int nbuf, + Bool match_anywhere_in_fun, Bool show_offset) +{ + SegInfo* si; + Int sno; + Int offset; + + search_all_symtabs ( a, &si, &sno, match_anywhere_in_fun ); + if (si == NULL) + return False; + if (demangle) { + VG_(demangle) ( True/*do C++ demangle*/, + si->symtab[sno].name, buf, nbuf ); + } else { + VG_(strncpy_safely) ( buf, si->symtab[sno].name, nbuf ); + } + + offset = a - si->symtab[sno].addr; + if (show_offset && offset != 0) { + Char buf2[12]; + Char* symend = buf + VG_(strlen)(buf); + Char* end = buf + nbuf; + Int len; + + len = VG_(sprintf)(buf2, "%c%d", + offset < 0 ? '-' : '+', + offset < 0 ? -offset : offset); + vg_assert(len < (Int)sizeof(buf2)); + + if (len < (end - symend)) { + Char *cp = buf2; + VG_(memcpy)(symend, cp, len+1); + } + } + + return True; +} + +/* ppc64-linux only: find the TOC pointer (R2 value) that should be in + force at the entry point address of the function containing + guest_code_addr. Returns 0 if not known. */ +Addr VG_(get_tocptr) ( Addr guest_code_addr ) +{ + SegInfo* si; + Int sno; + search_all_symtabs ( guest_code_addr, + &si, &sno, True/*match_anywhere_in_fun*/ ); + if (si == NULL) + return 0; + else + return si->symtab[sno].tocptr; +} + +/* This is available to tools... always demangle C++ names, + match anywhere in function, but don't show offsets. */ +Bool VG_(get_fnname) ( Addr a, Char* buf, Int nbuf ) +{ + return get_fnname ( /*demangle*/True, a, buf, nbuf, + /*match_anywhere_in_fun*/True, + /*show offset?*/False ); +} + +/* This is available to tools... always demangle C++ names, + match anywhere in function, and show offset if nonzero. */ +Bool VG_(get_fnname_w_offset) ( Addr a, Char* buf, Int nbuf ) +{ + return get_fnname ( /*demangle*/True, a, buf, nbuf, + /*match_anywhere_in_fun*/True, + /*show offset?*/True ); +} + +/* This is available to tools... 
always demangle C++ names, + only succeed if 'a' matches first instruction of function, + and don't show offsets. */ +Bool VG_(get_fnname_if_entry) ( Addr a, Char* buf, Int nbuf ) +{ + return get_fnname ( /*demangle*/True, a, buf, nbuf, + /*match_anywhere_in_fun*/False, + /*show offset?*/False ); +} + +/* This is only available to core... don't demangle C++ names, + match anywhere in function, and don't show offsets. */ +Bool VG_(get_fnname_nodemangle) ( Addr a, Char* buf, Int nbuf ) +{ + return get_fnname ( /*demangle*/False, a, buf, nbuf, + /*match_anywhere_in_fun*/True, + /*show offset?*/False ); +} + +/* This is only available to core... don't demangle C++ names, but do + do Z-demangling, match anywhere in function, and don't show + offsets. */ +Bool VG_(get_fnname_Z_demangle_only) ( Addr a, Char* buf, Int nbuf ) +{ +# define N_TMPBUF 4096 /* arbitrary, 4096 == ERRTXT_LEN */ + Char tmpbuf[N_TMPBUF]; + Bool ok; + vg_assert(nbuf > 0); + ok = get_fnname ( /*demangle*/False, a, tmpbuf, N_TMPBUF, + /*match_anywhere_in_fun*/True, + /*show offset?*/False ); + tmpbuf[N_TMPBUF-1] = 0; /* paranoia */ + if (!ok) + return False; + + /* We have something, at least. Try to Z-demangle it. */ + VG_(demangle)( False/*don't do C++ demangling*/, tmpbuf, buf, nbuf); + + buf[nbuf-1] = 0; /* paranoia */ + return True; +# undef N_TMPBUF +} + +/* Map a code address to the name of a shared object file or the executable. + Returns False if no idea; otherwise True. Doesn't require debug info. + Caller supplies buf and nbuf. */ +Bool VG_(get_objname) ( Addr a, Char* buf, Int nbuf ) +{ + SegInfo* si; + + for (si = segInfo_list; si != NULL; si = si->next) { + if (si->start <= a && a < si->start+si->size) { + VG_(strncpy_safely)(buf, si->filename, nbuf); + return True; + } + } + return False; +} + +/* Map a code address to its SegInfo. Returns NULL if not found. Doesn't + require debug info. 
*/ +SegInfo* VG_(find_seginfo) ( Addr a ) +{ + SegInfo* si; + + for (si = segInfo_list; si != NULL; si = si->next) { + if (si->start <= a && a < si->start+si->size) { + return si; + } + } + return NULL; +} + +/* Map a code address to a filename. Returns True if successful. */ +Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename ) +{ + SegInfo* si; + Int locno; + search_all_loctabs ( a, &si, &locno ); + if (si == NULL) + return False; + VG_(strncpy_safely)(filename, si->loctab[locno].filename, n_filename); + return True; +} + +/* Map a code address to a line number. Returns True if successful. */ +Bool VG_(get_linenum)( Addr a, UInt* lineno ) +{ + SegInfo* si; + Int locno; + search_all_loctabs ( a, &si, &locno ); + if (si == NULL) + return False; + *lineno = si->loctab[locno].lineno; + + return True; +} + +/* Map a code address to a filename/line number/dir name info. + See prototype for detailed description of behaviour. +*/ +Bool VG_(get_filename_linenum) ( Addr a, + /*OUT*/Char* filename, Int n_filename, + /*OUT*/Char* dirname, Int n_dirname, + /*OUT*/Bool* dirname_available, + /*OUT*/UInt* lineno ) +{ + SegInfo* si; + Int locno; + + vg_assert( (dirname == NULL && dirname_available == NULL) + || + (dirname != NULL && dirname_available != NULL) ); + + search_all_loctabs ( a, &si, &locno ); + if (si == NULL) + return False; + VG_(strncpy_safely)(filename, si->loctab[locno].filename, n_filename); + *lineno = si->loctab[locno].lineno; + + if (dirname) { + /* caller wants directory info too .. */ + vg_assert(n_dirname > 0); + if (si->loctab[locno].dirname) { + /* .. and we have some */ + *dirname_available = True; + VG_(strncpy_safely)(dirname, si->loctab[locno].dirname, + n_dirname); + } else { + /* .. 
but we don't have any */ + *dirname_available = False; + *dirname = 0; + } + } + + return True; +} + + +/* Print into buf info on code address, function name and filename */ + +static Int putStr ( Int n, Int n_buf, Char* buf, Char* str ) +{ + for (; n < n_buf-1 && *str != 0; n++,str++) + buf[n] = *str; + buf[n] = '\0'; + return n; +} +static Int putStrEsc ( Int n, Int n_buf, Char* buf, Char* str ) +{ + Char alt[2]; + for (; *str != 0; str++) { + switch (*str) { + case '&': n = putStr( n, n_buf, buf, "&"); break; + case '<': n = putStr( n, n_buf, buf, "<"); break; + case '>': n = putStr( n, n_buf, buf, ">"); break; + default: alt[0] = *str; + alt[1] = 0; + n = putStr( n, n_buf, buf, alt ); + break; + } + } + return n; +} + +Char* VG_(describe_IP)(Addr eip, Char* buf, Int n_buf) +{ +# define APPEND(_str) \ + n = putStr(n, n_buf, buf, _str); +# define APPEND_ESC(_str) \ + n = putStrEsc(n, n_buf, buf, _str); +# define BUF_LEN 4096 + + UInt lineno; + UChar ibuf[50]; + Int n = 0; + static UChar buf_fn[BUF_LEN]; + static UChar buf_obj[BUF_LEN]; + static UChar buf_srcloc[BUF_LEN]; + static UChar buf_dirname[BUF_LEN]; + Bool know_dirinfo = False; + Bool know_fnname = VG_(get_fnname) (eip, buf_fn, BUF_LEN); + Bool know_objname = VG_(get_objname)(eip, buf_obj, BUF_LEN); + Bool know_srcloc = VG_(get_filename_linenum)( + eip, + buf_srcloc, BUF_LEN, + buf_dirname, BUF_LEN, &know_dirinfo, + &lineno + ); + if (VG_(clo_xml)) { + + Bool human_readable = True; + HChar* maybe_newline = human_readable ? "\n " : ""; + HChar* maybe_newline2 = human_readable ? "\n " : ""; + + /* Print in XML format, dumping in as much info as we know. 
*/ + APPEND(""); + VG_(sprintf)(ibuf,"0x%llx", (ULong)eip); + APPEND(maybe_newline); + APPEND(ibuf); + if (know_objname) { + APPEND(maybe_newline); + APPEND(""); + APPEND_ESC(buf_obj); + APPEND(""); + } + if (know_fnname) { + APPEND(maybe_newline); + APPEND(""); + APPEND_ESC(buf_fn); + APPEND(""); + } + if (know_srcloc) { + if (know_dirinfo) { + APPEND(maybe_newline); + APPEND(""); + APPEND(buf_dirname); + APPEND(""); + } + APPEND(maybe_newline); + APPEND(""); + APPEND_ESC(buf_srcloc); + APPEND(""); + APPEND(maybe_newline); + APPEND(""); + VG_(sprintf)(ibuf,"%d",lineno); + APPEND(ibuf); + APPEND(""); + } + APPEND(maybe_newline2); + APPEND(""); + + } else { + + /* Print for humans to read */ + VG_(sprintf)(ibuf,"0x%llx: ", (ULong)eip); + APPEND(ibuf); + if (know_fnname) { + APPEND(buf_fn); + if (!know_srcloc && know_objname) { + APPEND(" (in "); + APPEND(buf_obj); + APPEND(")"); + } + } else if (know_objname && !know_srcloc) { + APPEND("(within "); + APPEND(buf_obj); + APPEND(")"); + } else { + APPEND("???"); + } + if (know_srcloc) { + APPEND(" ("); + APPEND(buf_srcloc); + APPEND(":"); + VG_(sprintf)(ibuf,"%d",lineno); + APPEND(ibuf); + APPEND(")"); + } + + } + return buf; + +# undef APPEND +# undef APPEND_ESC +# undef BUF_LEN +} + +/* Returns True if OK. If not OK, *{ip,sp,fp}P are not changed. */ +/* NOTE: this function may rearrange the order of entries in the + SegInfo list. */ +Bool VG_(use_CF_info) ( /*MOD*/Addr* ipP, + /*MOD*/Addr* spP, + /*MOD*/Addr* fpP, + Addr min_accessible, + Addr max_accessible ) +{ + Int i; + SegInfo* si; + DiCfSI* cfsi = NULL; + Addr cfa, ipHere, spHere, fpHere, ipPrev, spPrev, fpPrev; + + static UInt n_search = 0; + static UInt n_steps = 0; + n_search++; + + if (0) VG_(printf)("search for %p\n", *ipP); + + for (si = segInfo_list; si != NULL; si = si->next) { + n_steps++; + + /* Use the per-SegInfo summary address ranges to skip + inapplicable SegInfos quickly. 
*/ + if (si->cfsi_used == 0) + continue; + if (*ipP < si->cfsi_minaddr || *ipP > si->cfsi_maxaddr) + continue; + + i = ML_(search_one_cfitab)( si, *ipP ); + if (i != -1) { + vg_assert(i >= 0 && i < si->cfsi_used); + cfsi = &si->cfsi[i]; + break; + } + } + + if (cfsi == NULL) + return False; + + if (0 && ((n_search & 0xFFFFF) == 0)) + VG_(printf)("%u %u\n", n_search, n_steps); + + /* Start of performance-enhancing hack: once every 16 (chosen + hackily after profiling) successful searchs, move the found + SegInfo one step closer to the start of the list. This makes + future searches cheaper. For starting konqueror on amd64, this + in fact reduces the total amount of searching done by the above + find-the-right-SegInfo loop by more than a factor of 20. */ + if ((n_search & 0xF) == 0) { + /* Move si one step closer to the start of the list. */ + SegInfo* si0 = segInfo_list; + SegInfo* si1 = NULL; + SegInfo* si2 = NULL; + SegInfo* tmp; + while (True) { + if (si0 == NULL) break; + if (si0 == si) break; + si2 = si1; + si1 = si0; + si0 = si0->next; + } + if (si0 == si && si0 != NULL && si1 != NULL && si2 != NULL) { + /* si0 points to si, si1 to its predecessor, and si2 to si1's + predecessor. Swap si0 and si1, that is, move si0 one step + closer to the start of the list. */ + tmp = si0->next; + si2->next = si0; + si0->next = si1; + si1->next = tmp; + } + } + /* End of performance-enhancing hack. */ + + if (0) { + VG_(printf)("found cfisi: "); + ML_(ppDiCfSI)(cfsi); + } + + ipPrev = spPrev = fpPrev = 0; + + ipHere = *ipP; + spHere = *spP; + fpHere = *fpP; + + cfa = cfsi->cfa_off + (cfsi->cfa_sprel ? 
spHere : fpHere); + +# define COMPUTE(_prev, _here, _how, _off) \ + do { \ + switch (_how) { \ + case CFIR_UNKNOWN: \ + return False; \ + case CFIR_SAME: \ + _prev = _here; break; \ + case CFIR_MEMCFAREL: { \ + Addr a = cfa + (Word)_off; \ + if (a < min_accessible \ + || a+sizeof(Addr) > max_accessible) \ + return False; \ + _prev = *(Addr*)a; \ + break; \ + } \ + case CFIR_CFAREL: \ + _prev = cfa + (Word)_off; \ + break; \ + } \ + } while (0) + + COMPUTE(ipPrev, ipHere, cfsi->ra_how, cfsi->ra_off); + COMPUTE(spPrev, spHere, cfsi->sp_how, cfsi->sp_off); + COMPUTE(fpPrev, fpHere, cfsi->fp_how, cfsi->fp_off); + +# undef COMPUTE + + *ipP = ipPrev; + *spP = spPrev; + *fpP = fpPrev; + return True; +} + + +/*------------------------------------------------------------*/ +/*--- SegInfo accessor functions ---*/ +/*------------------------------------------------------------*/ + +const SegInfo* VG_(next_seginfo)(const SegInfo* si) +{ + if (si == NULL) + return segInfo_list; + return si->next; +} + +Addr VG_(seginfo_start)(const SegInfo* si) +{ + return si->start; +} + +SizeT VG_(seginfo_size)(const SegInfo* si) +{ + return si->size; +} + +const UChar* VG_(seginfo_soname)(const SegInfo* si) +{ + return si->soname; +} + +const UChar* VG_(seginfo_filename)(const SegInfo* si) +{ + return si->filename; +} + +ULong VG_(seginfo_sym_offset)(const SegInfo* si) +{ + return si->offset; +} + +VgSectKind VG_(seginfo_sect_kind)(Addr a) +{ + SegInfo* si; + VgSectKind ret = Vg_SectUnknown; + + for(si = segInfo_list; si != NULL; si = si->next) { + if (a >= si->start && a < (si->start + si->size)) { + + if (0) + VG_(printf)( + "addr=%p si=%p %s got=%p %d plt=%p %d data=%p %d bss=%p %d\n", + a, si, si->filename, + si->got_start_vma, si->got_size, + si->plt_start_vma, si->plt_size, + si->data_start_vma, si->data_size, + si->bss_start_vma, si->bss_size); + + ret = Vg_SectText; + + if (a >= si->data_start_vma && a < (si->data_start_vma + si->data_size)) + ret = Vg_SectData; + else + if (a >= 
si->bss_start_vma && a < (si->bss_start_vma + si->bss_size)) + ret = Vg_SectBSS; + else + if (a >= si->plt_start_vma && a < (si->plt_start_vma + si->plt_size)) + ret = Vg_SectPLT; + else + if (a >= si->got_start_vma && a < (si->got_start_vma + si->got_size)) + ret = Vg_SectGOT; + } + } + + return ret; +} + +Int VG_(seginfo_syms_howmany) ( const SegInfo *si ) +{ + return si->symtab_used; +} + +void VG_(seginfo_syms_getidx) ( const SegInfo *si, + Int idx, + /*OUT*/Addr* addr, + /*OUT*/UInt* size, + /*OUT*/HChar** name ) +{ + vg_assert(idx >= 0 && idx < si->symtab_used); + if (addr) *addr = si->symtab[idx].addr; + if (size) *size = si->symtab[idx].size; + if (name) *name = (HChar*)si->symtab[idx].name; +} + + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/priv_readdwarf.h b/coregrind/m_debuginfo/priv_readdwarf.h new file mode 100644 index 0000000000..b77955e0f0 --- /dev/null +++ b/coregrind/m_debuginfo/priv_readdwarf.h @@ -0,0 +1,73 @@ + +/*--------------------------------------------------------------------*/ +/*--- Read DWARF1/2/3 debug info. priv_readdwarf.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2006 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __PRIV_READDWARF_H +#define __PRIV_READDWARF_H + +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. +*/ + + +/* -------------------- + DWARF2 reader + -------------------- */ +extern +void ML_(read_debuginfo_dwarf2) + ( struct _SegInfo* si, + UChar* debuginfo, Int debug_info_sz, /* .debug_info */ + UChar* debugabbrev, /* .debug_abbrev */ + UChar* debugline, Int debug_line_sz, /* .debug_line */ + UChar* debugstr ); + +/* -------------------- + DWARF1 reader + -------------------- */ +extern +void ML_(read_debuginfo_dwarf1) ( struct _SegInfo* si, + UChar* dwarf1d, Int dwarf1d_sz, + UChar* dwarf1l, Int dwarf1l_sz ); + +/* -------------------- + CFI reader + -------------------- */ +extern +void ML_(read_callframe_info_dwarf2) + ( /*OUT*/struct _SegInfo* si, + UChar* ehframe, Int ehframe_sz, Addr ehframe_addr ); + + +#endif /* ndef __PRIV_READDWARF_H */ + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/priv_readelf.h b/coregrind/m_debuginfo/priv_readelf.h new file mode 100644 index 0000000000..381146943b --- /dev/null +++ b/coregrind/m_debuginfo/priv_readelf.h @@ -0,0 +1,58 @@ + +/*--------------------------------------------------------------------*/ +/*--- Reading of syms & debug info from ELF .so/executable files. ---*/ +/*--- priv_readelf.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. 
+ + Copyright (C) 2000-2006 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __PRIV_READELF_H +#define __PRIV_READELF_H + +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. +*/ + +/* Identify an ELF object file by peering at the first few bytes of + it. */ +extern Bool ML_(is_elf_object_file)( const void* buf ); + +/* The central function for reading ELF debug info. For the + object/exe specified by the SegInfo, find ELF sections, then read + the symbols, line number info, file name info, CFA (stack-unwind + info) and anything else we want, into the tables within the + supplied SegInfo. 
+*/ +extern Bool ML_(read_elf_debug_info) ( struct _SegInfo* si ); + + +#endif /* ndef __PRIV_READELF_H */ + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/priv_readstabs.h b/coregrind/m_debuginfo/priv_readstabs.h new file mode 100644 index 0000000000..cc83de3227 --- /dev/null +++ b/coregrind/m_debuginfo/priv_readstabs.h @@ -0,0 +1,52 @@ + +/*--------------------------------------------------------------------*/ +/*--- Read 'stabs' format debug info. priv_readstabs.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2006 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __PRIV_READSTABS_H +#define __PRIV_READSTABS_H + +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. 
+*/ + +/* -------------------- + Stabs reader + -------------------- */ +extern +void ML_(read_debuginfo_stabs) ( struct _SegInfo* si, + UChar* stabC, Int stab_sz, + UChar* stabstr, Int stabstr_sz ); + +#endif /* ndef __PRIV_READSTABS_H */ + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h new file mode 100644 index 0000000000..3965209279 --- /dev/null +++ b/coregrind/m_debuginfo/priv_storage.h @@ -0,0 +1,253 @@ + +/*--------------------------------------------------------------------*/ +/*--- Format-neutral storage of and querying of info acquired from ---*/ +/*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info. ---*/ +/*--- priv_storage.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2006 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. 
+*/
+
+#ifndef __PRIV_STORAGE_H
+#define __PRIV_STORAGE_H
+
+/* --------------------- SYMBOLS --------------------- */
+
+/* A structure to hold an ELF/XCOFF symbol (very crudely). */
+typedef 
+   struct { 
+      Addr  addr;   /* lowest address of entity */
+      Addr  tocptr; /* ppc64-linux only: value that R2 should have */
+      UInt  size;   /* size in bytes */
+      UChar *name;  /* name */
+   }
+   DiSym;
+
+/* --------------------- SRCLOCS --------------------- */
+
+/* Line count at which overflow happens, due to line numbers being
+   stored as shorts in `struct nlist' in a.out.h. */
+#define LINENO_OVERFLOW (1 << (sizeof(short) * 8)) /* 65536 when short is 16 bits */
+
+#define LINENO_BITS     20 /* width of the DiLoc.lineno bitfield */
+#define LOC_SIZE_BITS  (32 - LINENO_BITS) /* = 12; width of the DiLoc.size bitfield */
+#define MAX_LINENO     ((1 << LINENO_BITS) - 1) /* largest value DiLoc.lineno can hold */
+
+/* Unlikely to have any lines with instruction ranges > 4096 bytes */
+#define MAX_LOC_SIZE   ((1 << LOC_SIZE_BITS) - 1) /* = 4095; largest value DiLoc.size can hold */
+
+/* Number used to detect line number overflows; if one line is
+   60000-odd smaller than the previous, it was probably an overflow.
+ */
+#define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
+
+/* A structure to hold addr-to-source info for a single line.  There
+  can be a lot of these, hence the dense packing. */
+typedef
+   struct {
+      /* Word 1 */
+      Addr   addr;               /* lowest address for this line */
+      /* Word 2 */
+      UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
+      UInt   lineno:LINENO_BITS; /* source line number, or zero */
+      /* Word 3 */
+      UChar*  filename;          /* source filename */
+      /* Word 4 */
+      UChar*  dirname;           /* source directory name */
+   }
+   DiLoc;
+
+/* --------------------- CF INFO --------------------- */
+
+/* A structure to summarise DWARF2/3 CFA info for the code address
+   range [base .. base+len-1].  In short, if you know (sp,fp,ip) at
+   some point and ip is in the range [base .. base+len-1], it tells
+   you how to calculate (sp,fp) for the caller of the current frame
+   and also ra, the return address of the current frame.
+
+   First off, calculate CFA, the Canonical Frame Address, thusly:
+
+     cfa = if cfa_sprel then sp+cfa_off else fp+cfa_off
+
+   Once that is done, the previous frame's sp/fp values and this
+   frame's ra value can be calculated like this:
+
+      old_sp/fp/ra
+         = case sp/fp/ra_how of
+              CFIR_UNKNOWN   -> we don't know, sorry
+              CFIR_SAME      -> same as it was before (sp/fp only)
+              CFIR_CFAREL    -> cfa + sp/fp/ra_off
+              CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
+*/
+
+#define CFIR_UNKNOWN   ((UChar)0)
+#define CFIR_SAME      ((UChar)1)
+#define CFIR_CFAREL    ((UChar)2)
+#define CFIR_MEMCFAREL ((UChar)3)
+
+typedef
+   struct {
+      Addr  base; /* first code address covered */
+      UInt  len; /* record covers [base .. base+len-1] */
+      Bool  cfa_sprel; /* True: cfa = sp+cfa_off; False: cfa = fp+cfa_off */
+      UChar ra_how; /* a CFIR_ value */
+      UChar sp_how; /* a CFIR_ value */
+      UChar fp_how; /* a CFIR_ value */
+      Int   cfa_off;
+      Int   ra_off; /* the *_off fields are used as described above, per the matching *_how */
+      Int   sp_off;
+      Int   fp_off;
+   }
+   DiCfSI;
+
+/* --------------------- SEGINFO --------------------- */
+
+/* This is the top-level data type.  It's a structure which contains
+   information pertaining to one mapped text segment.  This type is
+   exported only abstractly - in pub_tool_debuginfo.h. */
+
+#define SEGINFO_STRCHUNKSIZE (64*1024)
+
+struct _SegInfo {
+   struct _SegInfo* next;	/* list of SegInfos */
+
+   /* Description of the mapped segment. */
+   Addr   start; /* address lookups treat the segment as [start, start+size) */
+   UInt   size;
+   UChar* filename; /* in mallocville */
+   OffT   foffset;
+   UChar* soname;
+
+   /* An expandable array of symbols. */
+   DiSym* symtab;
+   UInt   symtab_used; /* entries in use; valid indices are 0 .. symtab_used-1 */
+   UInt   symtab_size; /* presumably the allocated capacity -- TODO confirm in storage.c */
+   /* An expandable array of locations. */
+   DiLoc* loctab;
+   UInt   loctab_used;
+   UInt   loctab_size;
+   /* An expandable array of CFI summary info records.  Also includes
+      summary address bounds, showing the min and max address covered
+      by any of the records, as an aid to fast searching. */
+   DiCfSI* cfsi;
+   UInt    cfsi_used; /* 0 means there is no CFI to search in this SegInfo */
+   UInt    cfsi_size;
+   Addr    cfsi_minaddr; /* both bounds are inclusive: only ip < min or ip > max is rejected */
+   Addr    cfsi_maxaddr;
+
+   /* Expandable arrays of characters -- the string table.  Pointers
+      into this are stable (the arrays are not reallocated). */
+   struct strchunk {
+      UInt   strtab_used;
+      struct strchunk *next;
+      UChar  strtab[SEGINFO_STRCHUNKSIZE];
+   } *strchunks;
+
+   /* 'offset' is what we need to add to symbol table entries to get
+      the location of that symbol in the running image. */
+   OffT   offset;
+
+   /* Bounds of data, BSS, PLT, GOT and OPD (for ppc64-linux) so that
+      tools can see what section an address is in.  In the running
+      image! */
+   Addr	  plt_start_vma; /* each section is tested as [X_start_vma, X_start_vma + X_size) */
+   UInt   plt_size;
+   Addr   got_start_vma;
+   UInt   got_size;
+   Addr   opd_start_vma;
+   UInt   opd_size;
+   Addr   data_start_vma;
+   UInt   data_size;
+   Addr   bss_start_vma;
+   UInt   bss_size;
+};
+
+/* --------------------- functions --------------------- */
+
+/* ------ Adding ------ */
+
+/* Add a symbol to si's symbol table. */
+extern void ML_(addSym) ( struct _SegInfo* si, DiSym* sym );
+
+/* Add a line-number record to a SegInfo. */
+extern
+void ML_(addLineInfo) ( struct _SegInfo* si, 
+                        UChar*   filename, 
+                        UChar*   dirname,  /* NULL is allowable */
+                        Addr this, Addr next, Int lineno, Int entry);
+
+/* Add a CFI summary record.  The supplied DiCfSI is copied. */
+extern void ML_(addDiCfSI) ( struct _SegInfo* si, DiCfSI* cfsi );
+
+/* Add a string to the string table of a SegInfo.  If len==-1,
+   ML_(addStr) will itself measure the length of the string. */
+extern UChar* ML_(addStr) ( struct _SegInfo* si, UChar* str, Int len );
+
+/* Canonicalise the tables held by 'si', in preparation for use.  Call
+   this after finishing adding entries to these tables. */
+extern void ML_(canonicaliseTables) ( struct _SegInfo* si );
+
+/* ------ Searching ------ */
+
+/* Find a symbol-table index containing the specified pointer, or -1
+   if not found.  Binary search.  */
+extern Int ML_(search_one_symtab) ( struct _SegInfo* si, Addr ptr,
+                                    Bool match_anywhere_in_fun );
+
+/* Find a location-table index containing the specified pointer, or -1
+   if not found.  Binary search.  
*/ +extern Int ML_(search_one_loctab) ( struct _SegInfo* si, Addr ptr ); + +/* Find a CFI-table index containing the specified pointer, or -1 if + not found. Binary search. */ +extern Int ML_(search_one_cfitab) ( struct _SegInfo* si, Addr ptr ); + +/* ------ Misc ------ */ + +/* Show a non-fatal debug info reading error. Use vg_panic if + terminal. */ +extern void ML_(symerr) ( HChar* msg ); + +/* Print a symbol. */ +extern void ML_(ppSym) ( Int idx, DiSym* sym ); + +/* Print a call-frame-info summary. */ +extern void ML_(ppDiCfSI) ( DiCfSI* si ); + + +#define TRACE_SYMTAB(format, args...) \ + if (VG_(clo_trace_symtab)) { VG_(printf)(format, ## args); } + + +#endif /* ndef __PRIV_STORAGE_H */ + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/priv_symtab.h b/coregrind/m_debuginfo/priv_symtab.h deleted file mode 100644 index f401a43eb9..0000000000 --- a/coregrind/m_debuginfo/priv_symtab.h +++ /dev/null @@ -1,292 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Header for symbol table stuff. priv_symtab.h ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, a dynamic binary instrumentation - framework. - - Copyright (C) 2000-2005 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file COPYING. -*/ - -#ifndef __PRIV_SYMTAB_H -#define __PRIV_SYMTAB_H - -/* A structure to hold an ELF symbol (very crudely). */ -typedef - struct { - Addr addr; /* lowest address of entity */ - UInt size; /* size in bytes */ - Char *name; /* name */ - Addr tocptr; /* ppc64-linux only: value that R2 should have */ - } - RiSym; - -/* Line count at which overflow happens, due to line numbers being stored as - * shorts in `struct nlist' in a.out.h. */ -#define LINENO_OVERFLOW (1 << (sizeof(short) * 8)) - -#define LINENO_BITS 20 -#define LOC_SIZE_BITS (32 - LINENO_BITS) -#define MAX_LINENO ((1 << LINENO_BITS) - 1) - -/* Unlikely to have any lines with instruction ranges > 4096 bytes */ -#define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1) - -/* Number used to detect line number overflows; if one line is 60000-odd - * smaller than the previous, is was probably an overflow. - */ -#define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000) - -/* A structure to hold addr-to-source info for a single line. There can be a - * lot of these, hence the dense packing. 
*/ -typedef - struct { - /* Word 1 */ - Addr addr; /* lowest address for this line */ - /* Word 2 */ - UShort size:LOC_SIZE_BITS; /* byte size; we catch overflows of this */ - UInt lineno:LINENO_BITS; /* source line number, or zero */ - /* Word 3 */ - Char* filename; /* source filename */ - /* Word 4 */ - Char* dirname; /* source directory name */ - } - RiLoc; - - -/* A structure to hold a set of variables in a particular scope */ -typedef struct _Scope Scope; /* a set of symbols in one scope */ -typedef struct _Sym Sym; /* a single symbol */ -typedef struct _ScopeRange ScopeRange; /* a range of code addreses a scope covers */ - -typedef enum { - SyESPrel, /* on the stack (relative to ESP) */ - SyEBPrel, /* on the stack (relative to EBP) */ - SyReg, /* in a register */ - SyType, /* a type definition */ - SyStatic, /* a static variable */ - SyGlobal, /* a global variable (XXX any different to static - in an outer scope?) */ -} SyKind; - -struct _Sym { - SymType *type; /* type */ - Char *name; /* name */ - SyKind kind; /* kind of symbol */ - - /* a value, depending on kind */ - union { - OffT offset; /* offset on stack (-ve -> ebp; +ve -> esp) */ - Int regno; /* register number */ - Addr addr; /* static or global address */ - } u; -}; - -struct _Scope { - Scope *outer; /* outer (containing) scope */ - UInt nsyms; /* number of symbols in this scope */ - UInt depth; /* depth of scope */ - Sym *syms; /* the symbols */ -}; - -/* A structure to map a scope to a range of code addresses; scopes may - be broken into multiple ranges (before and after a nested scope) */ -struct _ScopeRange { - Addr addr; /* start address of this scope */ - Int size; /* length of scope */ - Scope *scope; /* symbols in scope */ -}; - -#define STRCHUNKSIZE (64*1024) - - -/* A structure to summarise CFI summary info for the code address - range [base .. base+len-1]. In short, if you know (sp,fp,ip) at - some point and ip is in the range [base .. 
base+len-1], it tells - you how to calculate (sp,fp) for the caller of the current - frame and also ra, the return address of the current frame. - - First off, calculate CFA, the Canonical Frame Address, thusly: - - cfa = if cfa_sprel then sp+cfa_off else fp+cfa_off - - Once that is done, the previous frame's sp/fp values and this - frame's ra value can be calculated like this: - - old_sp/fp/ra - = case sp/fp/ra_how of - CFIR_UNKNOWN -> we don't know, sorry - CFIR_SAME -> same as it was before (sp/fp only) - CFIR_CFAREL -> cfa + sp/fp/ra_off - CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off ) -*/ - -#define CFIR_UNKNOWN ((UChar)0) -#define CFIR_SAME ((UChar)1) -#define CFIR_CFAREL ((UChar)2) -#define CFIR_MEMCFAREL ((UChar)3) - -typedef - struct { - Addr base; - UInt len; - Bool cfa_sprel; - UChar ra_how; /* a CFIR_ value */ - UChar sp_how; /* a CFIR_ value */ - UChar fp_how; /* a CFIR_ value */ - Int cfa_off; - Int ra_off; - Int sp_off; - Int fp_off; - } - CfiSI; - -extern void ML_(ppCfiSI) ( CfiSI* ); - - -/* A structure which contains information pertaining to one mapped - text segment. This type is exported only abstractly - in - pub_tool_debuginfo.h. */ -struct _SegInfo { - struct _SegInfo* next; /* list of SegInfos */ - - Int ref; - - /* Description of the mapped segment. */ - Addr start; - UInt size; - Char* filename; /* in mallocville */ - OffT foffset; - Char* soname; - - /* An expandable array of symbols. */ - RiSym* symtab; - UInt symtab_used; - UInt symtab_size; - /* An expandable array of locations. */ - RiLoc* loctab; - UInt loctab_used; - UInt loctab_size; - /* An expandable array of scope ranges. */ - ScopeRange *scopetab; - UInt scopetab_used; - UInt scopetab_size; - /* An expandable array of CFI summary info records. Also includes - summary address bounds, showing the min and max address covered - by any of the records, as an aid to fast searching. 
*/ - CfiSI* cfisi; - UInt cfisi_used; - UInt cfisi_size; - Addr cfisi_minaddr; - Addr cfisi_maxaddr; - - /* Expandable arrays of characters -- the string table. - Pointers into this are stable (the arrays are not reallocated) - */ - struct strchunk { - UInt strtab_used; - struct strchunk *next; - Char strtab[STRCHUNKSIZE]; - } *strchunks; - - /* offset is what we need to add to symbol table entries - to get the real location of that symbol in memory. - */ - OffT offset; - - /* Bounds of data, BSS, PLT, GOT and OPD (for ppc64-linux) so that - tools can see what section an address is in. In the running image! */ - Addr plt_start_vma; - UInt plt_size; - Addr got_start_vma; - UInt got_size; - Addr opd_start_vma; - UInt opd_size; - Addr data_start_vma; - UInt data_size; - Addr bss_start_vma; - UInt bss_size; - - /* data used by stabs parser */ - struct _StabTypeTab *stab_typetab; -}; - -extern -Char *ML_(addStr) ( SegInfo* si, Char* str, Int len ); - -extern -void ML_(addScopeInfo) ( SegInfo* si, Addr this, Addr next, Scope *scope); - -extern -void ML_(addLineInfo) ( SegInfo* si, - Char* filename, - Char* dirname, /* NULL is allowable */ - Addr this, Addr next, Int lineno, Int entry); - -extern -void ML_(addCfiSI) ( SegInfo* si, CfiSI* cfisi ); - -/* Non-fatal -- use vg_panic if terminal. 
*/ -extern -void ML_(symerr) ( Char* msg ); - -/* -------------------- - Stabs reader - -------------------- */ -extern -void ML_(read_debuginfo_stabs) ( SegInfo* si, - UChar* stabC, Int stab_sz, - UChar* stabstr, Int stabstr_sz ); - -/* -------------------- - DWARF2 reader - -------------------- */ -extern -void ML_(read_debuginfo_dwarf2) - ( SegInfo* si, - UChar* debuginfo, Int debug_info_sz, /* .debug_info */ - UChar* debugabbrev, /* .debug_abbrev */ - UChar* debugline, Int debug_line_sz, /* .debug_line */ - UChar* debugstr ); - -/* -------------------- - DWARF1 reader - -------------------- */ -extern -void ML_(read_debuginfo_dwarf1) ( SegInfo* si, - UChar* dwarf1d, Int dwarf1d_sz, - UChar* dwarf1l, Int dwarf1l_sz ); - -/* -------------------- - CFI reader - -------------------- */ -extern -void ML_(read_callframe_info_dwarf2) - ( /*OUT*/SegInfo* si, UChar* ehframe, Int ehframe_sz, Addr ehframe_addr ); - - -#endif // __PRIV_SYMTAB_H - -/*--------------------------------------------------------------------*/ -/*--- end ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/priv_symtypes.h b/coregrind/m_debuginfo/priv_symtypes.h deleted file mode 100644 index 5eac6113de..0000000000 --- a/coregrind/m_debuginfo/priv_symtypes.h +++ /dev/null @@ -1,113 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Intra-Valgrind interfaces for symtypes.c. priv_symtypes.h ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, a dynamic binary instrumentation - framework. - - Copyright (C) 2000-2005 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file COPYING. -*/ - -#ifndef __PRIV_SYMTYPES_H -#define __PRIV_SYMTYPES_H - -/* Lets try to make these opaque */ -typedef struct _SymType SymType; - -/* ------------------------------------------------------------ - Constructors for various SymType nodes - ------------------------------------------------------------ */ - -/* Find the basetype for a given type: that is, if type is a typedef, - return the typedef'd type. If resolve is true, it will resolve - unresolved symbols. If type is not a typedef, then this is just - returns type. 
-*/ -SymType *ML_(st_basetype)(SymType *type, Bool resolve); - -void ML_(st_setname)(SymType *ty, Char *name); - -typedef void (SymResolver)(SymType *, void *); - -/* Create an unresolved type */ -SymType *ML_(st_mkunresolved)(SymType *, SymResolver *resolve, void *data); - -/* update an unresolved type's data */ -void ML_(st_unresolved_setdata)(SymType *, SymResolver *resolve, void *data); - -Bool ML_(st_isresolved)(SymType *); -UInt ML_(st_sizeof)(SymType *); - -/* Unknown type (unparsable) */ -SymType *ML_(st_mkunknown)(SymType *); - -SymType *ML_(st_mkvoid)(SymType *); - -SymType *ML_(st_mkint)(SymType *, UInt size, Bool isSigned); -SymType *ML_(st_mkbool)(SymType *, UInt size); -SymType *ML_(st_mkchar)(SymType *, Bool isSigned); -SymType *ML_(st_mkfloat)(SymType *, UInt size); -SymType *ML_(st_mkdouble)(SymType *, UInt size); - -SymType *ML_(st_mkpointer)(SymType *, SymType *); -SymType *ML_(st_mkrange)(SymType *, SymType *, Int min, Int max); - -SymType *ML_(st_mkstruct)(SymType *, UInt size, UInt nfields); -SymType *ML_(st_mkunion)(SymType *, UInt size, UInt nfields); -void ML_(st_addfield)(SymType *, Char *name, SymType *, UInt off, UInt size); - -SymType *ML_(st_mkenum)(SymType *, UInt ntags); -SymType *ML_(st_addtag)(SymType *, Char *name, Int val); - -SymType *ML_(st_mkarray)(SymType *, SymType *idxtype, SymType *artype); - -SymType *ML_(st_mktypedef)(SymType *, Char *name, SymType *type); - -Bool ML_(st_isstruct)(SymType *); -Bool ML_(st_isunion)(SymType *); -Bool ML_(st_isenum)(SymType *); - -/* ------------------------------------------------------------ - Interface with symtab.c - ------------------------------------------------------------ */ - -/* Typed value */ -typedef struct _Variable Variable; - -struct _Variable { - Char *name; /* name */ - SymType *type; /* type of value */ - Addr valuep; /* pointer to value */ - UInt size; /* size of value */ - UInt distance; /* "distance" from site of interest */ - Variable *next; - Variable *container; -}; 
- -Variable *ML_(get_scope_variables)(ThreadId tid); - -#endif // __PRIV_SYMTYPES_H - -/*--------------------------------------------------------------------*/ -/*--- end ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/dwarf.c b/coregrind/m_debuginfo/readdwarf.c similarity index 97% rename from coregrind/m_debuginfo/dwarf.c rename to coregrind/m_debuginfo/readdwarf.c index 787b120e16..59facf8af9 100644 --- a/coregrind/m_debuginfo/dwarf.c +++ b/coregrind/m_debuginfo/readdwarf.c @@ -1,6 +1,6 @@ /*--------------------------------------------------------------------*/ -/*--- Read DWARF2 debug info. dwarf.c ---*/ +/*--- Read DWARF1/2/3 debug info. readdwarf.c ---*/ /*--------------------------------------------------------------------*/ /* @@ -27,18 +27,28 @@ The GNU General Public License is contained in the file COPYING. */ +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. +*/ #include "pub_core_basics.h" -#include "pub_core_debuginfo.h" #include "pub_core_libcbase.h" #include "pub_core_libcassert.h" #include "pub_core_libcprint.h" #include "pub_core_mallocfree.h" #include "pub_core_options.h" +#include "priv_storage.h" +#include "priv_readdwarf.h" /* self */ -#include "priv_symtypes.h" -#include "priv_symtab.h" +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Read line number and CFI info from DWARF1, DWARF2 ---*/ +/*--- and to some extent DWARF3 sections. ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ /*------------------------------------------------------------*/ /*--- Expanding arrays of words, for holding file name and ---*/ @@ -291,7 +301,7 @@ Char* lookupDir ( Int filename_index, /* Handled an extend line op. Returns true if this is the end of sequence. 
*/ static -Int process_extended_line_op( SegInfo* si, +Int process_extended_line_op( struct _SegInfo* si, WordArray* filenames, WordArray* dirnames, WordArray* fnidx2dir, @@ -378,7 +388,7 @@ Int process_extended_line_op( SegInfo* si, * Output: - si debug info structures get updated */ static -void read_dwarf2_lineblock ( SegInfo* si, +void read_dwarf2_lineblock ( struct _SegInfo* si, UnitInfo* ui, UChar* theBlock, Int noLargerThan ) @@ -922,8 +932,8 @@ void read_unitinfo_dwarf2( /*OUT*/UnitInfo* ui, break; } /* Loop on each sub block */ - /* This test would be valid if we were not shortcuting the parsing - if( level != 0 ) + /* This test would be valid if we were not shortcutting the parsing + if (level != 0) VG_(printf)( "#### Exiting debuginfo block at level %d !!!\n", level ); */ } @@ -940,7 +950,7 @@ void read_unitinfo_dwarf2( /*OUT*/UnitInfo* ui, * Output: update si to contain all the dwarf2 debug infos */ void ML_(read_debuginfo_dwarf2) - ( SegInfo* si, + ( struct _SegInfo* si, UChar* debuginfo, Int debug_info_sz, /* .debug_info */ UChar* debugabbrev, /* .debug_abbrev */ UChar* debugline, Int debug_line_sz, /* .debug_line */ @@ -1158,7 +1168,7 @@ enum dwarf_attribute { /* end of enums taken from gdb-6.0 sources */ void ML_(read_debuginfo_dwarf1) ( - SegInfo* si, + struct _SegInfo* si, UChar* dwarf1d, Int dwarf1d_sz, UChar* dwarf1l, Int dwarf1l_sz ) { @@ -1513,42 +1523,7 @@ static void initUnwindContext ( /*OUT*/UnwindContext* ctx ) /* ------------ Deal with summary-info records ------------ */ -void ML_(ppCfiSI) ( CfiSI* si ) -{ -# define SHOW_HOW(_how, _off) \ - do { \ - if (_how == CFIR_UNKNOWN) { \ - VG_(printf)("Unknown"); \ - } else \ - if (_how == CFIR_SAME) { \ - VG_(printf)("Same"); \ - } else \ - if (_how == CFIR_CFAREL) { \ - VG_(printf)("cfa+%d", _off); \ - } else \ - if (_how == CFIR_MEMCFAREL) { \ - VG_(printf)("*(cfa+%d)", _off); \ - } else { \ - VG_(printf)("???"); \ - } \ - } while (0) - - VG_(printf)("[%p .. 
%p]: ", si->base, - si->base + (UWord)si->len - 1); - VG_(printf)("let cfa=%s+%d", - si->cfa_sprel ? "oldSP" : "oldFP", si->cfa_off); - VG_(printf)(" in RA="); - SHOW_HOW(si->ra_how, si->ra_off); - VG_(printf)(" SP="); - SHOW_HOW(si->sp_how, si->sp_off); - VG_(printf)(" FP="); - SHOW_HOW(si->fp_how, si->fp_off); - VG_(printf)("\n"); - -# undef SHOW_HOW -} - -static void initCfiSI ( CfiSI* si ) +static void initCfiSI ( DiCfSI* si ) { si->base = 0; si->len = 0; @@ -1570,7 +1545,7 @@ static void initCfiSI ( CfiSI* si ) summary is up to but not including the current loc. This works on both x86 and amd64. */ -static Bool summarise_context( /*OUT*/CfiSI* si, +static Bool summarise_context( /*OUT*/DiCfSI* si, Addr loc_start, UnwindContext* ctx ) { @@ -2158,12 +2133,12 @@ static void show_CF_instructions ( UChar* instrs, Int ilen ) reached, or until there is a failure. Return True iff success. */ static -Bool run_CF_instructions ( SegInfo* si, +Bool run_CF_instructions ( struct _SegInfo* si, UnwindContext* ctx, UChar* instrs, Int ilen, UWord fde_arange, UnwindContext* restore_ctx ) { - CfiSI cfisi; + DiCfSI cfsi; Bool summ_ok; Int j, i = 0; Addr loc_prev; @@ -2179,11 +2154,11 @@ Bool run_CF_instructions ( SegInfo* si, i += j; if (0) ppUnwindContext(ctx); if (loc_prev != ctx->loc && si) { - summ_ok = summarise_context ( &cfisi, loc_prev, ctx ); + summ_ok = summarise_context ( &cfsi, loc_prev, ctx ); if (summ_ok) { - ML_(addCfiSI)(si, &cfisi); + ML_(addDiCfSI)(si, &cfsi); if (VG_(clo_trace_cfi)) - ML_(ppCfiSI)(&cfisi); + ML_(ppDiCfSI)(&cfsi); } } } @@ -2191,11 +2166,11 @@ Bool run_CF_instructions ( SegInfo* si, loc_prev = ctx->loc; ctx->loc = fde_arange; if (si) { - summ_ok = summarise_context ( &cfisi, loc_prev, ctx ); + summ_ok = summarise_context ( &cfsi, loc_prev, ctx ); if (summ_ok) { - ML_(addCfiSI)(si, &cfisi); + ML_(addDiCfSI)(si, &cfsi); if (VG_(clo_trace_cfi)) - ML_(ppCfiSI)(&cfisi); + ML_(ppDiCfSI)(&cfsi); } } } @@ -2241,7 +2216,7 @@ static CIE the_CIEs[N_CIEs]; 
void ML_(read_callframe_info_dwarf2) - ( /*OUT*/SegInfo* si, + ( /*OUT*/struct _SegInfo* si, UChar* ehframe, Int ehframe_sz, Addr ehframe_addr ) { Int nbytes; diff --git a/coregrind/m_debuginfo/readelf.c b/coregrind/m_debuginfo/readelf.c new file mode 100644 index 0000000000..89fb2ab9e9 --- /dev/null +++ b/coregrind/m_debuginfo/readelf.c @@ -0,0 +1,1225 @@ + +/*--------------------------------------------------------------------*/ +/*--- Reading of syms & debug info from ELF .so/executable files. ---*/ +/*--- readelf.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2006 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. 
+*/ + +#include "pub_core_basics.h" +#include "pub_core_libcbase.h" +#include "pub_core_libcprint.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcfile.h" +#include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */ +#include "pub_core_machine.h" /* VG_ELF_CLASS */ +#include "pub_core_mallocfree.h" +#include "pub_core_options.h" +#include "pub_core_oset.h" +#include "pub_core_tooliface.h" /* VG_(needs) */ +#include "priv_storage.h" +#include "priv_readelf.h" /* self */ +#include "priv_readdwarf.h" /* 'cos ELF contains DWARF */ +#include "priv_readstabs.h" /* and stabs, if we're unlucky */ + +/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */ +#include +/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */ + +/*------------------------------------------------------------*/ +/*--- 32/64-bit parameterisation ---*/ +/*------------------------------------------------------------*/ + +/* For all the ELF macros and types which specify '32' or '64', + select the correct variant for this platform and give it + an 'XX' name. Then use the 'XX' variant consistently in + the rest of this file. +*/ +#if VG_WORDSIZE == 4 +# define ElfXX_Ehdr Elf32_Ehdr +# define ElfXX_Shdr Elf32_Shdr +# define ElfXX_Phdr Elf32_Phdr +# define ElfXX_Sym Elf32_Sym +# define ElfXX_Word Elf32_Word +# define ElfXX_Addr Elf32_Addr +# define ElfXX_Dyn Elf32_Dyn +# define ELFXX_ST_BIND ELF32_ST_BIND +# define ELFXX_ST_TYPE ELF32_ST_TYPE + +#elif VG_WORDSIZE == 8 +# define ElfXX_Ehdr Elf64_Ehdr +# define ElfXX_Shdr Elf64_Shdr +# define ElfXX_Phdr Elf64_Phdr +# define ElfXX_Sym Elf64_Sym +# define ElfXX_Word Elf64_Word +# define ElfXX_Addr Elf64_Addr +# define ElfXX_Dyn Elf64_Dyn +# define ELFXX_ST_BIND ELF64_ST_BIND +# define ELFXX_ST_TYPE ELF64_ST_TYPE + +#else +# error "VG_WORDSIZE should be 4 or 8" +#endif + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Read symbol table and line info from ELF files. 
---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +/* readelf.c parses ELF files and acquires symbol table info from + them. It calls onwards to readdwarf.c to read DWARF2/3 line number + and call frame info found. */ + + +/* Identify an ELF object file by peering at the first few bytes of + it. */ + +Bool ML_(is_elf_object_file)( const void* buf ) +{ + ElfXX_Ehdr* ehdr = (ElfXX_Ehdr*)buf; + Int ok = 1; + + ok &= (ehdr->e_ident[EI_MAG0] == 0x7F + && ehdr->e_ident[EI_MAG1] == 'E' + && ehdr->e_ident[EI_MAG2] == 'L' + && ehdr->e_ident[EI_MAG3] == 'F'); + ok &= (ehdr->e_ident[EI_CLASS] == VG_ELF_CLASS + && ehdr->e_ident[EI_DATA] == VG_ELF_DATA2XXX + && ehdr->e_ident[EI_VERSION] == EV_CURRENT); + ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN); + ok &= (ehdr->e_machine == VG_ELF_MACHINE); + ok &= (ehdr->e_version == EV_CURRENT); + ok &= (ehdr->e_shstrndx != SHN_UNDEF); + ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0); + ok &= (ehdr->e_phoff != 0 && ehdr->e_phnum != 0); + + if (ok) + return True; + else + return False; +} + + +/* Show a raw ELF symbol, given its in-image address and name. */ + +static +void show_raw_elf_symbol ( Int i, + ElfXX_Sym* sym, Char* sym_name, Addr sym_addr, + Bool ppc64_linux_format ) +{ + HChar* space = ppc64_linux_format ? " " : ""; + VG_(printf)("raw symbol [%4d]: ", i); + switch (ELFXX_ST_BIND(sym->st_info)) { + case STB_LOCAL: VG_(printf)("LOC "); break; + case STB_GLOBAL: VG_(printf)("GLO "); break; + case STB_WEAK: VG_(printf)("WEA "); break; + case STB_LOPROC: VG_(printf)("lop "); break; + case STB_HIPROC: VG_(printf)("hip "); break; + default: VG_(printf)("???
"); break; + } + switch (ELFXX_ST_TYPE(sym->st_info)) { + case STT_NOTYPE: VG_(printf)("NOT "); break; + case STT_OBJECT: VG_(printf)("OBJ "); break; + case STT_FUNC: VG_(printf)("FUN "); break; + case STT_SECTION: VG_(printf)("SEC "); break; + case STT_FILE: VG_(printf)("FIL "); break; + case STT_LOPROC: VG_(printf)("lop "); break; + case STT_HIPROC: VG_(printf)("hip "); break; + default: VG_(printf)("??? "); break; + } + VG_(printf)(": val %010p, %ssz %4d %s\n", + sym_addr, space, sym->st_size, + ( sym->st_name ? sym_name : (Char*)"NONAME" ) ); +} + + +/* Decide whether SYM is something we should collect, and if so, copy + relevant info to the _OUT arguments. For {x86,amd64,ppc32}-linux + this is straightforward - the name, address, size are copied out + unchanged. + + For ppc64-linux it's more complex. If the symbol is seen to be in + the .opd section, it is taken to be a function descriptor, and so + a dereference is attempted, in order to get hold of the real entry + point address. Also as part of the dereference, there is an attempt + to calculate the TOC pointer (R2 value) associated with the symbol. + + To support the ppc64-linux pre-"dotless" ABI (prior to gcc 4.0.0), + if the symbol is seen to be outside the .opd section and its name + starts with a dot, an .opd dereference is not attempted, and no TOC + pointer is calculated, but the leading dot is removed from the + name. + + As a result, on ppc64-linux, the caller of this function may have + to piece together the real size, address, name of the symbol from + multiple calls to this function. Ugly and confusing.
+*/ +static +Bool get_elf_symbol_info ( + /* INPUTS */ + struct _SegInfo* si, /* containing SegInfo */ + ElfXX_Sym* sym, /* ELF symbol */ + Char* sym_name, /* name */ + Addr sym_addr, /* declared address */ + UChar* opd_filea, /* oimage of .opd sec (ppc64-linux only) */ + /* OUTPUTS */ + Char** sym_name_out, /* name we should record */ + Addr* sym_addr_out, /* addr we should record */ + Int* sym_size_out, /* symbol size */ + Addr* sym_tocptr_out, /* ppc64-linux only: R2 value to be + used on entry */ + Bool* from_opd_out /* ppc64-linux only: did we deref an + .opd entry? */ + ) +{ + Bool plausible, is_in_opd; + + /* Set defaults */ + *sym_name_out = sym_name; + *sym_addr_out = sym_addr; + *sym_size_out = (Int)sym->st_size; + *sym_tocptr_out = 0; /* unknown/inapplicable */ + *from_opd_out = False; + + /* Figure out if we're interested in the symbol. Firstly, is it of + the right flavour? */ + plausible + = (ELFXX_ST_BIND(sym->st_info) == STB_GLOBAL + || ELFXX_ST_BIND(sym->st_info) == STB_LOCAL + || ELFXX_ST_BIND(sym->st_info) == STB_WEAK + ) + && + (ELFXX_ST_TYPE(sym->st_info) == STT_FUNC + || (VG_(needs).data_syms + && ELFXX_ST_TYPE(sym->st_info) == STT_OBJECT) + ); + +# if defined(VGP_ppc64_linux) + /* Allow STT_NOTYPE in the very special case where we're running on + ppc64-linux and the symbol is one which the .opd-chasing hack + below will chase. */ + if (!plausible + && ELFXX_ST_TYPE(sym->st_info) == STT_NOTYPE + && sym->st_size > 0 + && si->opd_start_vma != 0 + && sym_addr >= si->opd_start_vma + && sym_addr < si->opd_start_vma + si->opd_size) + plausible = True; +# endif + + if (!plausible) + return False; + + /* Ignore if nameless, or zero-sized. */ + if (sym->st_name == (ElfXX_Word)NULL + || /* VG_(strlen)(sym_name) == 0 */ + /* equivalent but cheaper ... 
*/ + sym_name[0] == 0 + || sym->st_size == 0) { + TRACE_SYMTAB(" ignore -- size=0: %s\n", sym_name); + return False; + } + + /* This seems to significantly reduce the number of junk + symbols, and particularly reduces the number of + overlapping address ranges. Don't ask me why ... */ + if ((Int)sym->st_value == 0) { + TRACE_SYMTAB( " ignore -- valu=0: %s\n", sym_name); + return False; + } + + /* If it's apparently in a GOT or PLT, it's really a reference to a + symbol defined elsewhere, so ignore it. */ + if (si->got_start_vma != 0 + && sym_addr >= si->got_start_vma + && sym_addr < si->got_start_vma + si->got_size) { + TRACE_SYMTAB(" ignore -- in GOT: %s\n", sym_name); + return False; + } + if (si->plt_start_vma != 0 + && sym_addr >= si->plt_start_vma + && sym_addr < si->plt_start_vma + si->plt_size) { + TRACE_SYMTAB(" ignore -- in PLT: %s\n", sym_name); + return False; + } + + /* ppc64-linux nasty hack: if the symbol is in an .opd section, + then really what we have is the address of a function + descriptor. So use the first word of that as the function's + text. + + See thread starting at + http://gcc.gnu.org/ml/gcc-patches/2004-08/msg00557.html + */ + is_in_opd = False; + + if (si->opd_start_vma != 0 + && sym_addr >= si->opd_start_vma + && sym_addr < si->opd_start_vma + si->opd_size) { +# if !defined(VGP_ppc64_linux) + TRACE_SYMTAB(" ignore -- in OPD: %s\n", sym_name); + return False; +# else + Int offset_in_opd; + ULong* fn_descr; + + if (0) VG_(printf)("opdXXX: si->offset %p, sym_addr %p\n", + (void*)(si->offset), (void*)sym_addr); + + if (!VG_IS_8_ALIGNED(sym_addr)) { + TRACE_SYMTAB(" ignore -- not 8-aligned: %s\n", sym_name); + return False; + } + + /* sym_addr is a vma pointing into the .opd section. We know + the vma of the opd section start, so we can figure out how + far into the opd section this is. 
*/ + + offset_in_opd = (Addr)sym_addr - (Addr)(si->opd_start_vma); + if (offset_in_opd < 0 || offset_in_opd >= si->opd_size) { + TRACE_SYMTAB(" ignore -- invalid OPD offset: %s\n", sym_name); + return False; + } + + /* Now we want to know what's at that offset in the .opd + section. We can't look in the running image since it won't + necessarily have been mapped. But we can consult the oimage. + opd_filea is the start address of the .opd in the oimage. + Hence: */ + + fn_descr = (ULong*)(opd_filea + offset_in_opd); + + if (0) VG_(printf)("opdXXY: offset %d, fn_descr %p\n", + offset_in_opd, fn_descr); + if (0) VG_(printf)("opdXXZ: *fn_descr %p\n", (void*)(fn_descr[0])); + + sym_addr = fn_descr[0]; + + /* Hopefully sym_addr is now an offset into the text section. + Problem is, where did the text section get mapped? Well, + this SegInfo (si) exists because a text section got mapped, + and it got mapped to si->start. Hence add si->start to the + sym_addr to get the real vma. */ + + sym_addr += si->offset; + *sym_addr_out = sym_addr; + *sym_tocptr_out = fn_descr[1] + si->offset; + *from_opd_out = True; + is_in_opd = True; + + /* Do a final sanity check: if the symbol falls outside the + SegInfo's mapped range, ignore it. Since sym_addr has been + updated, that can be achieved simply by falling through to + the test below. */ + +# endif /* ppc64-linux nasty hack */ + } + + /* Here's yet another ppc64-linux hack. Get rid of leading dot if + the symbol is outside .opd. */ +# if defined(VGP_ppc64_linux) + if (si->opd_start_vma != 0 + && !is_in_opd + && sym_name[0] == '.') { + vg_assert(!(*from_opd_out)); + *sym_name_out = &sym_name[1]; + } +# endif + + /* If no part of the symbol falls within the mapped range, + ignore it. 
*/ + if (*sym_addr_out + *sym_size_out <= si->start + || *sym_addr_out >= si->start+si->size) { + TRACE_SYMTAB( " ignore -- outside mapped range\n" ); + return False; + } + +# if defined(VGP_ppc64_linux) + /* It's crucial that we never add symbol addresses in the .opd + section. This would completely mess up function redirection and + intercepting. This assert ensures that any symbols that make it + into the symbol table on ppc64-linux don't point into .opd. */ + if (si->opd_start_vma != 0) { + vg_assert(*sym_addr_out + *sym_size_out <= si->opd_start_vma + || *sym_addr_out >= si->opd_start_vma + si->opd_size); + } +# endif + + /* Acquire! */ + return True; +} + + +/* Read an ELF symbol table (normal or dynamic). This one is for the + "normal" case ({x86,amd64,ppc32}-linux). */ +static +__attribute__((unused)) /* not referred to on all targets */ +void read_elf_symtab__normal( + struct _SegInfo* si, Char* tab_name, + ElfXX_Sym* o_symtab, UInt o_symtab_sz, + UChar* o_strtab, UInt o_strtab_sz, + UChar* opd_filea /* ppc64-linux only */ + ) +{ + Int i; + Addr sym_addr, sym_addr_really; + Char *sym_name, *sym_name_really; + Int sym_size; + Addr sym_tocptr; + Bool from_opd; + DiSym risym; + ElfXX_Sym *sym; + + if (o_strtab == NULL || o_symtab == NULL) { + Char buf[80]; + vg_assert(VG_(strlen)(tab_name) < 40); + VG_(sprintf)(buf, " object doesn't have a %s", tab_name); + ML_(symerr)(buf); + return; + } + + TRACE_SYMTAB("\nReading (ELF, standard) %s (%d entries)\n", tab_name, + o_symtab_sz/sizeof(ElfXX_Sym) ); + + /* Perhaps should start at i = 1; ELF docs suggest that entry + 0 always denotes 'unknown symbol'. 
*/ + for (i = 1; i < (Int)(o_symtab_sz/sizeof(ElfXX_Sym)); i++) { + sym = & o_symtab[i]; + sym_name = (Char*)(o_strtab + sym->st_name); + sym_addr = si->offset + sym->st_value; + + if (VG_(clo_trace_symtab)) + show_raw_elf_symbol(i, sym, sym_name, sym_addr, False); + + if (get_elf_symbol_info(si, sym, sym_name, sym_addr, opd_filea, + &sym_name_really, + &sym_addr_really, + &sym_size, + &sym_tocptr, + &from_opd)) { + + risym.addr = sym_addr_really; + risym.size = sym_size; + risym.name = ML_(addStr) ( si, sym_name_really, -1 ); + risym.tocptr = sym_tocptr; + vg_assert(risym.name != NULL); + vg_assert(risym.tocptr == 0); /* has no role except on ppc64-linux */ + ML_(addSym) ( si, &risym ); + + if (VG_(clo_trace_symtab)) { + VG_(printf)(" record [%4d]: " + " val %010p, sz %4d %s\n", + i, (void*)risym.addr, (Int)risym.size, + (HChar*)risym.name + ); + } + + } + } +} + + +/* Read an ELF symbol table (normal or dynamic). This one is for + ppc64-linux, which requires special treatment. */ + +typedef + struct { + Addr addr; + UChar* name; + } + TempSymKey; + +typedef + struct { + TempSymKey key; + Addr tocptr; + Int size; + Bool from_opd; + } + TempSym; + +static Word cmp_TempSymKey ( TempSymKey* key1, TempSym* elem2 ) { + if (key1->addr < elem2->key.addr) return -1; + if (key1->addr > elem2->key.addr) return 1; + return (Word)VG_(strcmp)(key1->name, elem2->key.name); +} +static void* oset_malloc ( SizeT szB ) { + return VG_(arena_malloc)(VG_AR_SYMTAB, szB); +} +static void oset_free ( void* p ) { + VG_(arena_free)(VG_AR_SYMTAB, p); +} + +static +__attribute__((unused)) /* not referred to on all targets */ +void read_elf_symtab__ppc64_linux( + struct _SegInfo* si, UChar* tab_name, + ElfXX_Sym* o_symtab, UInt o_symtab_sz, + UChar* o_strtab, UInt o_strtab_sz, + UChar* opd_filea /* ppc64-linux only */ + ) +{ + Int i, old_size; + Addr sym_addr, sym_addr_really; + Char *sym_name, *sym_name_really; + Int sym_size; + Addr sym_tocptr, old_tocptr; + Bool from_opd, modify_size, 
modify_tocptr; + DiSym risym; + ElfXX_Sym *sym; + OSet *oset; + TempSymKey key; + TempSym *elem; + TempSym *prev; + + if (o_strtab == NULL || o_symtab == NULL) { + Char buf[80]; + vg_assert(VG_(strlen)(tab_name) < 40); + VG_(sprintf)(buf, " object doesn't have a %s", tab_name); + ML_(symerr)(buf); + return; + } + + TRACE_SYMTAB("\nReading (ELF, ppc64-linux) %s (%d entries)\n", tab_name, + o_symtab_sz/sizeof(ElfXX_Sym) ); + + oset = VG_(OSet_Create)( offsetof(TempSym,key), + (OSetCmp_t)cmp_TempSymKey, + oset_malloc, oset_free ); + vg_assert(oset); + + /* Perhaps should start at i = 1; ELF docs suggest that entry + 0 always denotes 'unknown symbol'. */ + for (i = 1; i < (Int)(o_symtab_sz/sizeof(ElfXX_Sym)); i++) { + sym = & o_symtab[i]; + sym_name = (Char*)(o_strtab + sym->st_name); + sym_addr = si->offset + sym->st_value; + + if (VG_(clo_trace_symtab)) + show_raw_elf_symbol(i, sym, sym_name, sym_addr, True); + + if (get_elf_symbol_info(si, sym, sym_name, sym_addr, opd_filea, + &sym_name_really, + &sym_addr_really, + &sym_size, + &sym_tocptr, + &from_opd)) { + + /* Check if we've seen this (name,addr) key before. */ + key.addr = sym_addr_really; + key.name = sym_name_really; + prev = VG_(OSet_Lookup)( oset, &key ); + + if (prev) { + + /* Seen it before. Fold in whatever new info we can. */ + modify_size = False; + modify_tocptr = False; + old_size = 0; + old_tocptr = 0; + + if (prev->from_opd && !from_opd + && (prev->size == 24 || prev->size == 16) + && sym_size != prev->size) { + /* Existing one is an opd-redirect, with a bogus size, + so the only useful new fact we have is the real size + of the symbol. */ + modify_size = True; + old_size = prev->size; + prev->size = sym_size; + } + else + if (!prev->from_opd && from_opd + && (sym_size == 24 || sym_size == 16)) { + /* Existing one is non-opd, new one is opd. What we + can acquire from the new one is the TOC ptr to be + used. Since the existing sym is non-toc, it + shouldn't currently have a known TOC ptr.
*/ + vg_assert(prev->tocptr == 0); + modify_tocptr = True; + old_tocptr = prev->tocptr; + prev->tocptr = sym_tocptr; + } + else { + /* ignore. can we do better here? */ + } + + /* Only one or the other is possible (I think) */ + vg_assert(!(modify_size && modify_tocptr)); + + if (modify_size && VG_(clo_trace_symtab)) { + VG_(printf)(" modify (old sz %4d) " + " val %010p, toc %010p, sz %4d %s\n", + old_size, + (void*) prev->key.addr, + (void*) prev->tocptr, + (Int) prev->size, + (HChar*)prev->key.name + ); + } + if (modify_tocptr && VG_(clo_trace_symtab)) { + VG_(printf)(" modify (upd tocptr) " + " val %010p, toc %010p, sz %4d %s\n", + (void*) prev->key.addr, + (void*) prev->tocptr, + (Int) prev->size, + (HChar*)prev->key.name + ); + } + + } else { + + /* A new (name,addr) key. Add and continue. */ + elem = VG_(OSet_AllocNode)(oset, sizeof(TempSym)); + vg_assert(elem); + elem->key = key; + elem->tocptr = sym_tocptr; + elem->size = sym_size; + elem->from_opd = from_opd; + VG_(OSet_Insert)(oset, elem); + if (VG_(clo_trace_symtab)) { + VG_(printf)(" to-oset [%4d]: " + " val %010p, toc %010p, sz %4d %s\n", + i, (void*) elem->key.addr, + (void*) elem->tocptr, + (Int) elem->size, + (HChar*)elem->key.name + ); + } + + } + } + } + + /* All the syms that matter are in the oset. Now pull them out, + build a "standard" symbol table, and nuke the oset. 
*/
+
+   i = 0;
+   VG_(OSet_ResetIter)( oset );
+
+   while ( (elem = VG_(OSet_Next)(oset)) ) {
+      risym.addr = elem->key.addr;
+      risym.size = elem->size;
+      risym.name = ML_(addStr) ( si, elem->key.name, -1 );
+      risym.tocptr = elem->tocptr;
+      vg_assert(risym.name != NULL);
+
+      ML_(addSym) ( si, &risym );
+      if (VG_(clo_trace_symtab)) {
+         VG_(printf)(" record [%4d]: "
+                     " val %010p, toc %010p, sz %4d %s\n",
+                     i, (void*) risym.addr,
+                     (void*) risym.tocptr,
+                     (Int) risym.size,
+                     (HChar*)risym.name
+         );
+      }
+      i++;
+   }
+
+   VG_(OSet_Destroy)( oset, NULL );
+}
+
+
+/*
+ * This routine for calculating the CRC for a separate debug file
+ * is GPLed code borrowed from GNU binutils.
+ *
+ * Table-driven CRC over buf[0 .. len-1].  'crc' is the running value:
+ * it is bit-inverted on entry and again on exit, so a caller starts
+ * with 0 (as open_debug_file does) and may feed the result back in to
+ * continue over a further buffer.  'len' is a signed Int; if it is
+ * zero or negative no bytes are consumed.
+ */
+static UInt
+calc_gnu_debuglink_crc32(UInt crc, const UChar *buf, Int len)
+{
+   static const UInt crc32_table[256] =
+   {
+      0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
+      0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
+      0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
+      0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+      0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
+      0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+      0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
+      0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+      0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
+      0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
+      0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
+      0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+      0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
+      0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
+      0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
+      0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+      0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
+      0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+      0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
+      0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+      0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
+      0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
+      0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
+      0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+      0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
+      0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
+      0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
+      0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+      0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
+      0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+      0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
+      0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+      0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
+      0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
+      0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
+      0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+      0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
+      0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
+      0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
+      0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+      0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
+      0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+      0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
+      0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+      0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
+      0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
+      0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
+      0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+      0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
+      0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
+      0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
+      0x2d02ef8d
+   };
+   const UChar *end;
+
+   /* The "& 0xffffffff" keeps the value to 32 bits even if UInt were
+      wider than that. */
+   crc = ~crc & 0xffffffff;
+   for (end = buf + len; buf < end; ++ buf)
+      crc = crc32_table[(crc ^ *buf) & 0xff] ^ (crc >> 8);
+   return ~crc & 0xffffffff;
+}
+
+/*
+ * Try and open a separate debug file, ignoring any where the CRC does
+ * not match the value from the main object file.
+ *
+ * name: path of the candidate debug file.
+ * crc:  checksum the file contents must match.
+ * size: out-parameter; receives the file size from fstat.  It is
+ *       written before the mapping and CRC check, so it is only
+ *       meaningful when the return value is non-zero.
+ *
+ * Returns the address of a read-only mapping of the whole file, which
+ * the caller must unmap, or 0 if the file cannot be opened, stat'ed or
+ * mapped, or if its CRC differs (the mapping is released in that case).
+ */
+static
+Addr open_debug_file( Char* name, UInt crc, UInt* size )
+{
+   SysRes fd, sres;
+   struct vki_stat stat_buf;
+   UInt calccrc;
+
+   fd = VG_(open)(name, VKI_O_RDONLY, 0);
+   if (fd.isError)
+      return 0;
+
+   if (VG_(fstat)(fd.val, &stat_buf) != 0) {
+      VG_(close)(fd.val);
+      return 0;
+   }
+
+   if (VG_(clo_verbosity) > 1)
+      VG_(message)(Vg_DebugMsg, "Reading debug info from %s...", name);
+
+   *size = stat_buf.st_size;
+
+   sres = VG_(am_mmap_file_float_valgrind)
+             ( *size, VKI_PROT_READ, fd.val, 0 );
+
+   /* The descriptor is not needed once the mapping attempt is done. */
+   VG_(close)(fd.val);
+
+   if (sres.isError)
+      return 0;
+
+   calccrc = calc_gnu_debuglink_crc32(0, (UChar*)sres.val, *size);
+   if (calccrc != crc) {
+      SysRes res = VG_(am_munmap_valgrind)(sres.val, *size);
+      vg_assert(!res.isError);
+      if (VG_(clo_verbosity) > 1)
+         VG_(message)(Vg_DebugMsg, "... CRC mismatch (computed %08x wanted %08x)", calccrc, crc);
+      return 0;
+   }
+
+   return sres.val;
+}
+
+/*
+ * Try to find a separate debug file for a given object file.
+ * Candidates are tried in this order, where <dir> is objpath with its
+ * last '/'-separated component removed:
+ *    <dir>/<debugname>
+ *    <dir>/.debug/<debugname>
+ *    /usr/lib/debug<dir>/<debugname>
+ * Returns the mapped address from open_debug_file (which also fills in
+ * *size), or 0 if no candidate opens with a matching CRC.
+ */
+static
+Addr find_debug_file( Char* objpath, Char* debugname, UInt crc, UInt* size )
+{
+   Char *objdir = VG_(arena_strdup)(VG_AR_SYMTAB, objpath);
+   Char *objdirptr;
+   Char *debugpath;
+   Addr addr = 0;
+
+   /* Chop the file name off, leaving just the directory. */
+   if ((objdirptr = VG_(strrchr)(objdir, '/')) != NULL)
+      *objdirptr = '\0';
+
+   /* The +16 covers the longest fixed text used below
+      ("/usr/lib/debug" plus one '/') and the terminating NUL. */
+   debugpath = VG_(arena_malloc)(VG_AR_SYMTAB, VG_(strlen)(objdir) + VG_(strlen)(debugname) + 16);
+
+   VG_(sprintf)(debugpath, "%s/%s", objdir, debugname);
+
+   if ((addr = open_debug_file(debugpath, crc, size)) == 0) {
+      VG_(sprintf)(debugpath, "%s/.debug/%s", objdir, debugname);
+      if ((addr = open_debug_file(debugpath, crc, size)) == 0) {
+         VG_(sprintf)(debugpath, "/usr/lib/debug%s/%s", objdir, debugname);
+         addr = open_debug_file(debugpath, crc, size);
+      }
+   }
+
+   VG_(arena_free)(VG_AR_SYMTAB, debugpath);
+   VG_(arena_free)(VG_AR_SYMTAB, objdir);
+
+   return addr;
+}
+
+
+/* The central function for reading ELF debug info.  For the
+   object/exe specified by the SegInfo, find ELF sections, then read
+   the symbols, line number info, file name info, CFA (stack-unwind
+   info) and anything else we want, into the tables within the
+   supplied SegInfo.
+
+   Returns True if the object was mapped, validated and read; False if
+   it could not be stat'ed, opened or mapped, or if a header or section
+   lies outside the mapped image.  Every image mapped here (the object
+   itself and, if one was found, its separate debug file) is unmapped
+   again at the 'out' label before returning.
+*/
+Bool ML_(read_elf_debug_info) ( struct _SegInfo* si )
+{
+   Bool res;
+   ElfXX_Ehdr* ehdr; /* The ELF header */
+   ElfXX_Shdr* shdr; /* The section table */
+   UChar* sh_strtab; /* The section table's string table */
+   SysRes fd, sres;
+   Int i;
+   Bool ok;
+   Addr oimage;
+   UInt n_oimage;
+   Addr dimage = 0;
+   UInt n_dimage = 0;
+   struct vki_stat stat_buf;
+
+   oimage = (Addr)NULL;
+   if (VG_(clo_verbosity) > 1 || VG_(clo_trace_redir))
+      VG_(message)(Vg_DebugMsg, "Reading syms from %s (%p)",
+                   si->filename, si->start );
+
+   /* mmap the object image aboard, so that we can read symbols and
+      line number info out of it.  It will be munmapped immediately
+      thereafter; it is only aboard transiently. */
+
+   fd = VG_(stat)(si->filename, &stat_buf);
+   if (fd.isError) {
+      ML_(symerr)("Can't stat .so/.exe (to determine its size)?!");
+      return False;
+   }
+   n_oimage = stat_buf.st_size;
+
+   fd = VG_(open)(si->filename, VKI_O_RDONLY, 0);
+   if (fd.isError) {
+      ML_(symerr)("Can't open .so/.exe to read symbols?!");
+      return False;
+   }
+
+   sres = VG_(am_mmap_file_float_valgrind)
+             ( n_oimage, VKI_PROT_READ, fd.val, 0 );
+
+   VG_(close)(fd.val);
+
+   if (sres.isError) {
+      VG_(message)(Vg_UserMsg, "warning: mmap failed on %s", si->filename );
+      VG_(message)(Vg_UserMsg, " no symbols or debug info loaded" );
+      return False;
+   }
+
+   oimage = sres.val;
+
+   /* Ok, the object image is safely in oimage[0 .. n_oimage-1].
+      Now verify that it is a valid ELF .so or executable image.
+      From here on, failures must leave via 'out' so that the image
+      is unmapped.
+   */
+   res = False;
+   ok = (n_oimage >= sizeof(ElfXX_Ehdr));
+   ehdr = (ElfXX_Ehdr*)oimage;
+
+   if (ok)
+      ok &= ML_(is_elf_object_file)(ehdr);
+
+   if (!ok) {
+      ML_(symerr)("Invalid ELF header, or missing stringtab/sectiontab.");
+      goto out;
+   }
+
+   /* Walk the LOAD headers in the phdr and update the SegInfo to
+      include them all, so that this segment also contains data and
+      bss memory.  Also computes correct symbol offset value for this
+      ELF file. */
+   if (ehdr->e_phoff + ehdr->e_phnum*sizeof(ElfXX_Phdr) > n_oimage) {
+      ML_(symerr)("ELF program header is beyond image end?!");
+      goto out;
+   }
+   {
+      Bool offset_set = False;
+      ElfXX_Addr prev_addr = 0;
+      Addr baseaddr = 0;
+
+      si->offset = 0;
+
+      vg_assert(si->soname == NULL);
+
+      for (i = 0; i < ehdr->e_phnum; i++) {
+         ElfXX_Phdr *o_phdr;
+         ElfXX_Addr mapped, mapped_end;
+
+         o_phdr = &((ElfXX_Phdr *)(oimage + ehdr->e_phoff))[i];
+
+         /* Try to get the soname.  If there isn't one, use "NONE".
+            The seginfo needs to have some kind of soname in order to
+            facilitate writing redirect functions, since all redirect
+            specifications require a soname (pattern). */
+         if (o_phdr->p_type == PT_DYNAMIC && si->soname == NULL) {
+            const ElfXX_Dyn *dyn = (const ElfXX_Dyn *)(oimage + o_phdr->p_offset);
+            Int stroff = -1;
+            Char *strtab = NULL;
+            Int j;
+
+            for(j = 0; dyn[j].d_tag != DT_NULL; j++) {
+               switch(dyn[j].d_tag) {
+               case DT_SONAME:
+                  stroff = dyn[j].d_un.d_val;
+                  break;
+
+               case DT_STRTAB:
+                  /* baseaddr is only set once the first PT_LOAD has
+                     been seen (below); until then it is still 0. */
+                  strtab = (Char *)oimage + dyn[j].d_un.d_ptr - baseaddr;
+                  break;
+               }
+            }
+
+            if (stroff != -1 && strtab != 0) {
+               TRACE_SYMTAB("soname=%s\n", strtab+stroff);
+               si->soname = VG_(arena_strdup)(VG_AR_SYMTAB, strtab+stroff);
+            }
+         }
+
+         if (o_phdr->p_type != PT_LOAD)
+            continue;
+
+         /* The first PT_LOAD fixes the bias between file addresses
+            and where the object actually got mapped. */
+         if (!offset_set) {
+            offset_set = True;
+            si->offset = si->start - o_phdr->p_vaddr;
+            baseaddr = o_phdr->p_vaddr;
+         }
+
+         // Make sure the Phdrs are in order
+         if (o_phdr->p_vaddr < prev_addr) {
+            ML_(symerr)("ELF Phdrs are out of order!?");
+            goto out;
+         }
+         prev_addr = o_phdr->p_vaddr;
+
+         // Get the data and bss start/size if appropriate
+         mapped = o_phdr->p_vaddr + si->offset;
+         mapped_end = mapped + o_phdr->p_memsz;
+         if (si->data_start_vma == 0 &&
+             (o_phdr->p_flags & (PF_R|PF_W|PF_X)) == (PF_R|PF_W)) {
+            si->data_start_vma = mapped;
+            si->data_size = o_phdr->p_filesz;
+            si->bss_start_vma = mapped + o_phdr->p_filesz;
+            if (o_phdr->p_memsz > o_phdr->p_filesz)
+               si->bss_size = o_phdr->p_memsz - o_phdr->p_filesz;
+            else
+               si->bss_size = 0;
+         }
+
+         /* Round the segment out to whole pages. */
+         mapped = mapped & ~(VKI_PAGE_SIZE-1);
+         mapped_end = (mapped_end + VKI_PAGE_SIZE - 1) & ~(VKI_PAGE_SIZE-1);
+
+         if (VG_(needs).data_syms &&
+             (mapped >= si->start && mapped <= (si->start+si->size)) &&
+             (mapped_end > (si->start+si->size))) {
+            UInt newsz = mapped_end - si->start;
+            if (newsz > si->size) {
+               if (0)
+                  VG_(printf)("extending mapping %p..%p %d -> ..%p %d\n",
+                              si->start, si->start+si->size, si->size,
+                              si->start+newsz, newsz);
+
+               si->size = newsz;
+            }
+         }
+      }
+   }
+
+   /* If, after looking at all the program headers, we still didn't
+      find a soname, add a fake one. */
+   if (si->soname == NULL) {
+      TRACE_SYMTAB("soname(fake)=\"NONE\"\n");
+      /* NOTE(review): this is a string literal, whereas a real soname
+         (above) is arena-allocated.  Confirm that whatever tears down
+         a SegInfo never frees si->soname. */
+      si->soname = "NONE";
+   }
+
+   TRACE_SYMTAB("shoff = %d, shnum = %d, size = %d, n_vg_oimage = %d\n",
+                ehdr->e_shoff, ehdr->e_shnum, sizeof(ElfXX_Shdr), n_oimage );
+
+   if (ehdr->e_shoff + ehdr->e_shnum*sizeof(ElfXX_Shdr) > n_oimage) {
+      ML_(symerr)("ELF section header is beyond image end?!");
+      goto out;
+   }
+
+   shdr = (ElfXX_Shdr*)(oimage + ehdr->e_shoff);
+   sh_strtab = (UChar*)(oimage + shdr[ehdr->e_shstrndx].sh_offset);
+
+   /* Find interesting sections, read the symbol table(s), read any debug
+      information */
+   {
+      /* Pointers to start of sections (in the oimage, not in the
+         running image) */
+      UChar* o_strtab = NULL; /* .strtab */
+      ElfXX_Sym* o_symtab = NULL; /* .symtab */
+      UChar* o_dynstr = NULL; /* .dynstr */
+      ElfXX_Sym* o_dynsym = NULL; /* .dynsym */
+      Char* debuglink = NULL; /* .gnu_debuglink */
+      UChar* stab = NULL; /* .stab (stabs) */
+      UChar* stabstr = NULL; /* .stabstr (stabs) */
+      UChar* debug_line = NULL; /* .debug_line (dwarf2) */
+      UChar* debug_info = NULL; /* .debug_info (dwarf2) */
+      UChar* debug_abbv = NULL; /* .debug_abbrev (dwarf2) */
+      UChar* debug_str = NULL; /* .debug_str (dwarf2) */
+      UChar* dwarf1d = NULL; /* .debug (dwarf1) */
+      UChar* dwarf1l = NULL; /* .line (dwarf1) */
+      UChar* ehframe = NULL; /* .eh_frame (dwarf2) */
+      UChar* opd_filea = NULL; /* .opd (dwarf2, ppc64-linux) */
+      UChar* dummy_filea = NULL;
+
+      /* Section sizes, in bytes */
+      UInt o_strtab_sz = 0;
+      UInt o_symtab_sz = 0;
+      UInt o_dynstr_sz = 0;
+      UInt o_dynsym_sz = 0;
+      UInt debuglink_sz = 0;
+      UInt stab_sz = 0;
+      UInt stabstr_sz = 0;
+      UInt debug_line_sz = 0;
+      UInt debug_info_sz = 0;
+      UInt debug_abbv_sz = 0;
+      UInt debug_str_sz = 0;
+      UInt dwarf1d_sz = 0;
+      UInt dwarf1l_sz = 0;
+      UInt ehframe_sz = 0;
+
+      /* Section virtual addresses */
+      Addr dummy_vma = 0;
+      Addr ehframe_vma = 0;
+
+      /* Find all interesting sections */
+
+      /* What FIND does: it finds the section called SEC_NAME.  The
+         size of it is assigned to SEC_SIZE.  The address that it will
+         appear in the running image is assigned to SEC_VMA (note,
+         this will be meaningless for sections which are not marked
+         loadable.  Even for sections which are marked loadable, the
+         client's ld.so may not have loaded them yet, so there is no
+         guarantee that we can safely prod around in any such area)
+         The address of the section in the transiently loaded oimage
+         is assigned to SEC_FILEA.  Because the entire object file is
+         transiently mapped aboard for inspection, it's always safe to
+         inspect that area. */
+
+      for (i = 0; i < ehdr->e_shnum; i++) {
+
+#        define FIND(sec_name, sec_size, sec_filea, sec_vma) \
+         if (0 == VG_(strcmp)(sec_name, sh_strtab + shdr[i].sh_name)) { \
+            Bool nobits; \
+            sec_vma = (Addr)(si->offset + shdr[i].sh_addr); \
+            sec_filea = (void*)(oimage + shdr[i].sh_offset); \
+            sec_size = shdr[i].sh_size; \
+            nobits = shdr[i].sh_type == SHT_NOBITS; \
+            TRACE_SYMTAB( "%18s: filea %p .. %p, vma %p .. %p\n", \
+                          sec_name, (UChar*)sec_filea, \
+                          ((UChar*)sec_filea) + sec_size - 1, \
+                          sec_vma, sec_vma + sec_size - 1); \
+            /* SHT_NOBITS sections have zero size in the file. */ \
+            if ( shdr[i].sh_offset + (nobits ? 0 : sec_size) > n_oimage ) { \
+               ML_(symerr)(" section beyond image end?!"); \
+               goto out; \
+            } \
+         }
+
+         /* Nb: must find where .got and .plt sections will be in the
+          * executable image, not in the object image transiently loaded. */
+         /* NAME SIZE ADDR_IN_OIMAGE ADDR_WHEN_MAPPED */
+         FIND(".dynsym", o_dynsym_sz, o_dynsym, dummy_vma)
+         FIND(".dynstr", o_dynstr_sz, o_dynstr, dummy_vma)
+         FIND(".symtab", o_symtab_sz, o_symtab, dummy_vma)
+         FIND(".strtab", o_strtab_sz, o_strtab, dummy_vma)
+
+         FIND(".gnu_debuglink", debuglink_sz, debuglink, dummy_vma)
+
+         FIND(".stab", stab_sz, stab, dummy_vma)
+         FIND(".stabstr", stabstr_sz, stabstr, dummy_vma)
+
+         FIND(".debug_line", debug_line_sz, debug_line, dummy_vma)
+         FIND(".debug_info", debug_info_sz, debug_info, dummy_vma)
+         FIND(".debug_abbrev", debug_abbv_sz, debug_abbv, dummy_vma)
+         FIND(".debug_str", debug_str_sz, debug_str, dummy_vma)
+
+         FIND(".debug", dwarf1d_sz, dwarf1d, dummy_vma)
+         FIND(".line", dwarf1l_sz, dwarf1l, dummy_vma)
+         FIND(".eh_frame", ehframe_sz, ehframe, ehframe_vma)
+
+         FIND(".got", si->got_size, dummy_filea, si->got_start_vma)
+         FIND(".plt", si->plt_size, dummy_filea, si->plt_start_vma)
+         FIND(".opd", si->opd_size, opd_filea, si->opd_start_vma)
+
+#        undef FIND
+      }
+
+      /* Check some sizes */
+      vg_assert((o_dynsym_sz % sizeof(ElfXX_Sym)) == 0);
+      vg_assert((o_symtab_sz % sizeof(ElfXX_Sym)) == 0);
+
+      /* Did we find a debuglink section? */
+      if (debuglink != NULL) {
+         /* The section holds the NUL-terminated debug file name,
+            padding up to a multiple of 4, and then the CRC. */
+         UInt crc_offset = VG_ROUNDUP(VG_(strlen)(debuglink)+1, 4);
+         UInt crc;
+
+         vg_assert(crc_offset + sizeof(UInt) <= debuglink_sz);
+
+         /* Extract the CRC from the debuglink section */
+         crc = *(UInt *)(debuglink + crc_offset);
+
+         /* See if we can find a matching debug file */
+         if ((dimage = find_debug_file(si->filename, debuglink, crc, &n_dimage)) != 0) {
+            /* From here on ehdr/shdr/sh_strtab describe the debug
+               image, not the main object. */
+            ehdr = (ElfXX_Ehdr*)dimage;
+
+            /* Only walk the debug file's sections if its header is
+               valid and its section header table (including the entry
+               named by e_shstrndx) lies inside the mapped image.  This
+               is the same extent check the main object gets above;
+               a debug file failing it is simply ignored. */
+            if (n_dimage >= sizeof(ElfXX_Ehdr)
+                && ML_(is_elf_object_file)(ehdr)
+                && ehdr->e_shoff + ehdr->e_shnum*sizeof(ElfXX_Shdr) <= n_dimage
+                && ehdr->e_shstrndx < ehdr->e_shnum) {
+               shdr = (ElfXX_Shdr*)(dimage + ehdr->e_shoff);
+               sh_strtab = (UChar*)(dimage + shdr[ehdr->e_shstrndx].sh_offset);
+
+               /* Same deal as previous FIND, except simpler - doesn't
+                  look for vma, only oimage address. */
+
+               /* Find all interesting sections */
+               for (i = 0; i < ehdr->e_shnum; i++) {
+
+#                 define FIND(sec_name, sec_size, sec_filea) \
+                  if (0 == VG_(strcmp)(sec_name, sh_strtab + shdr[i].sh_name)) { \
+                     Bool nobits; \
+                     if (0 != sec_filea) \
+                        VG_(core_panic)("repeated section!\n"); \
+                     sec_filea = (void*)(dimage + shdr[i].sh_offset); \
+                     sec_size = shdr[i].sh_size; \
+                     nobits = shdr[i].sh_type == SHT_NOBITS; \
+                     TRACE_SYMTAB( "%18s: filea %p .. %p\n", \
+                                   sec_name, (UChar*)sec_filea, \
+                                   ((UChar*)sec_filea) + sec_size - 1); \
+                     /* SHT_NOBITS sections have zero size in the file. */ \
+                     if ( shdr[i].sh_offset + (nobits ? 0 : sec_size) > n_dimage ) { \
+                        ML_(symerr)(" section beyond image end?!"); \
+                        goto out; \
+                     } \
+                  }
+
+                  FIND(".stab", stab_sz, stab)
+                  FIND(".stabstr", stabstr_sz, stabstr)
+                  FIND(".debug_line", debug_line_sz, debug_line)
+                  FIND(".debug_info", debug_info_sz, debug_info)
+                  FIND(".debug_abbrev", debug_abbv_sz, debug_abbv)
+                  FIND(".debug_str", debug_str_sz, debug_str)
+                  FIND(".debug", dwarf1d_sz, dwarf1d)
+                  FIND(".line", dwarf1l_sz, dwarf1l)
+
+#                 undef FIND
+               }
+            }
+         }
+      }
+
+      /* Read symbols */
+      {
+         void (*read_elf_symtab)(struct _SegInfo*,Char*,ElfXX_Sym*,
+                                 UInt,UChar*,UInt,UChar*);
+#        if defined(VGP_ppc64_linux)
+         read_elf_symtab = read_elf_symtab__ppc64_linux;
+#        else
+         read_elf_symtab = read_elf_symtab__normal;
+#        endif
+         read_elf_symtab(si, "symbol table",
+                         o_symtab, o_symtab_sz,
+                         o_strtab, o_strtab_sz, opd_filea);
+
+         read_elf_symtab(si, "dynamic symbol table",
+                         o_dynsym, o_dynsym_sz,
+                         o_dynstr, o_dynstr_sz, opd_filea);
+      }
+
+      /* Read .eh_frame (call-frame-info) if any */
+      if (ehframe) {
+         ML_(read_callframe_info_dwarf2) ( si, ehframe, ehframe_sz, ehframe_vma );
+      }
+
+      /* Read the stabs and/or dwarf2 debug information, if any.  It
+         appears reading stabs stuff on amd64-linux doesn't work, so
+         we ignore it. */
+#     if !defined(VGP_amd64_linux)
+      if (stab && stabstr) {
+         ML_(read_debuginfo_stabs) ( si, stab, stab_sz,
+                                     stabstr, stabstr_sz );
+      }
+#     endif
+      /* jrs 2006-01-01: icc-8.1 has been observed to generate
+         binaries without debug_str sections.  Don't preclude
+         debuginfo reading for that reason, but, in
+         read_unitinfo_dwarf2, do check that debugstr is non-NULL
+         before using it. */
+      if (debug_info && debug_abbv && debug_line /* && debug_str */) {
+         ML_(read_debuginfo_dwarf2) ( si,
+                                      debug_info, debug_info_sz,
+                                      debug_abbv,
+                                      debug_line, debug_line_sz,
+                                      debug_str );
+      }
+      if (dwarf1d && dwarf1l) {
+         ML_(read_debuginfo_dwarf1) ( si, dwarf1d, dwarf1d_sz,
+                                      dwarf1l, dwarf1l_sz );
+      }
+   }
+   res = True;
+
+  out: {
+   SysRes m_res;
+   /* Last, but not least, heave the image(s) back overboard. */
+   if (dimage) {
+      m_res = VG_(am_munmap_valgrind) ( dimage, n_dimage );
+      vg_assert(!m_res.isError);
+   }
+   m_res = VG_(am_munmap_valgrind) ( oimage, n_oimage );
+   vg_assert(!m_res.isError);
+   return res;
+  }
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_debuginfo/readstabs.c b/coregrind/m_debuginfo/readstabs.c
new file mode 100644
index 0000000000..b82a7d31de
--- /dev/null
+++ b/coregrind/m_debuginfo/readstabs.c
@@ -0,0 +1,377 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Read stabs debug info. readstabs.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2000-2005 Julian Seward
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. +*/ + +#include "pub_core_basics.h" +#include "pub_core_debuginfo.h" +#include "pub_core_libcbase.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcprint.h" +#include "pub_core_mallocfree.h" +#include "priv_storage.h" +#include "priv_readstabs.h" /* self */ + +/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */ +#include <a.out.h> /* stabs defns */ +/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */ + +/*------------------------------------------------------------*/ +/*--- Read STABS format debug info. 
---*/
+/*------------------------------------------------------------*/
+
+/* Stabs entry types, from:
+ *   The "stabs" debug format
+ *   Menapace, Kingdon and MacKenzie
+ *   Cygnus Support
+ */
+typedef enum { N_UNDEF = 0, /* undefined symbol, new stringtab */
+               N_GSYM = 32, /* Global symbol */
+               N_FUN = 36, /* Function start or end */
+               N_STSYM = 38, /* Data segment file-scope variable */
+               N_LCSYM = 40, /* BSS segment file-scope variable */
+               N_RSYM = 64, /* Register variable */
+               N_SLINE = 68, /* Source line number */
+               N_SO = 100, /* Source file path and name */
+               N_LSYM = 128, /* Stack variable or type */
+               N_BINCL = 130, /* Beginning of an include file */
+               N_SOL = 132, /* Include file name */
+               N_PSYM = 160, /* Function parameter */
+               N_EINCL = 162, /* End of an include file */
+               N_LBRAC = 192, /* Start of lexical block */
+               N_EXCL = 194, /* Placeholder for an include file */
+               N_RBRAC = 224 /* End of lexical block */
+             } stab_types;
+
+
+/* Read stabs-format debug info.  This is all rather horrible because
+   stabs is an underspecified, kludgy hack.
+
+   si:                 SegInfo that receives the strings and line info.
+   stabC, stab_sz:     the stab entries (treated as an array of
+                       struct nlist) and their total size in bytes.
+   stabstr, stabstr_sz: the string table the entries index into via
+                       n_un.n_strx, and its size in bytes.
+
+   Only file names (N_SO/N_SOL), function boundaries (N_FUN) and line
+   numbers (N_SLINE) are acted on; the result is a series of
+   ML_(addLineInfo) calls.  All other entry kinds are skipped.
+*/
+void ML_(read_debuginfo_stabs) ( SegInfo* si,
+                                 UChar* stabC, Int stab_sz,
+                                 UChar* stabstr, Int stabstr_sz )
+{
+   const Bool debug = False;
+   const Bool contdebug = False;
+   Int i;
+   Int n_stab_entries;
+   struct nlist* stab = (struct nlist*)stabC;
+   UChar *next_stabstr = NULL;
+   /* state for various things */
+   struct {
+      Addr start; /* start address */
+      Addr end; /* end address */
+      Int line; /* first line */
+   } func = { 0, 0, -1 };
+   struct {
+      Char *name;
+      Bool same;
+   } file = { NULL, True };
+   struct {
+      Int prev; /* prev line */
+      Int no; /* current line */
+      Int ovf; /* line wrap */
+      Addr addr; /* start of this line */
+      Bool first; /* first line in function */
+   } line = { 0, 0, 0, 0, False };
+
+   /* Ok.  It all looks plausible.  Go on and read debug data.
+      stab kinds: 100 N_SO a source file name
+                   68 N_SLINE a source line number
+                   36 N_FUN start of a function
+
+      In this loop, we maintain a current file name, updated as
+      N_SO/N_SOLs appear, and a current function base address,
+      updated as N_FUNs appear.  Based on that, address ranges for
+      N_SLINEs are calculated, and stuffed into the line info table.
+
+      Finding the instruction address range covered by an N_SLINE is
+      complicated; see the N_SLINE case below.
+   */
+   file.name = ML_(addStr)(si,"???", -1);
+
+   n_stab_entries = stab_sz/(int)sizeof(struct nlist);
+
+   for (i = 0; i < n_stab_entries; i++) {
+      const struct nlist *st = &stab[i];
+      Char *string;
+
+      if (debug && 1) {
+         VG_(printf) ( "%2d type=%d othr=%d desc=%d value=0x%x strx=%d %s\n", i,
+                       st->n_type, st->n_other, st->n_desc,
+                       (int)st->n_value,
+                       (int)st->n_un.n_strx,
+                       stabstr + st->n_un.n_strx );
+      }
+
+      /* handle continued string stabs */
+      {
+         Int qbuflen = 0;
+         Int qidx = 0;
+         Char* qbuf = NULL;
+         Int qlen;
+         Bool qcontinuing = False;
+         UInt qstringidx;
+
+         qstringidx = st->n_un.n_strx;
+         string = stabstr + qstringidx;
+         qlen = VG_(strlen)(string);
+
+         while (string
+                && qlen > 0
+                && (qcontinuing || string[qlen-1] == '\\')) {
+            /* Gak, we have a continuation.  Skip forward through
+               subsequent stabs to gather all the parts of the
+               continuation.  Increment i, but keep st pointing at
+               current stab. */
+
+            qcontinuing = string[qlen-1] == '\\';
+
+            /* remove trailing \ -- test qlen first, so that a piece
+               made only of backslashes never makes us read
+               string[-1] */
+            while (qlen > 0 && string[qlen-1] == '\\')
+               qlen--;
+
+            if (contdebug)
+               VG_(printf)("found extension string: \"%s\" "
+                           "len=%d(%c) idx=%d buflen=%d\n",
+                           string, qlen,
+                           qlen > 0 ? string[qlen-1] : '?',
+                           qidx, qbuflen);
+
+            /* XXX this is silly.  The si->strtab should have a way of
+               appending to the last added string... */
+            if ((qidx + qlen) >= qbuflen) {
+               Char *n;
+
+               /* Grow by doubling, always leaving room for the NUL
+                  that the contdebug path writes at qbuf[qidx]. */
+               if (qbuflen == 0)
+                  qbuflen = 16;
+               while ((qidx + qlen) >= qbuflen)
+                  qbuflen *= 2;
+               n = VG_(arena_malloc)(VG_AR_SYMTAB, qbuflen);
+
+               /* On the first growth there is no old buffer (and
+                  qidx is 0), so there is nothing to copy or free. */
+               if (qbuf != NULL) {
+                  VG_(memcpy)(n, qbuf, qidx);
+                  VG_(arena_free)(VG_AR_SYMTAB, qbuf);
+               }
+               qbuf = n;
+            }
+
+            VG_(memcpy)(&qbuf[qidx], string, qlen);
+            qidx += qlen;
+            if (contdebug) {
+               qbuf[qidx] = '\0';
+               VG_(printf)("working buf=\"%s\"\n", qbuf);
+            }
+
+            i++;
+            if (i >= n_stab_entries)
+               break;
+
+            if (stab[i].n_un.n_strx) {
+               string = stabstr + stab[i].n_un.n_strx;
+               qlen = VG_(strlen)(string);
+            } else {
+               string = NULL;
+               qlen = 0;
+            }
+         }
+
+         if (qbuf != NULL) {
+            i--; /* overstepped */
+            string = ML_(addStr)(si, qbuf, qidx);
+            VG_(arena_free)(VG_AR_SYMTAB, qbuf);
+            if (contdebug)
+               VG_(printf)("made composite: \"%s\"\n", string);
+         }
+      }
+
+      switch(st->n_type) {
+         case N_UNDEF:
+            /* new string table base */
+            if (next_stabstr != NULL) {
+               stabstr_sz -= next_stabstr - stabstr;
+               stabstr = next_stabstr;
+               if (stabstr_sz <= 0) {
+                  VG_(printf)(" @@ bad stabstr size %d\n", stabstr_sz);
+                  return;
+               }
+            }
+            next_stabstr = stabstr + st->n_value;
+            break;
+
+         case N_BINCL: {
+            break;
+         }
+
+         case N_EINCL:
+            break;
+
+         case N_EXCL:
+            break;
+
+         case N_SOL: /* sub-source (include) file */
+            if (line.ovf != 0)
+               VG_(message)(Vg_UserMsg,
+                            "Warning: file %s is very big (> 65535 lines) "
+                            "Line numbers and annotation for this file might "
+                            "be wrong. Sorry",
+                            file.name);
+            /* FALLTHROUGH */
+
+         case N_SO: { /* new source file */
+            UChar *nm = string;
+            UInt len = VG_(strlen)(nm);
+            Addr addr = func.start + st->n_value;
+
+            if (line.addr != 0) {
+               /* finish off previous line */
+               ML_(addLineInfo)(si, file.name, NULL, line.addr,
+                                addr, line.no + line.ovf * LINENO_OVERFLOW, i);
+            }
+
+            /* reset line state */
+            line.ovf = 0;
+            line.addr = 0;
+            line.prev = 0;
+            line.no = 0;
+
+            /* A name ending in '/' is not taken as the file name; the
+               current name is kept in that case. */
+            if (len > 0 && nm[len-1] != '/') {
+               file.name = ML_(addStr)(si, nm, -1);
+               if (debug)
+                  VG_(printf)("new source: %s\n", file.name);
+            } else if (len == 0)
+               file.name = ML_(addStr)(si, "?1\0", -1);
+
+            break;
+         }
+
+         case N_SLINE: { /* line info */
+            Addr addr = func.start + st->n_value;
+
+            if (line.addr != 0) {
+               /* there was a previous */
+               ML_(addLineInfo)(si, file.name, NULL, line.addr,
+                                addr, line.no + line.ovf * LINENO_OVERFLOW, i);
+            }
+
+            line.addr = addr;
+            line.prev = line.no;
+            /* n_desc is read as an unsigned 16-bit line number, hence
+               the wrap-around detection just below. */
+            line.no = (Int)((UShort)st->n_desc);
+
+            if (line.prev > line.no + OVERFLOW_DIFFERENCE && file.same) {
+               VG_(message)(Vg_DebugMsg,
+                            "Line number overflow detected (%d --> %d) in %s",
+                            line.prev, line.no, file.name);
+               line.ovf++;
+            }
+            file.same = True;
+
+            /* This is pretty horrible.  If this is the first line of
+               the function, then bind any unbound symbols to the arg
+               scope, since they're probably arguments. */
+            if (line.first) {
+               line.first = False;
+
+               /* remember first line of function */
+               if (func.start != 0) {
+                  func.line = line.no;
+               }
+            }
+            break;
+         }
+
+         case N_FUN: { /* function start/end */
+            Addr addr = 0; /* end address for prev line/scope */
+            Bool newfunc = False;
+
+            /* if this is the end of the function or we haven't
+               previously finished the previous function... */
+            if (*string == '\0' || func.start != 0) {
+               /* end of function */
+               newfunc = False;
+               line.first = False;
+
+               /* end line at end of function */
+               addr = func.start + st->n_value;
+
+               /* now between functions */
+               func.start = 0;
+
+               // XXXX DEAD POINT XXXX
+            }
+
+            if (*string != '\0') {
+               /* new function */
+               newfunc = True;
+               line.first = True;
+
+               /* line ends at start of next function */
+               addr = si->offset + st->n_value;
+
+               func.start = addr;
+            }
+
+            if (line.addr) {
+               ML_(addLineInfo)(si, file.name, NULL, line.addr,
+                                addr, line.no + line.ovf * LINENO_OVERFLOW, i);
+               line.addr = 0;
+            }
+
+            //DEAD POINT
+            //DEAD POINT
+            break;
+         }
+
+         case N_LBRAC: {
+            /* open new scope */
+            // DEAD POINT
+            break;
+         }
+
+         case N_RBRAC: {
+            /* close scope */
+            // DEAD POINT
+            break;
+         }
+
+         case N_GSYM: /* global variable */
+         case N_STSYM: /* static in data segment */
+         case N_LCSYM: /* static in bss segment */
+         case N_PSYM: /* function parameter */
+         case N_LSYM: /* stack variable */
+         case N_RSYM: /* register variable */
+            break;
+      }
+   }
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_debuginfo/stabs.c b/coregrind/m_debuginfo/stabs.c
deleted file mode 100644
index 4727f17a9d..0000000000
--- a/coregrind/m_debuginfo/stabs.c
+++ /dev/null
@@ -1,1725 +0,0 @@
-
-/*--------------------------------------------------------------------*/
-/*--- Read stabs debug info. stabs.c ---*/
-/*--------------------------------------------------------------------*/
-
-/*
-   This file is part of Valgrind, a dynamic binary instrumentation
-   framework.
- - Copyright (C) 2000-2005 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file COPYING. -*/ - -#include "pub_core_basics.h" -#include "pub_core_debuginfo.h" -#include "pub_core_libcbase.h" -#include "pub_core_libcassert.h" -#include "pub_core_libcprint.h" -#include "pub_core_mallocfree.h" - -#include "priv_symtypes.h" -#include "priv_symtab.h" - -#include /* stabs defns */ - -/*------------------------------------------------------------*/ -/*--- Read STABS format debug info. 
---*/ -/*------------------------------------------------------------*/ - -/* Stabs entry types, from: - * The "stabs" debug format - * Menapace, Kingdon and MacKenzie - * Cygnus Support - */ -typedef enum { N_UNDEF = 0, /* undefined symbol, new stringtab */ - N_GSYM = 32, /* Global symbol */ - N_FUN = 36, /* Function start or end */ - N_STSYM = 38, /* Data segment file-scope variable */ - N_LCSYM = 40, /* BSS segment file-scope variable */ - N_RSYM = 64, /* Register variable */ - N_SLINE = 68, /* Source line number */ - N_SO = 100, /* Source file path and name */ - N_LSYM = 128, /* Stack variable or type */ - N_BINCL = 130, /* Beginning of an include file */ - N_SOL = 132, /* Include file name */ - N_PSYM = 160, /* Function parameter */ - N_EINCL = 162, /* End of an include file */ - N_LBRAC = 192, /* Start of lexical block */ - N_EXCL = 194, /* Placeholder for an include file */ - N_RBRAC = 224 /* End of lexical block */ - } stab_types; - - -/* stabs use a two-dimensional numbering scheme for types: the type - number is either of the form name:N or name:(M,N); name may be - empty. N is the type number within a file context; M is the file - number (an object may have multiple files by inclusion). -*/ - -typedef struct _StabType { - Char *str; /* string as it appears in file */ - SymType *type; /* our type info */ -} StabType; - -typedef struct _StabFile { - StabType *types; - Int ntypes; - UInt fileidx; /* for reference, idx of creation */ -} StabFile; - -typedef struct _StabTypeTab { - StabFile **files; - Int nfiles; - - /* List of structure tag names, used for mapping them to actual - definitions of the structures. There should really be one of - these per object and a global one to cope with cross-object - references. 
*/ - struct structlist { - Char *name; - Bool isstruct; /* struct (or union) */ - SymType *type; /* reference */ - struct structlist *next; - } *structlist; - -#define HEADER_HASHSZ 53 - struct header { - Char *filename; /* header file name */ - StabFile *types; /* types for that header */ - UInt instance; /* instance */ - struct header *next; - } *headerhash[HEADER_HASHSZ]; -} StabTypeTab; - -static const Bool stabs_debug = False; - -static UInt header_hash(Char *filename, UInt instance) -{ - Char *cp; - UInt hash = 0; - - for(cp = filename; *cp; cp++) { - hash += *cp; - hash = (hash << 17) | (hash >> (32-17)); - } - hash += instance; - - return hash % HEADER_HASHSZ; -} - -/* Look up a struct/union tag name in table, and return reference to - existing type, or create a new tag entry. - XXX make this a proper data structure -*/ -static SymType *structRef(StabTypeTab *tab, SymType *def, Bool isstruct, Char *name) -{ - const Bool debug = False || stabs_debug; - struct structlist *sl; - SymType *ty; - static Int warnlen = 0; - Int len = 0; - - for(sl = tab->structlist; sl != NULL; sl = sl->next) { - len++; - - if (isstruct == sl->isstruct && VG_(strcmp)(name, sl->name) == 0) { - if (debug) - VG_(printf)("found %s ref for %s\n", - isstruct ? "struct" : "union", name); - return sl->type; - } - } - - if (debug && (len > warnlen*2)) { - warnlen = len; - VG_(printf)("struct ref list reached %d entries\n", len); - } - - sl = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*sl)); - if (isstruct) - ty = ML_(st_mkstruct)(def, 0, 0); - else - ty = ML_(st_mkunion)(def, 0, 0); - - ML_(st_setname)(ty, name); - sl->isstruct = isstruct; - sl->type = ty; - sl->name = name; - sl->next = tab->structlist; - tab->structlist = sl; - - if (debug) - VG_(printf)("created %s ref for %s = %p\n", - isstruct ? 
"struct" : "union", name, ty); - - return ty; -} - -/* Add a structural defintion for a struct/union reference */ -static SymType *structDef(StabTypeTab *tab, SymType *def, Bool isstruct, Char *name) -{ - const Bool debug = False || stabs_debug; - SymType *ref = structRef(tab, NULL, isstruct, name); - - /* it seems that GNAT likes to declare names as both struct tags - and typedefs so check we aren't about to make a structure a - reference to itself as that will create a loop */ - if (ref == def) { - if (debug) - VG_(printf)("ignoring %s self ref for %s %p -> %p\n", - isstruct ? "struct" : "union", name, ref, def); - } - else { - if (debug) - VG_(printf)("defining %s ref for %s %p -> %p\n", - isstruct ? "struct" : "union", name, ref, def); - - def = ML_(st_mktypedef)(ref, name, ML_(st_basetype)(def, False)); - } - ML_(st_setname)(def, name); - return def; -} - -static StabFile *getStabFile(StabTypeTab *tab, Int file, StabFile *set) -{ - StabFile *sf; - file++; /* file == -1 -> no file */ - - if (file < 0) - return NULL; - - if (file >= tab->nfiles) { - UInt i; - StabFile **n = VG_(arena_malloc)(VG_AR_SYMTAB, (file+1) * sizeof(*n)); - - for(i = 0; i <= file; i++) { - if (i < tab->nfiles) - n[i] = tab->files[i]; - else { - n[i] = NULL; - } - } - - if (tab->files != NULL) - VG_(arena_free)(VG_AR_SYMTAB, tab->files); - - tab->files = n; - tab->nfiles = file+1; - } - - if (set != NULL) - tab->files[file] = set; - - if (tab->files[file] == NULL) { - sf = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*sf)); - tab->files[file] = sf; - sf->types = NULL; - sf->ntypes = 0; - sf->fileidx = file - 1; /* compensate for file++ above */ - } - - sf = tab->files[file]; - - return sf; -} - -/* add a new index for a file */ -static void addFileAlias(StabTypeTab *tab, Char *filename, UInt instance, Int idx) -{ - const Bool debug = False || stabs_debug; - struct header *hp; - - for(hp = tab->headerhash[header_hash(filename, instance)]; hp != NULL; hp = hp->next) { - if (hp->instance == 
instance && VG_(strcmp)(filename, hp->filename) == 0) { - if (debug) - VG_(printf)("adding alias for \"%s\"/%d fileidx %d to fileidx %d\n", - filename, instance, idx, hp->types->fileidx); - getStabFile(tab, idx, hp->types); - return; - } - } - - VG_(printf)("Couldn't find previous reference to \"%s\"/%d for fileidx %d\n", - filename, instance, idx); -} - -static void addHeader(StabTypeTab *tab, Char *filename, UInt instance, Int idx) -{ - const Bool debug = False || stabs_debug; - struct header *hp, **bucket; - - if (debug) - VG_(printf)("adding new header %s/%d fileidx %d\n", filename, instance, idx); - - bucket = &tab->headerhash[header_hash(filename, instance)]; - - hp = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*hp)); - hp->filename = filename; - hp->instance = instance; - hp->types = getStabFile(tab, idx, NULL); - hp->next = *bucket; - *bucket = hp; -} - -static void clearStabFiles(StabTypeTab *tab) -{ - VG_(arena_free)(VG_AR_SYMTAB, tab->files); - - tab->files = NULL; - tab->nfiles = 0; -} - -static StabType *getStabType(StabTypeTab *tab, Int file, Int sym) -{ - StabFile *sf; - - sf = getStabFile(tab, file, NULL); - - if (sf == NULL || sym < 0) - return NULL; - - if (sym >= sf->ntypes) { - UInt i; - StabType *n = VG_(arena_malloc)(VG_AR_SYMTAB, (sym+1) * sizeof(*n)); - - for(i = 0; i <= sym; i++) { - if (i < sf->ntypes) - n[i] = sf->types[i]; - else { - n[i].str = NULL; - n[i].type = NULL; - } - } - - if (sf->types != NULL) - VG_(arena_free)(VG_AR_SYMTAB, sf->types); - - sf->types = n; - sf->ntypes = sym+1; - } - - return &sf->types[sym]; -} - -static Bool isdigit_base(Char c, Int base, Int *vp) -{ - Bool ret = False; - Int v = 0; - - switch(base) { - case 10: - case 0: - v = c - '0'; - ret = (c >= '0' && c <= '9'); - break; - - case 8: - v = c - '0'; - ret = (c >= '0' && c <= '7'); - break; - - case 16: - if (c >= '0' && c <= '9') { - v = c - '0'; - ret = True; - } else if (c >= 'a' && c <= 'f') { - v = c - 'a'; - ret = True; - } else if (c >= 'A' && c <= 'F') 
{ - v = c - 'F'; - ret = True; - } - break; - } - - if (vp && ret) - *vp = v; - - return ret; -} - -static inline Int getbase(Char **pp) -{ - Char *p = *pp; - Int base = 10; - - if (p[0] == '0') { - if (p[1] == 'x') { - base = 16; - p += 2; - } else { - base = 8; - p++; - } - } - *pp = p; - - return base; -} - -static Int atoi(Char **pp, Int base) -{ - Char *p = *pp; - Int ret = 0; - Int v; - Bool neg = False; - - if (*p == '-') { - neg = True; - p++; - } - - if (base == 0) - base = getbase(&p); - - while(isdigit_base(*p, base, &v)) { - ret *= base; - ret += v; - p++; - } - - *pp = p; - if (neg) - ret = -ret; - return ret; -} - -static UInt atou(Char **pp, Int base) -{ - Char *p = *pp; - UInt ret = 0; - Int v; - - if (base == 0) - base = getbase(&p); - - while(isdigit_base(*p, base, &v)) { - ret *= base; - ret += v; - p++; - } - - *pp = p; - return ret; -} - -/* Skip a ':'-delimited name which may have ::, 'char' or other things in - <> brackets */ -static Char *templ_name(Char *p) -{ - Int brac = 0; - - for(;;) { - if (*p == '<') - brac++; - if (*p == '>') - brac--; - /* skip quoted character (note, it could be anything, even a - literal \0) - - XXX This is a complete botch; we can't do anything sane here, - like support \-quoting, because gcc doesn't seem to generate - it, and even if it did, we wouldn't know what "'\'" means - - the begining of '\'' or a char in itself ('\\')? 
- */ - if (brac && p[0] == '\'' && p[2] == '\'') - p += 3; - - /* If we're within <>, then treat :: as part of the name (a single - : still terminates) */ - if (*p == ':') { - if (brac && p[1] == ':' && p[-1] != '<') - p++; - else - break; - } - p++; - } - - return p; -} - -/* updates pp to point to after parsed typeref */ -static void parse_typeref(Char **pp, Int *filep, Int *symp) -{ - Char *p = *pp; - Int file, sym; - - file = sym = *filep = *symp = -1; - - if (*p == '(') { - p++; - file = atoi(&p, 10); - if (*p++ != ',') - return; - sym = atoi(&p, 10); - if (*p++ != ')') - return; - } else if (VG_(isdigit)(*p)) { - sym = atoi(&p, 10); - } - - *pp = p; - *filep = file; - *symp = sym; -} - -static void stab_resolve(SymType *st, void *data) -{ - const Bool debug = False || stabs_debug; - Char *str = (Char *)data; - vg_assert(!ML_(st_isresolved)(st)); - - if (debug) - VG_(printf)("stab_resolve: failing to do anything useful with symtype %p=%s\n", - st, str); -} - -/* Top level of recursive descent parser for stab type information. - This only extracts the information needed by vg_symtypes.c, which - is just structure shapes, pointers and arrays. It is still - necessary to parse everything else, because there's no way to skip - it to get to the interesting bits. Also, new types can be - introduced anywhere, so we need to scan it all to pick them up. 
*/ -static SymType *stabtype_parser(SegInfo *si, SymType *def, Char **pp) -{ - const Bool debug = False || stabs_debug; - Char *p = *pp; - Char t; - SymType *type; - StabTypeTab *tab = si->stab_typetab; - -/* make sure *p == 'c' and skip over it */ -#define EXPECT(c, msg) \ - do { \ - if (p == NULL || *p++ != c) { \ - VG_(printf)("\n @@ expected '%c' at %s (remains=\"%s\")\n", c, msg, p); \ - return NULL; \ - } \ - } while(0) - -/* return a pointer to just after the next ch after (and including) ptr */ -#define SKIPPAST(ptr, ch, msg) \ - ({ \ - Char *__zz_charptr = VG_(strchr)((ptr), (ch)); \ - if (__zz_charptr == NULL) { \ - VG_(printf)("\n @@ expected '%c' at %s (ptr=\"%s\")\n", (ch), (msg), (ptr)); \ - return NULL; \ - } \ - __zz_charptr+1; \ - }) - - t = *p++; - - if (0 && debug) - VG_(printf)("stabtype_parser: parsing '%c' remains=\"%s\"\n", t, p); - - switch(t) { - case '(': - case '0' ... '9': { /* reference (and perhaps definition) */ - SymType *symtype; - Int file, sym; - Char *prev; - - p--; - prev = p; - - parse_typeref(&p, &file, &sym); - - { - /* keep stabtype reference local, because the stabtype table - can be rearranged by new insertions, invalidating this - pointer; so copy the bits we need and don't hold onto the - pointer. 
*/ - StabType *stabtype = getStabType(tab, file, sym); - - if (stabtype == NULL) { - VG_(printf)(" @@ bad type ref: %s\n", prev); - return NULL; - } - - if (stabtype->type == NULL) { - stabtype->type = ML_(st_mkunresolved)(def, stab_resolve, NULL); - if (debug) - VG_(printf)("making (%d,%d) %p unresolved\n", file, sym, stabtype->type); - } - - symtype = stabtype->type; - } - - if (*p == '=') { - /* a type definition */ - p++; - - if (ML_(st_isresolved)(symtype)) { - /* a redefinition; clear the old type out */ - StabType *stabtype = getStabType(tab, file, sym); - - symtype = stabtype->type = ML_(st_mkunresolved)(NULL, stab_resolve, NULL); - if (debug) - VG_(printf)("creating new type %p for definition (%d,%d)\n", - symtype, file, sym); - } else - ML_(st_unresolved_setdata)(symtype, stab_resolve, p); - - if (debug) - VG_(printf)("defining type %p (%d,%d) = %s\n", symtype, file, sym, p); - - /* Skip type attributes - '@' could also be pointer-to-member, so we need to see if - the following character looks like a type reference or not. - */ - while(*p == '@' && !(VG_(isdigit)(p[1]) || p[1] == '-' || p[1] == '(') ) - p = SKIPPAST(p+1, ';', "type attrib"); - - prev = p; - - type = stabtype_parser(si, symtype, &p); - if (debug) - VG_(printf)("parsed definition: type=%p symtype=%p\n", type, symtype); - - if (type != symtype) { - StabType *stabtype = getStabType(tab, file, sym); - - vg_assert(stabtype->type != NULL); - if (0) { - /* XXX bogus */ - vg_assert(!ML_(st_isresolved)(stabtype->type)); - VG_(arena_free)(VG_AR_SYMTAB, stabtype->type); /* XXX proper free method? 
*/ - } - stabtype->type = type; - } else if (!ML_(st_isresolved)(type)) { - /* If type is defined in terms of itself, and is - therefore not resolved, it is void */ - if (debug) - VG_(printf)("type %p is defined in terms of self - making void\n", type); - type = ML_(st_mkvoid)(type); - } - } else { - /* just a type reference */ - type = symtype; - if ((0 || debug) && !ML_(st_isresolved)(type)) - VG_(printf)("type %p (%d,%d) is unresolved\n", type, file, sym); - if ((0 || debug) && ML_(st_isresolved)(type)) - VG_(printf)("reference (%d,%d) -> %p\n", file, sym, type); - } - break; - } - - case '-': { /* -ve types for builtins? */ - Int n; - p--; - n = atoi(&p, 0); - switch(n) { - case -1: type = ML_(st_mkint)(def, 4, True); break; - case -2: type = ML_(st_mkint)(def, 1, True); break; - case -3: type = ML_(st_mkint)(def, 2, True); break; - case -4: type = ML_(st_mkint)(def, 4, True); break; - case -5: type = ML_(st_mkint)(def, 1, False); break; - case -6: type = ML_(st_mkint)(def, 1, True); break; - case -7: type = ML_(st_mkint)(def, 2, False); break; - case -8: type = ML_(st_mkint)(def, 4, False); break; - case -9: type = ML_(st_mkint)(def, 4, False); break; - case -10: type = ML_(st_mkint)(def, 4, False); break; - case -11: type = ML_(st_mkvoid)(def); break; - case -12: type = ML_(st_mkfloat)(def, 4); break; - case -13: type = ML_(st_mkfloat)(def, 8); break; - case -15: type = ML_(st_mkint)(def, 4, True); break; - case -16: type = ML_(st_mkbool)(def, 4); break; - case -17: type = ML_(st_mkfloat)(def, 4); break; - case -18: type = ML_(st_mkfloat)(def, 8); break; - case -20: type = ML_(st_mkint)(def, 1, False); break; - case -21: type = ML_(st_mkint)(def, 1, False); break; - case -22: type = ML_(st_mkint)(def, 2, False); break; - case -23: type = ML_(st_mkint)(def, 4, False); break; - case -24: type = ML_(st_mkint)(def, 4, False); break; - case -27: type = ML_(st_mkint)(def, 1, True); break; - case -28: type = ML_(st_mkint)(def, 2, True); break; - case -29: type = 
ML_(st_mkint)(def, 4, True); break; - case -30: type = ML_(st_mkint)(def, 2, False); break; - case -31: type = ML_(st_mkint)(def, 8, True); break; - case -32: type = ML_(st_mkint)(def, 8, False); break; - case -33: type = ML_(st_mkint)(def, 8, False); break; - case -34: type = ML_(st_mkint)(def, 8, True); break; - - default: - VG_(printf)(" @@ unrecognized negative type %d\n", n); - type = NULL; - break; - } - /* Different versions of gcc seem to disagree about whether a - negative type is followed by a semicolon or not, and the stabs - spec (susch as it is) is not clear either so we will skip a - semicolon if there is one. */ - if (*p == ';') - p++; - break; - } - - case 't': { /* typedef: 't' TYPE */ - SymType *td = stabtype_parser(si, NULL, &p); - type = ML_(st_mktypedef)(def, NULL, td); - break; - } - - case 'R': { /* FP type: 'R' FP-TYPE ';' BYTES ';' (extra) ';' */ - Int fptype, bytes; - - fptype = atoi(&p, 0); - EXPECT(';', "FP-TYPE"); - bytes = atoi(&p, 0); - EXPECT(';', "FP-TYPE bytes"); - - type = ML_(st_mkfloat)(def, bytes); - break; - } - - case 'r': { /* range: 'r' TYPE ';' MIN ';' MAX ';' */ - Int min, max; - SymType *rtype = stabtype_parser(si, NULL, &p); - - EXPECT(';', "range TYPE"); - - /* MIN and MAX are: (INTEGER | 'A' OFFSET | 'T' OFFSET | 'a' REGNO | 't' REGNO | 'J') - only expect INTEGER for now (no way to represent the rest yet, and no need so far) - */ - min = atoi(&p, 0); - EXPECT(';', "range MIN"); - max = atoi(&p, 0); - EXPECT(';', "range MAX"); - - if (debug && 0) - VG_(printf)("range: rtype=%p def=%p min=%d max=%d remains = \"%s\"\n", - rtype, def, min, max, p); - - if (rtype == def) { - if (debug) - VG_(printf)("type %p is subrange of self - making int\n", def); - type = ML_(st_mkint)(def, sizeof(int), False); - } else if (min > max && max == 0) { - if (debug) - VG_(printf)("type %p has backwards range %d - %d: making float\n", - def, min, max); - type = ML_(st_mkfloat)(def, min); - } else - type = ML_(st_mkrange)(def, rtype, min, 
max); - - vg_assert(ML_(st_isresolved)(type)); - break; - } - - case '&': /* reference */ - case '*': { /* pointer */ - /* ('*' | '&') TYPE */ - type = stabtype_parser(si, NULL, &p); - type = ML_(st_mkpointer)(def, type); - break; - } - - case 'k': /* const */ - case 'B': /* volatile */ - case 'd': { /* file (pascal only) */ - /* ('k' | 'B' | 'd') TYPE */ - type = stabtype_parser(si, NULL, &p); - break; - } - - case 'x': { /* reference to undefined type */ - /* 'x' ('s' | 'u' | 'e') NAME ':' */ - Char kind = *p++; /* get kind */ - Char *name = p; - - p = templ_name(name); - EXPECT(':', "struct/union/enum ref"); - - name = ML_(addStr)(si, name, p-1-name); - - switch (kind) { - case 's': /* struct */ - case 'u': /* union */ - type = structRef(tab, def, kind == 's', name); - break; - - case 'e': /* enum */ - type = ML_(st_mkenum)(def, 0); - break; - - default: - VG_(printf)(" @@ unexpected type ref %c\n", p[-1]); - return NULL; - }; - - break; - } - - case 'S': { /* set/bitstring */ - /* 'S' TYPE */ - SymType *typeinfo; - - typeinfo = stabtype_parser(si, NULL, &p); - - type = ML_(st_mkarray)(def, typeinfo, ML_(st_mkint)(NULL, 1, True)); - break; - } - - case 'P': /* packed array */ - case 'a': { /* array */ - /* ( 'a' | 'P' ) IDX-TYPE TYPE */ - SymType *idxtype; - SymType *artype; - - idxtype = stabtype_parser(si, NULL, &p); - artype = stabtype_parser(si, NULL, &p); - - type = ML_(st_mkarray)(def, idxtype, artype); - - break; - } - - case 'e': { /* enum */ - /* 'e' ( NAME ':' N ',' )* ';' */ - - type = ML_(st_mkenum)(def, 0); - - /* don't really care about tags; just skip them */ - while(*p != ';') { - p = SKIPPAST(p, ':', "enum tag NAME"); - p = SKIPPAST(p, ',', "enum tag N"); - } - p++; /* skip ';' */ - - break; - } - - case 'u': /* union */ - case 's': { /* struct */ - /* Gad. Here we go: - - ( 's' | 'u' ) SIZE - ( '!' NBASE ',' ( VIRT PUB OFF ',' BASE-TYPE ){NBASE} )? - - ( NAME ( ':' ( '/' [0-9] )? TYPE ',' OFFSET ( ',' SIZE )? 
- | '::' ( METHOD-TYPE ':' MANGLE-ARGS ';' - PROT QUAL ( '.' | '*' VIRT | '?' ) )+ - ) - ';' - )* - - ( '~%' FIRST-BASE-CLASS )? - ';' - */ - UInt size; - Bool method = False; - - size = atou(&p, 0); - type = (t == 's' ? ML_(st_mkstruct) : ML_(st_mkunion))(def, size, 0); - - if (*p == '!') { - /* base classes */ - Int nbase; - - p++; - nbase = atoi(&p, 0); - EXPECT(',', "class base class count"); - while(nbase--) { - p++; /* VIRT flag */ - p++; /* PUB flag */ - atoi(&p, 0); /* offset */ - EXPECT(',', "class base class ref"); - stabtype_parser(si, NULL, &p); - - if (*p == ';') /* who eats this? */ - p++; - } - } - - while(*p != ';') { - Char *start = p; - Char *name; - UInt off, sz; - SymType *fieldty; - - if (VG_(strncmp)(p, "operator<::", 11) == 0 || - VG_(strncmp)(p, "operator>::", 11) == 0 || - VG_(strncmp)(p, "operator<=::", 12) == 0 || - VG_(strncmp)(p, "operator>=::", 12) == 0 || - VG_(strncmp)(p, "operator<<::", 12) == 0 || - VG_(strncmp)(p, "operator>>::", 12) == 0 || - VG_(strncmp)(p, "operator<<=::", 13) == 0 || - VG_(strncmp)(p, "operator>>=::", 13) == 0 || - VG_(strncmp)(p, "operator->::", 12) == 0) { - p = SKIPPAST(p, ':', "member name"); - } else { - p = templ_name(p); - EXPECT(':', "member name"); - } - - if (p[0] == ':') { - /* c++ method names end in :: */ - method = True; - - if (VG_(strncmp)(start, "op$", 3) == 0) { - /* According to stabs.info, operators are named - ( "op$::" OP '.' ), where OP is +=, etc. 
Current - gcc doesn't seem to use this; operators just - appear as "operator==::" */ - p = SKIPPAST(p, '.', "op$ name"); - } - name = ML_(addStr)(si, start, p-start-1); - p = p+1; - } else { - name = ML_(addStr)(si, start, p-start-1); - } - - if (method) { - /* don't care about methods, but we still have to crunch - through this goo */ - fieldty = NULL; - off = sz = 0; - - do { - stabtype_parser(si, NULL, &p); /* METHOD-TYPE */ - - EXPECT(':', "struct method MANGLE-ARGS"); - p = SKIPPAST(p, ';', "struct method MANGLE-ARGS"); - - p += 1; /* skip PROT */ - if (*p >= 'A' && *p <= 'Z') - p++; /* skip QUAL (if present) */ - - switch(*p++) { - case '*': /* VIRT: VTAB-IDX ';' OVERRIDE-CLASS ';' */ - atoi(&p, 0); /* skip VTAB-IDX */ - EXPECT(';', "struct method vtab idx"); - stabtype_parser(si, NULL, &p); /* skip OVERRIDE-CLASS */ - EXPECT(';', "struct method vtab override"); - break; - - default: - VG_(printf)(" @@ struct method unexpected member-type '%c' \"%s\" remains\n", - p[-1], p); - /* FALLTHROUGH */ - case '?': - case '.': - break; - } - } while (*p != ';'); - } else { - if (*p == '/') { - /* c++ visibility spec: '/' PROT */ - p += 2; - } - - fieldty = stabtype_parser(si, NULL, &p); - - if (*p == ':') { - /* static member; don't care (it will appear later) */ - fieldty = NULL; - off = sz = 0; - - p = SKIPPAST(p, ';', "struct static member"); - p--; /* point at ';' */ - } else { - EXPECT(',', "struct TYPE"); - - /* logic dictates that the offset would always be - positive and that atou would work here but GNAT has - has other ideas - see bug 90128 for more details */ - off = atoi(&p, 0); - - if (*p == ',') { - EXPECT(',', "struct OFFSET"); - - /* as with the offset, it seems that GNAT likes to - generate negative sizes so we use atoi here in - order to allow them - see bug 109385 for details */ - sz = atoi(&p, 0); - } else { - /* sometimes the size is missing and assumed to be a - pointer (in bits) */ - sz = sizeof(void *) * 8; - } - } - } - - if (fieldty != NULL) 
- ML_(st_addfield)(type, name, fieldty, off, sz); - - EXPECT(';', "struct field end"); - } - p++; /* skip final ';' */ - - /* one final C++ surprise */ - if (*p == '~') { - /* "~%" FIRST-BASE-CLASS ';' */ - p++; - EXPECT('%', "struct first base"); - stabtype_parser(si, NULL, &p); /* skip FIRST-BASE-CLASS */ - EXPECT(';', "struct first base semi"); - } - - break; - } - - case 'f': /* function */ - /* 'f' TYPE */ - type = ML_(st_mkvoid)(def); /* approximate functions as void */ - stabtype_parser(si, NULL, &p); - break; - - case '#': /* method */ - /* '#' ( '#' RET-TYPE | - CLASS-TYPE ',' RET-TYPE ',' ( ARG-TYPE ( ',' ARG-TYPE )* )? ) - ';' - */ - type = ML_(st_mkvoid)(def); /* methods are really void */ - - if (*p == '#') { - p++; /* skip '#' */ - stabtype_parser(si, NULL, &p); /* RET-TYPE */ - } else { - stabtype_parser(si, NULL, &p); /* CLASS-TYPE */ - EXPECT(',', "method CLASS-TYPE"); - - stabtype_parser(si, NULL, &p); /* RET-TYPE */ - EXPECT(',', "method RET-TYPE"); - - while (*p != ';') { - stabtype_parser(si, NULL, &p); - if (*p == ',') - p++; - else if (*p != ';') - VG_(printf)(" @@ method ARG-TYPE list unexpected '%c'\n", *p); - } - } - - EXPECT(';', "method definition"); - break; - - case '@': /* pointer to member */ - /* '@' CLASS-TYPE ',' MEMBER-TYPE */ - type = ML_(st_mkint)(def, sizeof(int), False); /* make it an int for our use */ - - stabtype_parser(si, NULL, &p); /* CLASS-TYPE */ - EXPECT(',', "member-pointer CLASS-TYPE"); - stabtype_parser(si, NULL, &p); /* MEMBER-TYPE */ - break; - - default: - VG_(printf)(" @@ don't know what type '%c' is\n", t); - type = NULL; - break; - } -#undef EXPECT -#undef SKIPPAST - - if (type == NULL) - VG_(printf)(" @@ parsing %s gave NULL type (%s remains)\n", *pp, p); - - *pp = p; - - return type; -} - -/* parse a symbol reference: NAME ':' DESC TYPE */ -static Bool initSym(SegInfo *si, Sym *sym, stab_types kind, Char **namep, Int val) -{ - const Bool debug = False || stabs_debug; - Char *name = *namep; - Char *ty; - 
Int len; - Bool isTypedef = False; - Bool isStruct = False; - SymType *base; - - if (debug && 0) - VG_(printf)("initSym(si=%p, tab=%p, sym=%p, kind=%d, name=%p \"%s\", val=%d)\n", - si, si->stab_typetab, sym, kind, name, name, val); - - /* First first ':' */ - ty = VG_(strchr)(name, ':'); - - /* Skip '::' */ - while (ty && ty[1] == ':') - ty = VG_(strchr)(ty + 2, ':'); - - if (ty == NULL) { - /* there was no ':' */ - *namep += VG_(strlen)(name); - return True; /* skip */ - } - - len = ty - name; - - if (debug) { - Char buf[len+1]; - VG_(strncpy_safely)(buf, name, len+1); - VG_(printf)("\ninitSym name=\"%s\" type=%s\n", buf, ty+1); - } - - if (*ty != ':') { - /* no type info */ - sym->type = ML_(st_mkvoid)(NULL); - } else { - ty++; /* skip ':' */ - - /* chew through an initial sequence of - type descriptor type describers */ - for(;;) { - switch(*ty) { - case 'a': case 'b': case 'c': case 'C': - case 'd': case 'D': case 'f': case 'F': - case 'G': case 'i': case 'I': case 'J': - case 'L': case 'm': case 'p': case 'P': - case 'Q': case 'R': case 'r': case 'S': - case 's': case 'v': case 'V': case 'x': - case 'X': - break; - - case 'T': /* struct/union/enum */ - isStruct = True; - break; - - case 't': /* typedef handled within stabtype_parser */ - isTypedef = True; - /* FALLTHROUGH */ - case '(': case '-': case '0' ... 
'9': /* type reference */ - default: - goto out; - } - ty++; - } - - out: - sym->type = stabtype_parser(si, NULL, &ty); - base = ML_(st_basetype)(sym->type, False); - if (isStruct && (ML_(st_isstruct)(base) || ML_(st_isunion)(base))) { - Char *sname = ML_(addStr)(si, name, len); - structDef(si->stab_typetab, base, ML_(st_isstruct)(base), sname); - } - - if (isTypedef) { - Char *tname = ML_(addStr)(si, name, len); - vg_assert(sym->type != base); - if (debug) - VG_(printf)(" typedef %p \"%s\"\n", sym->type, tname); - ML_(st_setname)(sym->type, tname); - ML_(st_setname)(base, tname); - } - } - *namep = ty; - - switch(kind) { - case N_STSYM: - case N_LCSYM: - sym->kind = SyStatic; - sym->u.addr = si->offset + (Addr)val; - break; - - case N_PSYM: - sym->kind = SyEBPrel; /* +ve offset off EBP (erk, or ESP if no frame pointer) */ - sym->u.offset = val; - break; - - case N_LSYM: - if (val < 0) - sym->kind = SyEBPrel; /* -ve off EBP when there's a frame pointer */ - else - sym->kind = SyESPrel; /* +ve off ESP when there's no frame pointer */ - sym->u.offset = val; - break; - - case N_RSYM: - sym->kind = SyReg; - sym->u.regno = val; - break; - - case N_GSYM: - sym->kind = SyGlobal; - sym->u.addr = 0; /* XXX should really look up global address */ - break; - - default: - VG_(core_panic)("bad sym kind"); - } - - if (debug) - VG_(printf)(" %s = type=%p\n", (isStruct || isTypedef) ? "skipping" : "adding", sym->type); - - if (isStruct || isTypedef) { - return True; /* skip */ - } else { - sym->name = ML_(addStr)(si, name, len); - return False; /* don't skip */ - } -} - -/* list of unbound symbols for next scope */ -struct symlist { - Sym sym; - struct symlist *next; -}; - -/* XXX TODO: make sure added syms are unique. A lot of syms added to - the global scope are not. On the other hand, skipping type - definitions helps a lot. 
*/ -static Scope *addSymsToScope(Scope *sc, struct symlist *list, Int nsyms, Scope *outer) -{ - const Bool debug = False || stabs_debug; - Int j; - struct symlist *n; - Int base; - - if (sc == NULL) { - sc = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*sc)); - sc->syms = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*sc->syms) * nsyms); - sc->nsyms = nsyms; - base = 0; - sc->outer = outer; - if (outer == NULL) - sc->depth = 0; - else - sc->depth = outer->depth+1; - } else { - Sym *s = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*s) * (sc->nsyms + nsyms)); - - VG_(memcpy)(s, sc->syms, sc->nsyms * sizeof(*s)); - VG_(arena_free)(VG_AR_SYMTAB, sc->syms); - sc->syms = s; - base = sc->nsyms; - sc->nsyms += nsyms; - } - - /* bind any unbound syms to new scope */ - for(j = 0; j < nsyms; j++, list = n) { - if (debug) - VG_(printf)(" adding (%p) %s to scope %p depth %d\n", - list->sym.name, list->sym.name, sc, sc->depth); - n = list->next; - sc->syms[base+j] = list->sym; - VG_(arena_free)(VG_AR_SYMTAB, list); - } - vg_assert(list == NULL); - - return sc; -} - -/* Read stabs-format debug info. This is all rather horrible because - stabs is a underspecified, kludgy hack. -*/ -void ML_(read_debuginfo_stabs) ( SegInfo* si, - UChar* stabC, Int stab_sz, - UChar* stabstr, Int stabstr_sz ) -{ - const Bool debug = False || stabs_debug; - Int i; - Int n_stab_entries; - struct nlist* stab = (struct nlist*)stabC; - UChar *next_stabstr = NULL; - /* state for various things */ - struct { - Addr start; /* start address */ - Addr end; /* end address */ - Char *name; /* name */ - Char *filename; /* source file name */ - Int line; /* first line */ - } func = { 0, 0, NULL, NULL, -1 }; - struct { - Char *name; - Bool same; - } file = { NULL, True }; - struct { - Int prev; /* prev line */ - Int no; /* current line */ - Int ovf; /* line wrap */ - Addr addr; /* start of this line */ - Bool first; /* first line in function */ - Bool jump; /* was a jump from prev line (inline?) 
*/ - } line = { 0, 0, 0, 0, False }; - struct { - Scope *scope; /* current scope */ - struct symlist *symlist; /* unbound symbols */ - Int nsyms; /* number of unbound scopes */ - Addr addr; /* start of range */ - Int depth; - } scope = { NULL, NULL, 0, 0 }; - Scope *global; - Int fileidx = 0; - StabTypeTab *tab; - - if (si->stab_typetab == NULL) { - si->stab_typetab = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(StabTypeTab)); - VG_(memset)(si->stab_typetab, 0, sizeof(StabTypeTab)); - } - tab = si->stab_typetab; - - /* Ok. It all looks plausible. Go on and read debug data. - stab kinds: 100 N_SO a source file name - 68 N_SLINE a source line number - 36 N_FUN start of a function - - In this loop, we maintain a current file name, updated as - N_SO/N_SOLs appear, and a current function base address, - updated as N_FUNs appear. Based on that, address ranges for - N_SLINEs are calculated, and stuffed into the line info table. - - Finding the instruction address range covered by an N_SLINE is - complicated; see the N_SLINE case below. - */ - file.name = ML_(addStr)(si,"???", -1); - - n_stab_entries = stab_sz/(int)sizeof(struct nlist); - - /* empty initial file-wide scope */ - global = addSymsToScope(NULL, NULL, 0, NULL); - scope.scope = global; - - for (i = 0; i < n_stab_entries; i++) { - const struct nlist *st = &stab[i]; - Char *no_fn_name = "???"; - Char *string; - - if (debug && 1) { - VG_(printf) ( "%2d type=%d othr=%d desc=%d value=0x%x strx=%d %s\n", i, - st->n_type, st->n_other, st->n_desc, - (int)st->n_value, - (int)st->n_un.n_strx, - stabstr + st->n_un.n_strx ); - } - - /* handle continued string stabs */ - { - const Bool contdebug = False || stabs_debug; - Int buflen = 0; - Int idx = 0; - Char *buf = NULL; - Int len; - Bool continuing = False; - UInt stringidx; - - stringidx = st->n_un.n_strx; - string = stabstr + stringidx; - len = VG_(strlen)(string); - - while(string && len > 0 && (continuing || string[len-1] == '\\')) { - /* Gak, we have a continuation. 
Skip forward through - subsequent stabs to gather all the parts of the - continuation. Increment i, but keep st pointing at - current stab. */ - - continuing = string[len-1] == '\\'; - - /* remove trailing \ */ - while(string[len-1] == '\\' && len > 0) - len--; - - if (contdebug) - VG_(printf)("found extension string: \"%s\" len=%d(%c) idx=%d buflen=%d\n", - string, len, string[len-1], idx, buflen); - - /* XXX this is silly. The si->strtab should have a way of - appending to the last added string... */ - if ((idx + len) >= buflen) { - Char *n; - - if (buflen == 0) - buflen = 16; - while((idx + len) >= buflen) - buflen *= 2; - n = VG_(arena_malloc)(VG_AR_SYMTAB, buflen); - VG_(memcpy)(n, buf, idx); - - if (buf != NULL) - VG_(arena_free)(VG_AR_SYMTAB, buf); - buf = n; - } - - VG_(memcpy)(&buf[idx], string, len); - idx += len; - if (contdebug) { - buf[idx] = '\0'; - VG_(printf)("working buf=\"%s\"\n", buf); - } - - i++; - if (i >= n_stab_entries) - break; - - if (stab[i].n_un.n_strx) { - string = stabstr + stab[i].n_un.n_strx; - len = VG_(strlen)(string); - } else { - string = NULL; - len = 0; - } - } - - if (buf != NULL) { - i--; /* overstepped */ - string = ML_(addStr)(si, buf, idx); - VG_(arena_free)(VG_AR_SYMTAB, buf); - if (contdebug) - VG_(printf)("made composite: \"%s\"\n", string); - } - } - - switch(st->n_type) { - case N_UNDEF: - /* new string table base */ - if (next_stabstr != NULL) { - stabstr_sz -= next_stabstr - stabstr; - stabstr = next_stabstr; - if (stabstr_sz <= 0) { - VG_(printf)(" @@ bad stabstr size %d\n", stabstr_sz); - return; - } - } - next_stabstr = stabstr + st->n_value; - break; - - case N_BINCL: { - fileidx++; - addHeader(tab, stabstr + st->n_un.n_strx, st->n_value, fileidx); - - if (debug) - VG_(printf)("BINCL: pushed %s fileidx=%d\n", - stabstr + st->n_un.n_strx, fileidx); - break; - } - - case N_EINCL: - break; - - case N_EXCL: - ++fileidx; - - addFileAlias(tab, stabstr + st->n_un.n_strx, st->n_value, fileidx); - - if (debug) { - 
VG_(printf)("reference to excluded include file %s; fileidx=%d\n", - stabstr + st->n_un.n_strx, fileidx); - } - break; - - case N_SOL: /* sub-source (include) file */ - if (line.ovf != 0) - VG_(message)(Vg_UserMsg, - "Warning: file %s is very big (> 65535 lines) " - "Line numbers and annotation for this file might " - "be wrong. Sorry", - file.name); - /* FALLTHROUGH */ - - case N_SO: { /* new source file */ - UChar *nm = string; - UInt len = VG_(strlen)(nm); - Addr addr = func.start + st->n_value; - - if (line.addr != 0) { - /* finish off previous line */ - ML_(addLineInfo)(si, file.name, NULL, line.addr, - addr, line.no + line.ovf * LINENO_OVERFLOW, i); - } - - /* reset line state */ - line.ovf = 0; - line.addr = 0; - line.prev = 0; - line.no = 0; - line.jump = True; - - if (len > 0 && nm[len-1] != '/') { - file.name = ML_(addStr)(si, nm, -1); - if (debug) - VG_(printf)("new source: %s\n", file.name); - if (st->n_type == N_SO) { - fileidx = 0; - clearStabFiles(tab); - } - } else if (len == 0) - file.name = ML_(addStr)(si, "?1\0", -1); - - if (func.start != 0) - line.jump = True; - break; - } - - case N_SLINE: { /* line info */ - Addr addr = func.start + st->n_value; - - if (line.addr != 0) { - /* there was a previous */ - ML_(addLineInfo)(si, file.name, NULL, line.addr, - addr, line.no + line.ovf * LINENO_OVERFLOW, i); - } - - line.addr = addr; - line.prev = line.no; - line.no = (Int)((UShort)st->n_desc); - - if (line.prev > line.no + OVERFLOW_DIFFERENCE && file.same) { - VG_(message)(Vg_DebugMsg, - "Line number overflow detected (%d --> %d) in %s", - line.prev, line.no, file.name); - line.ovf++; - } - file.same = True; - - /* This is pretty horrible. If this is the first line of - the function, then bind any unbound symbols to the arg - scope, since they're probably arguments. 
*/ - if (line.first) { - line.first = False; - - if (scope.nsyms != 0) { - addSymsToScope(scope.scope, scope.symlist, scope.nsyms, NULL); - scope.symlist = NULL; - scope.nsyms = 0; - } - - /* remember first line of function */ - if (func.start != 0) { - func.filename = file.name; - func.line = line.no; - } - } else if (func.start != 0 && (line.no < func.line || func.filename != file.name)) { - /* If we're suddenly in code before the function starts - or in a different file, then it seems like its - probably some inlined code. Should do something - useful with this information. */ - //VG_(printf)("possible inline?\n"); - line.jump = True; - } - break; - } - - case N_FUN: { /* function start/end */ - Addr addr = 0; /* end address for prev line/scope */ - Bool newfunc = False; - - if (scope.nsyms != 0) { - /* clean up any unbound symbols */ - addSymsToScope(scope.scope, scope.symlist, scope.nsyms, NULL); - scope.symlist = NULL; - scope.nsyms = 0; - } - - /* if this the end of the function or we haven't - previously finished the previous function... 
*/ - if (*string == '\0' || func.start != 0) { - /* end of function */ - newfunc = False; - line.first = False; - - /* end line at end of function */ - addr = func.start + st->n_value; - - if (debug) - VG_(printf)("ending func %s at %p\n", func.name, addr); - - /* now between functions */ - func.name = no_fn_name; - func.start = 0; - - if (scope.addr != 0) { - /* finish any previous scope range */ - ML_(addScopeInfo)(si, scope.addr, addr, scope.scope); - } - - /* tidy up arg scope */ - /* XXX LEAK: free scope if it or any of its inner scopes was - never added to a scope range */ - - if (scope.scope->depth == 0) { - VG_(message)(Vg_UserMsg, - "It seems there's more scopes closed than opened...\n"); - break; - } - - scope.scope = scope.scope->outer; - scope.addr = addr; - scope.addr = 0; - } - - if (*string != '\0') { - /* new function */ - newfunc = True; - line.first = True; - - /* line ends at start of next function */ - addr = si->offset + st->n_value; - - func.start = addr; - func.name = string; - - if (debug) - VG_(printf)("\nnew func %s at %p\n", func.name, func.start); - - } - - if (line.addr) { - ML_(addLineInfo)(si, file.name, NULL, line.addr, - addr, line.no + line.ovf * LINENO_OVERFLOW, i); - line.addr = 0; - } - - if (scope.addr) { - /* finish any previous scope range */ - ML_(addScopeInfo)(si, scope.addr, addr, scope.scope); - } - - if (newfunc) { - /* make little wrapper scope for args */ - Scope *sc; - if (scope.addr) { - /* finish any previous scope range */ - ML_(addScopeInfo)(si, scope.addr, addr, scope.scope); - } - - sc = addSymsToScope(NULL, scope.symlist, scope.nsyms, scope.scope); - scope.scope = sc; - scope.nsyms = 0; - scope.symlist = NULL; - scope.addr = addr; - } - break; - } - - case N_LBRAC: { - /* open new scope */ - Scope *sc; - Addr addr = func.start + st->n_value; - - if (scope.addr) { - /* end previous range */ - ML_(addScopeInfo)(si, scope.addr, addr, scope.scope); - } - - scope.addr = addr; - - if (debug) { - static const Char 
indent[]= - " " - " "; - Int idx; - - idx = sizeof(indent)-1 - (scope.depth * 2); - scope.depth++; - VG_(printf)("%s{\n", &indent[idx >= 0 ? idx : 0]); - } - /* add unbound syms to scope */ - sc = addSymsToScope(NULL, scope.symlist, scope.nsyms, scope.scope); - scope.scope = sc; - scope.nsyms = 0; - scope.symlist = NULL; - - break; - } - - case N_RBRAC: { - /* close scope */ - Addr addr = func.start + st->n_value; - - if (scope.nsyms != 0) { - /* If there's any unbound symbols, tidy them up */ - addSymsToScope(scope.scope, scope.symlist, scope.nsyms, NULL); - scope.symlist = NULL; - scope.nsyms = 0; - } - - vg_assert(scope.addr != 0); - ML_(addScopeInfo)(si, scope.addr, addr, scope.scope); - - /* XXX LEAK: free scope if it or any of its inner scopes was - never added to a scope range */ - - if (scope.scope->depth == 0) { - /* complain */ - VG_(message)(Vg_UserMsg, "It seems there's more scopes closed than opened...\n"); - break; - } - - scope.scope = scope.scope->outer; - scope.addr = addr; - if (debug) { - static const Char indent[]= - " " - " "; - Int idx; - - scope.depth--; - idx = sizeof(indent)-1 - (scope.depth * 2); - VG_(printf)("%s}\n", &indent[idx >= 0 ? idx : 0]); - } - - break; - } - - case N_GSYM: /* global variable */ - case N_STSYM: /* static in data segment */ - case N_LCSYM: /* static in bss segment */ - case N_PSYM: /* function parameter */ - case N_LSYM: /* stack variable */ - case N_RSYM: { /* register variable */ - Char *cp = string; - Int val = st->n_value; - - /* a single string can have multiple definitions nested in it */ - while(*cp != '\0') { - struct symlist *s = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*s)); - - if (initSym(si, &s->sym, st->n_type, &cp, val)) { - VG_(arena_free)(VG_AR_SYMTAB, s); /* not interesting */ - } else { - s->next = scope.symlist; - scope.symlist = s; - scope.nsyms++; - } - switch(*cp) { - case '\0': /* all done */ - break; - - case '0' ... '9': /* symbol */ - case 'A' ... 'Z': - case 'a' ... 
'z': - case '_': - break; - - case ' ': case ':': /* nameless type */ - break; - - default: - VG_(printf)(" @@ unlikely looking definition in unparsed remains \"%s\"\n", cp); - break; - } - } - break; - } - } - } - - if (scope.nsyms != 0) - addSymsToScope(scope.scope, scope.symlist, scope.nsyms, NULL); -} - -/*--------------------------------------------------------------------*/ -/*--- end ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/storage.c b/coregrind/m_debuginfo/storage.c new file mode 100644 index 0000000000..066cb920ea --- /dev/null +++ b/coregrind/m_debuginfo/storage.c @@ -0,0 +1,831 @@ + +/*--------------------------------------------------------------------*/ +/*--- Format-neutral storage of and querying of info acquired from ---*/ +/*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info. ---*/ +/*--- storage.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2006 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +/* This file manages the data structures built by the debuginfo + system. 
These are: the top level SegInfo list. For each SegInfo, + there are tables for address-to-symbol mappings, + address-to-src-file/line mappings, and address-to-CFI-info + mappings. +*/ + +#include "pub_core_basics.h" +#include "pub_core_options.h" /* VG_(clo_verbosity) */ +#include "pub_core_libcassert.h" +#include "pub_core_libcbase.h" +#include "pub_core_libcprint.h" +#include "pub_core_mallocfree.h" +#include "priv_storage.h" /* self */ + + +/*------------------------------------------------------------*/ +/*--- Misc (printing, errors) ---*/ +/*------------------------------------------------------------*/ + +/* Show a non-fatal debug info reading error. Use vg_panic if + terminal. */ +void ML_(symerr) ( HChar* msg ) +{ + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_DebugMsg, "%s", msg); +} + +/* Print a symbol. */ +void ML_(ppSym) ( Int idx, DiSym* sym ) +{ + VG_(printf)( "%5d: %8p .. %8p (%d) %s\n", + idx, + sym->addr, + sym->addr + sym->size - 1, sym->size, + sym->name ); +} + +/* Print a call-frame-info summary. */ +void ML_(ppDiCfSI) ( DiCfSI* si ) +{ +# define SHOW_HOW(_how, _off) \ + do { \ + if (_how == CFIR_UNKNOWN) { \ + VG_(printf)("Unknown"); \ + } else \ + if (_how == CFIR_SAME) { \ + VG_(printf)("Same"); \ + } else \ + if (_how == CFIR_CFAREL) { \ + VG_(printf)("cfa+%d", _off); \ + } else \ + if (_how == CFIR_MEMCFAREL) { \ + VG_(printf)("*(cfa+%d)", _off); \ + } else { \ + VG_(printf)("???"); \ + } \ + } while (0) + + VG_(printf)("[%p .. %p]: ", si->base, + si->base + (UWord)si->len - 1); + VG_(printf)("let cfa=%s+%d", + si->cfa_sprel ? 
"oldSP" : "oldFP", si->cfa_off); + VG_(printf)(" in RA="); + SHOW_HOW(si->ra_how, si->ra_off); + VG_(printf)(" SP="); + SHOW_HOW(si->sp_how, si->sp_off); + VG_(printf)(" FP="); + SHOW_HOW(si->fp_how, si->fp_off); + VG_(printf)("\n"); +# undef SHOW_HOW +} + + +/*------------------------------------------------------------*/ +/*--- Adding stuff ---*/ +/*------------------------------------------------------------*/ + +/* Add a str to the string table, including terminating zero, and + return pointer to the string in vg_strtab. Unless it's been seen + recently, in which case we find the old pointer and return that. + This avoids the most egregious duplications. + + JSGF: changed from returning an index to a pointer, and changed to + a chunking memory allocator rather than reallocating, so the + pointers are stable. +*/ +UChar* ML_(addStr) ( struct _SegInfo* si, UChar* str, Int len ) +{ + struct strchunk *chunk; + Int space_needed; + UChar* p; + + if (len == -1) + len = VG_(strlen)(str); + + space_needed = 1 + len; + + // Allocate a new strtab chunk if necessary + if (si->strchunks == NULL || + (si->strchunks->strtab_used + + space_needed) > SEGINFO_STRCHUNKSIZE) { + chunk = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*chunk)); + chunk->strtab_used = 0; + chunk->next = si->strchunks; + si->strchunks = chunk; + } + chunk = si->strchunks; + + p = &chunk->strtab[chunk->strtab_used]; + VG_(memcpy)(p, str, len); + chunk->strtab[chunk->strtab_used+len] = '\0'; + chunk->strtab_used += space_needed; + + return p; +} + + +/* Add a symbol to the symbol table. +*/ +void ML_(addSym) ( struct _SegInfo* si, DiSym* sym ) +{ + UInt new_sz, i; + DiSym* new_tab; + + /* Ignore zero-sized syms. 
*/ + if (sym->size == 0) return; + + if (si->symtab_used == si->symtab_size) { + new_sz = 2 * si->symtab_size; + if (new_sz == 0) new_sz = 500; + new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(DiSym) ); + if (si->symtab != NULL) { + for (i = 0; i < si->symtab_used; i++) + new_tab[i] = si->symtab[i]; + VG_(arena_free)(VG_AR_SYMTAB, si->symtab); + } + si->symtab = new_tab; + si->symtab_size = new_sz; + } + + si->symtab[si->symtab_used] = *sym; + si->symtab_used++; + vg_assert(si->symtab_used <= si->symtab_size); +} + + +/* Add a location to the location table. +*/ +static void addLoc ( struct _SegInfo* si, DiLoc* loc ) +{ + UInt new_sz, i; + DiLoc* new_tab; + + /* Zero-sized locs should have been ignored earlier */ + vg_assert(loc->size > 0); + + if (si->loctab_used == si->loctab_size) { + new_sz = 2 * si->loctab_size; + if (new_sz == 0) new_sz = 500; + new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(DiLoc) ); + if (si->loctab != NULL) { + for (i = 0; i < si->loctab_used; i++) + new_tab[i] = si->loctab[i]; + VG_(arena_free)(VG_AR_SYMTAB, si->loctab); + } + si->loctab = new_tab; + si->loctab_size = new_sz; + } + + si->loctab[si->loctab_used] = *loc; + si->loctab_used++; + vg_assert(si->loctab_used <= si->loctab_size); +} + + +/* Top-level place to call to add a source-location mapping entry. +*/ +void ML_(addLineInfo) ( struct _SegInfo* si, + UChar* filename, + UChar* dirname, /* NULL == directory is unknown */ + Addr this, + Addr next, + Int lineno, + Int entry /* only needed for debug printing */ + ) +{ + static const Bool debug = False; + DiLoc loc; + Int size = next - this; + + /* Ignore zero-sized locs */ + if (this == next) return; + + if (debug) + VG_(printf)( " src %s %s line %d %p-%p\n", + dirname ? dirname : (UChar*)"(unknown)", + filename, lineno, this, next ); + + /* Maximum sanity checking. Some versions of GNU as do a shabby + * job with stabs entries; if anything looks suspicious, revert to + * a size of 1. 
This should catch the instruction of interest + * (since if using asm-level debug info, one instruction will + * correspond to one line, unlike with C-level debug info where + * multiple instructions can map to the one line), but avoid + * catching any other instructions bogusly. */ + if (this > next) { + if (VG_(clo_verbosity) > 2) { + VG_(message)(Vg_DebugMsg, + "warning: line info addresses out of order " + "at entry %d: 0x%x 0x%x", entry, this, next); + } + size = 1; + } + + if (size > MAX_LOC_SIZE) { + if (0) + VG_(message)(Vg_DebugMsg, + "warning: line info address range too large " + "at entry %d: %d", entry, size); + size = 1; + } + + /* vg_assert(this < si->start + si->size && next-1 >= si->start); */ + if (this >= si->start + si->size || next-1 < si->start) { + if (0) + VG_(message)(Vg_DebugMsg, + "warning: ignoring line info entry falling " + "outside current SegInfo: %p %p %p %p", + si->start, si->start + si->size, + this, next-1); + return; + } + + vg_assert(lineno >= 0); + if (lineno > MAX_LINENO) { + static Bool complained = False; + if (!complained) { + complained = True; + VG_(message)(Vg_UserMsg, + "warning: ignoring line info entry with " + "huge line number (%d)", lineno); + VG_(message)(Vg_UserMsg, + " Can't handle line numbers " + "greater than %d, sorry", MAX_LINENO); + VG_(message)(Vg_UserMsg, + "(Nb: this message is only shown once)"); + } + return; + } + + loc.addr = this; + loc.size = (UShort)size; + loc.lineno = lineno; + loc.filename = filename; + loc.dirname = dirname; + + if (0) VG_(message)(Vg_DebugMsg, + "addLoc: addr %p, size %d, line %d, file %s", + this,size,lineno,filename); + + addLoc ( si, &loc ); +} + + +/* Top-level place to call to add a CFI summary record. The supplied + DiCfSI is copied. 
Records lying wholly outside si's address range are dropped, not stored; the cfsi table grows by doubling (20 entries at first) whenever it is full. */ +void ML_(addDiCfSI) ( struct _SegInfo* si, DiCfSI* cfsi ) +{ + static const Bool debug = False; + UInt new_sz, i; + DiCfSI* new_tab; + + if (debug) { + VG_(printf)("adding DiCfSI: "); + ML_(ppDiCfSI)(cfsi); + } + + vg_assert(cfsi->len > 0 && cfsi->len < 2000000); /* reject empty or implausibly long ranges */ + + /* Rule out ones which are completely outside the segment. These + probably indicate some kind of bug, but for the meantime ignore + them. */ + if ( cfsi->base + cfsi->len - 1 < si->start + || si->start + si->size - 1 < cfsi->base ) { + static Int complaints = 3; /* warnings still allowed when CFI tracing is off */ + if (VG_(clo_trace_cfi) || complaints > 0) { + complaints--; + if (VG_(clo_verbosity) > 1) { + VG_(message)( + Vg_DebugMsg, + "warning: DiCfSI %p .. %p outside segment %p .. %p", + cfsi->base, + cfsi->base + cfsi->len - 1, + si->start, + si->start + si->size - 1 + ); + } + if (VG_(clo_trace_cfi)) + ML_(ppDiCfSI)(cfsi); + } + return; /* the out-of-range record is not stored */ + } + + if (si->cfsi_used == si->cfsi_size) { /* table full, or not yet allocated: grow it */ + new_sz = 2 * si->cfsi_size; + if (new_sz == 0) new_sz = 20; /* first allocation */ + new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(DiCfSI) ); + if (si->cfsi != NULL) { + for (i = 0; i < si->cfsi_used; i++) + new_tab[i] = si->cfsi[i]; + VG_(arena_free)(VG_AR_SYMTAB, si->cfsi); + } + si->cfsi = new_tab; + si->cfsi_size = new_sz; + } + + si->cfsi[si->cfsi_used] = *cfsi; /* stored by value */ + si->cfsi_used++; + vg_assert(si->cfsi_used <= si->cfsi_size); +} + + +/*------------------------------------------------------------*/ +/*--- Canonicalisers ---*/ +/*------------------------------------------------------------*/ + +/* Sort the symtab by starting address, and emit warnings if any + symbols have overlapping address ranges. We use that old chestnut, + shellsort. Mash the table around so as to establish the property + that addresses are in order and the ranges do not overlap. This + facilitates using binary search to map addresses to symbols when we + come to query the table. 
+*/ +static Int compare_DiSym ( void* va, void* vb ) +{ + DiSym* a = (DiSym*)va; + DiSym* b = (DiSym*)vb; + if (a->addr < b->addr) return -1; + if (a->addr > b->addr) return 1; + return 0; +} + + +/* Two symbols have the same address. Which name do we prefer? + + The general rule is to prefer the shorter symbol name. If the + symbol contains a '@', which means its versioned, then the length + up to the '@' is used for length comparison purposes (so + "foo@GLIBC_2.4.2" is considered shorter than "foobar"), but if two + symbols have the same length, the one with the version string is + preferred. If all else fails, use alphabetical ordering. + + Very occasionally this goes wrong (eg. 'memcmp' and 'bcmp' are + aliases in glibc, we choose the 'bcmp' symbol because it's shorter, + so we can misdescribe memcmp() as bcmp()). This is hard to avoid. + It's mentioned in the FAQ file. + */ +static DiSym* prefersym ( DiSym* a, DiSym* b ) +{ + Int lena, lenb; /* full length */ + Int vlena, vlenb; /* length without version */ + const UChar *vpa, *vpb; + + vlena = lena = VG_(strlen)(a->name); + vlenb = lenb = VG_(strlen)(b->name); + + vpa = VG_(strchr)(a->name, '@'); + vpb = VG_(strchr)(b->name, '@'); + + if (vpa) + vlena = vpa - a->name; + if (vpb) + vlenb = vpb - b->name; + + TRACE_SYMTAB("choosing between '%s' and '%s'\n", a->name, b->name); + + /* MPI hack: prefer PMPI_Foo over MPI_Foo */ + if (0==VG_(strncmp)(a->name, "MPI_", 4) + && 0==VG_(strncmp)(b->name, "PMPI_", 5) + && 0==VG_(strcmp)(a->name, 1+b->name)) + return b; + else + if (0==VG_(strncmp)(b->name, "MPI_", 4) + && 0==VG_(strncmp)(a->name, "PMPI_", 5) + && 0==VG_(strcmp)(b->name, 1+a->name)) + return a; + + /* Select the shortest unversioned name */ + if (vlena < vlenb) + return a; + else if (vlenb < vlena) + return b; + + /* Equal lengths; select the versioned name */ + if (vpa && !vpb) + return a; + if (vpb && !vpa) + return b; + + /* Either both versioned or neither is versioned; select them + alphabetically */ 
+ if (VG_(strcmp)(a->name, b->name) < 0) + return a; + else + return b; +} + +static void canonicaliseSymtab ( struct _SegInfo* si ) +{ + Int i, j, n_merged, n_truncated; + Addr s1, s2, e1, e2; + +# define SWAP(ty,aa,bb) \ + do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0) + + if (si->symtab_used == 0) + return; + + VG_(ssort)(si->symtab, si->symtab_used, + sizeof(*si->symtab), compare_DiSym); + + cleanup_more: + + /* If two symbols have identical address ranges, we pick one + using prefersym() (see it for details). */ + do { + n_merged = 0; + j = si->symtab_used; + si->symtab_used = 0; + for (i = 0; i < j; i++) { + if (i < j-1 + && si->symtab[i].addr == si->symtab[i+1].addr + && si->symtab[i].size == si->symtab[i+1].size) { + n_merged++; + /* merge the two into one */ + si->symtab[si->symtab_used++] + = *prefersym(&si->symtab[i], &si->symtab[i+1]); + i++; + } else { + si->symtab[si->symtab_used++] = si->symtab[i]; + } + } + TRACE_SYMTAB( "%d merged\n", n_merged); + } + while (n_merged > 0); + + /* Detect and "fix" overlapping address ranges. */ + n_truncated = 0; + + for (i = 0; i < ((Int)si->symtab_used) -1; i++) { + + vg_assert(si->symtab[i].addr <= si->symtab[i+1].addr); + + /* Check for common (no overlap) case. */ + if (si->symtab[i].addr + si->symtab[i].size + <= si->symtab[i+1].addr) + continue; + + /* There's an overlap. Truncate one or the other. */ + if (VG_(clo_trace_symtab)) { + VG_(printf)("overlapping address ranges in symbol table\n\t"); + ML_(ppSym)( i, &si->symtab[i] ); + VG_(printf)("\t"); + ML_(ppSym)( i+1, &si->symtab[i+1] ); + VG_(printf)("\n"); + } + + /* Truncate one or the other. */ + s1 = si->symtab[i].addr; + s2 = si->symtab[i+1].addr; + e1 = s1 + si->symtab[i].size - 1; + e2 = s2 + si->symtab[i+1].size - 1; + if (s1 < s2) { + e1 = s2-1; + } else { + vg_assert(s1 == s2); + if (e1 > e2) { + s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2); + } else + if (e1 < e2) { + s2 = e1+1; + } else { + /* e1 == e2. Identical addr ranges. 
We'll eventually wind + up back at cleanup_more, which will take care of it. */ + } + } + si->symtab[i].addr = s1; + si->symtab[i+1].addr = s2; + si->symtab[i].size = e1 - s1 + 1; + si->symtab[i+1].size = e2 - s2 + 1; + vg_assert(s1 <= s2); + vg_assert(si->symtab[i].size > 0); + vg_assert(si->symtab[i+1].size > 0); + /* It may be that the i+1 entry now needs to be moved further + along to maintain the address order requirement. */ + j = i+1; + while (j < ((Int)si->symtab_used)-1 + && si->symtab[j].addr > si->symtab[j+1].addr) { + SWAP(DiSym,si->symtab[j],si->symtab[j+1]); + j++; + } + n_truncated++; + } + + if (n_truncated > 0) goto cleanup_more; + + /* Ensure relevant postconditions hold. */ + for (i = 0; i < ((Int)si->symtab_used)-1; i++) { + /* No zero-sized symbols. */ + vg_assert(si->symtab[i].size > 0); + /* In order. */ + vg_assert(si->symtab[i].addr < si->symtab[i+1].addr); + /* No overlaps. */ + vg_assert(si->symtab[i].addr + si->symtab[i].size - 1 + < si->symtab[i+1].addr); + } +# undef SWAP +} + + +/* Sort the location table by starting address. Mash the table around + so as to establish the property that addresses are in order and the + ranges do not overlap. This facilitates using binary search to map + addresses to locations when we come to query the table. +*/ +static Int compare_RiLoc ( void* va, void* vb ) +{ + DiLoc* a = (DiLoc*)va; + DiLoc* b = (DiLoc*)vb; + if (a->addr < b->addr) return -1; + if (a->addr > b->addr) return 1; + return 0; +} + +static void canonicaliseLoctab ( struct _SegInfo* si ) +{ + Int i, j; + +# define SWAP(ty,aa,bb) \ + do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0); + + if (si->loctab_used == 0) + return; + + /* Sort by start address. */ + VG_(ssort)(si->loctab, si->loctab_used, + sizeof(*si->loctab), compare_RiLoc); + + /* If two adjacent entries overlap, truncate the first. 
*/ + for (i = 0; i < ((Int)si->loctab_used)-1; i++) { + vg_assert(si->loctab[i].size < 10000); + if (si->loctab[i].addr + si->loctab[i].size > si->loctab[i+1].addr) { + /* Do this in signed int32 because the actual .size fields + are only 12 bits. */ + Int new_size = si->loctab[i+1].addr - si->loctab[i].addr; + if (new_size < 0) { + si->loctab[i].size = 0; + } else + if (new_size > MAX_LOC_SIZE) { + si->loctab[i].size = MAX_LOC_SIZE; + } else { + si->loctab[i].size = (UShort)new_size; + } + } + } + + /* Zap any zero-sized entries resulting from the truncation + process. */ + j = 0; + for (i = 0; i < (Int)si->loctab_used; i++) { + if (si->loctab[i].size > 0) { + if (j != i) + si->loctab[j] = si->loctab[i]; + j++; + } + } + si->loctab_used = j; + + /* Ensure relevant postconditions hold. */ + for (i = 0; i < ((Int)si->loctab_used)-1; i++) { + /* + VG_(printf)("%d (%d) %d 0x%x\n", + i, si->loctab[i+1].confident, + si->loctab[i+1].size, si->loctab[i+1].addr ); + */ + /* No zero-sized symbols. */ + vg_assert(si->loctab[i].size > 0); + /* In order. */ + vg_assert(si->loctab[i].addr < si->loctab[i+1].addr); + /* No overlaps. */ + vg_assert(si->loctab[i].addr + si->loctab[i].size - 1 + < si->loctab[i+1].addr); + } +# undef SWAP +} + + +/* Sort the call-frame-info table by starting address. Mash the table + around so as to establish the property that addresses are in order + and the ranges do not overlap. This facilitates using binary + search to map addresses to locations when we come to query the + table. + + Also, set cfsi_minaddr and cfsi_maxaddr to be the min and max of + any of the address ranges contained in cfsi[0 .. cfsi_used-1], so + as to facilitate rapidly skipping this SegInfo when looking for an + address which falls outside that range. 
+*/ +static Int compare_DiCfSI ( void* va, void* vb ) +{ + DiCfSI* a = (DiCfSI*)va; + DiCfSI* b = (DiCfSI*)vb; + if (a->base < b->base) return -1; + if (a->base > b->base) return 1; + return 0; +} + +static void canonicaliseCFI ( struct _SegInfo* si ) +{ + Int i, j; + const Addr minAddr = 0; + const Addr maxAddr = ~minAddr; + + /* Note: take care in here. si->cfsi can be NULL, in which + case _used and _size fields will be zero. */ + if (si->cfsi == NULL) { + vg_assert(si->cfsi_used == 0); + vg_assert(si->cfsi_size == 0); + } + + /* Set cfsi_minaddr and cfsi_maxaddr to summarise the entire + address range contained in cfsi[0 .. cfsi_used-1]. */ + si->cfsi_minaddr = maxAddr; + si->cfsi_maxaddr = minAddr; + for (i = 0; i < (Int)si->cfsi_used; i++) { + Addr here_min = si->cfsi[i].base; + Addr here_max = si->cfsi[i].base + si->cfsi[i].len - 1; + if (here_min < si->cfsi_minaddr) + si->cfsi_minaddr = here_min; + if (here_max > si->cfsi_maxaddr) + si->cfsi_maxaddr = here_max; + } + + if (VG_(clo_trace_cfi)) + VG_(printf)("canonicaliseCfiSI: %d entries, %p .. %p\n", + si->cfsi_used, + si->cfsi_minaddr, si->cfsi_maxaddr); + + /* Sort the cfsi array by base address. */ + VG_(ssort)(si->cfsi, si->cfsi_used, sizeof(*si->cfsi), compare_DiCfSI); + + /* If two adjacent entries overlap, truncate the first. */ + for (i = 0; i < (Int)si->cfsi_used-1; i++) { + if (si->cfsi[i].base + si->cfsi[i].len > si->cfsi[i+1].base) { + Int new_len = si->cfsi[i+1].base - si->cfsi[i].base; + /* how could it be otherwise? The entries are sorted by the + .base field. */ + vg_assert(new_len >= 0); + vg_assert(new_len <= si->cfsi[i].len); + si->cfsi[i].len = new_len; + } + } + + /* Zap any zero-sized entries resulting from the truncation + process. 
*/ + j = 0; + for (i = 0; i < (Int)si->cfsi_used; i++) { + if (si->cfsi[i].len > 0) { + if (j != i) + si->cfsi[j] = si->cfsi[i]; + j++; + } + } + /* VG_(printf)("XXXXXXXXXXXXX %d %d\n", si->cfsi_used, j); */ + si->cfsi_used = j; + + /* Ensure relevant postconditions hold. */ + for (i = 0; i < (Int)si->cfsi_used; i++) { + /* No zero-length ranges. */ + vg_assert(si->cfsi[i].len > 0); + /* Makes sense w.r.t. summary address range */ + vg_assert(si->cfsi[i].base >= si->cfsi_minaddr); + vg_assert(si->cfsi[i].base + si->cfsi[i].len - 1 + <= si->cfsi_maxaddr); + + if (i < si->cfsi_used - 1) { + /* + if (!(si->cfsi[i].base < si->cfsi[i+1].base)) { + VG_(printf)("\nOOO cfsis:\n"); + ML_(ppCfiSI)(&si->cfsi[i]); + ML_(ppCfiSI)(&si->cfsi[i+1]); + } + */ + /* In order. */ + vg_assert(si->cfsi[i].base < si->cfsi[i+1].base); + /* No overlaps. */ + vg_assert(si->cfsi[i].base + si->cfsi[i].len - 1 + < si->cfsi[i+1].base); + } + } + +} + + +/* Canonicalise the tables held by 'si', in preparation for use. Call + this after finishing adding entries to these tables. */ +void ML_(canonicaliseTables) ( struct _SegInfo* si ) +{ + canonicaliseSymtab ( si ); + canonicaliseLoctab ( si ); + canonicaliseCFI ( si ); +} + + +/*------------------------------------------------------------*/ +/*--- Searching the tables ---*/ +/*------------------------------------------------------------*/ + +/* Find a symbol-table index containing the specified pointer, or -1 + if not found. Binary search. */ + +Int ML_(search_one_symtab) ( struct _SegInfo* si, Addr ptr, + Bool match_anywhere_in_fun ) +{ + Addr a_mid_lo, a_mid_hi; + Int mid, size, + lo = 0, + hi = si->symtab_used-1; + while (True) { + /* current unsearched space is from lo to hi, inclusive. */ + if (lo > hi) return -1; /* not found */ + mid = (lo + hi) / 2; + a_mid_lo = si->symtab[mid].addr; + size = ( match_anywhere_in_fun + ? 
si->symtab[mid].size + : 1); + a_mid_hi = ((Addr)si->symtab[mid].addr) + size - 1; + + if (ptr < a_mid_lo) { hi = mid-1; continue; } + if (ptr > a_mid_hi) { lo = mid+1; continue; } + vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); + return mid; + } +} + + +/* Find a location-table index containing the specified pointer, or -1 + if not found. Binary search. */ + +Int ML_(search_one_loctab) ( struct _SegInfo* si, Addr ptr ) +{ + Addr a_mid_lo, a_mid_hi; + Int mid, + lo = 0, + hi = si->loctab_used-1; + while (True) { + /* current unsearched space is from lo to hi, inclusive. */ + if (lo > hi) return -1; /* not found */ + mid = (lo + hi) / 2; + a_mid_lo = si->loctab[mid].addr; + a_mid_hi = ((Addr)si->loctab[mid].addr) + si->loctab[mid].size - 1; + + if (ptr < a_mid_lo) { hi = mid-1; continue; } + if (ptr > a_mid_hi) { lo = mid+1; continue; } + vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); + return mid; + } +} + + +/* Find a CFI-table index containing the specified pointer, or -1 + if not found. Binary search. */ + +Int ML_(search_one_cfitab) ( struct _SegInfo* si, Addr ptr ) +{ + Addr a_mid_lo, a_mid_hi; + Int mid, size, + lo = 0, + hi = si->cfsi_used-1; + while (True) { + /* current unsearched space is from lo to hi, inclusive. 
*/ + if (lo > hi) return -1; /* not found */ + mid = (lo + hi) / 2; + a_mid_lo = si->cfsi[mid].base; + size = si->cfsi[mid].len; + a_mid_hi = a_mid_lo + size - 1; + vg_assert(a_mid_hi >= a_mid_lo); + if (ptr < a_mid_lo) { hi = mid-1; continue; } + if (ptr > a_mid_hi) { lo = mid+1; continue; } + vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); + return mid; + } +} + + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/symtab.c b/coregrind/m_debuginfo/symtab.c deleted file mode 100644 index d1895ee11a..0000000000 --- a/coregrind/m_debuginfo/symtab.c +++ /dev/null @@ -1,3246 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Management of symbols and debugging information. ---*/ -/*--- symtab.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, a dynamic binary instrumentation - framework. - - Copyright (C) 2000-2005 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file COPYING. -*/ - -/* - Stabs reader greatly improved by Nick Nethercote, Apr 02. 
-*/ - - -#include "pub_core_basics.h" -#include "pub_core_threadstate.h" -#include "pub_core_debuginfo.h" -#include "pub_core_demangle.h" -#include "pub_core_libcbase.h" -#include "pub_core_libcassert.h" -#include "pub_core_libcfile.h" -#include "pub_core_libcprint.h" -#include "pub_core_machine.h" -#include "pub_core_mallocfree.h" -#include "pub_core_options.h" -#include "pub_core_redir.h" // VG_(redir_notify_{new,delete}_SegInfo) -#include "pub_core_tooliface.h" // VG_(needs).data_syms -#include "pub_core_oset.h" // for ppc64-linux elf symbol reading - -#include "pub_core_aspacemgr.h" - -#include "priv_symtypes.h" -#include "priv_symtab.h" - -#include /* ELF defns */ - -/* The root structure for the entire symbol table system. It is a - linked list of SegInfos. Note that this entire mechanism assumes - that what we read from /proc/self/maps doesn't contain overlapping - address ranges, and as a result the SegInfos in this list describe - disjoint address ranges. -*/ -static SegInfo* segInfo_list = NULL; - -/*------------------------------------------------------------*/ -/*--- 32/64-bit parameterisation ---*/ -/*------------------------------------------------------------*/ - -/* For all the ELF macros and types which specify '32' or '64', - select the correct variant for this platform and give it - an 'XX' name. Then use the 'XX' variant consistently in - the rest of this file. 
-*/ -#if VG_WORDSIZE == 4 -# define ElfXX_Ehdr Elf32_Ehdr -# define ElfXX_Shdr Elf32_Shdr -# define ElfXX_Phdr Elf32_Phdr -# define ElfXX_Sym Elf32_Sym -# define ElfXX_Word Elf32_Word -# define ElfXX_Addr Elf32_Addr -# define ElfXX_Dyn Elf32_Dyn -# define ELFXX_ST_BIND ELF32_ST_BIND -# define ELFXX_ST_TYPE ELF32_ST_TYPE - -#elif VG_WORDSIZE == 8 -# define ElfXX_Ehdr Elf64_Ehdr -# define ElfXX_Shdr Elf64_Shdr -# define ElfXX_Phdr Elf64_Phdr -# define ElfXX_Sym Elf64_Sym -# define ElfXX_Word Elf64_Word -# define ElfXX_Addr Elf64_Addr -# define ElfXX_Dyn Elf64_Dyn -# define ELFXX_ST_BIND ELF64_ST_BIND -# define ELFXX_ST_TYPE ELF64_ST_TYPE - -#else -# error "VG_WORDSIZE should be 4 or 8" -#endif - - -/*------------------------------------------------------------*/ -/*--- Forwards decls ---*/ -/*------------------------------------------------------------*/ - -static Bool is_elf_object_file ( const void *buf ); -static void unload_symbols ( Addr start, SizeT length ); - - -/*------------------------------------------------------------*/ -/*--- TOP LEVEL ---*/ -/*------------------------------------------------------------*/ - -/* If this mapping is at the beginning of a file, isn't part of - Valgrind, is at least readable and seems to contain an object - file, then try reading symbols from it. - - Getting this heuristic right is critical. On x86-linux, - objects are typically mapped twice: - - 1b8fb000-1b8ff000 r-xp 00000000 08:02 4471477 vgpreload_memcheck.so - 1b8ff000-1b900000 rw-p 00004000 08:02 4471477 vgpreload_memcheck.so - - whereas ppc32-linux mysteriously does this: - - 118a6000-118ad000 r-xp 00000000 08:05 14209428 vgpreload_memcheck.so - 118ad000-118b6000 ---p 00007000 08:05 14209428 vgpreload_memcheck.so - 118b6000-118bd000 rwxp 00000000 08:05 14209428 vgpreload_memcheck.so - - The third mapping should not be considered to have executable code in. - Therefore a test which works for both is: r and x and NOT w. 
Reading - symbols from the rwx segment -- which overlaps the r-x segment in the - file -- causes the redirection mechanism to redirect to addresses in - that third segment, which is wrong and causes crashes. - - ------ - JRS 28 Dec 05: unfortunately icc 8.1 on x86 has been seen to - produce executables with a single rwx segment rather than a - (r-x,rw-) pair. That means the rules have to be modified thusly: - - x86-linux: consider if r and x - all others: consider if r and x and NOT w - -*/ - -static void nuke_syms_in_range ( Addr start, SizeT length ) -{ - /* Repeatedly scan the segInfo list, looking for segInfos in this - range, and call unload_symbols on the segInfo's stated start - point. This modifies the list, hence the multiple - iterations. */ - Bool found; - SegInfo* curr; - - while (True) { - found = False; - - curr = segInfo_list; - while (True) { - if (curr == NULL) break; - if (start+length-1 < curr->start - || curr->start+curr->size-1 < start) { - /* no overlap */ - } else { - found = True; - break; - } - curr = curr->next; - } - - if (!found) break; - unload_symbols( curr->start, curr->size ); - } -} - -/* Notify the debuginfo system about a new mapping. This is the way - new debug information gets loaded. If allow_SkFileV is True, it - will try load debug info if the mapping at 'a' belongs to Valgrind; - whereas normally (False) it will not do that. This allows us to - carefully control when the thing will read symbols from the - Valgrind executable itself. */ - -void VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV ) -{ - NSegment* seg; - HChar* filename; - Bool ok; - - /* See comment at start of section for explanation of this do/don't - logic. 
*/ -# if defined(VGP_x86_linux) - Bool require_no_W = False; -# else - Bool require_no_W = True; -# endif - - seg = VG_(am_find_nsegment)(a); - vg_assert(seg); - - filename = VG_(am_get_filename)( seg ); - if (!filename) - return; - - filename = VG_(arena_strdup)( VG_AR_SYMTAB, filename ); - - ok = (seg->kind == SkFileC || (seg->kind == SkFileV && allow_SkFileV)) - && seg->offset == 0 - && seg->fnIdx != -1 - && seg->hasR - && seg->hasX - && (require_no_W ? (!seg->hasW) : True) - && is_elf_object_file( (const void*)seg->start ); - - if (!ok) { - VG_(arena_free)(VG_AR_SYMTAB, filename); - return; - } - - nuke_syms_in_range( seg->start, seg->end + 1 - seg->start ); - VG_(read_seg_symbols)( seg->start, seg->end + 1 - seg->start, - seg->offset, filename ); - - /* VG_(read_seg_symbols) makes its own copy of filename, so is safe - to free it. */ - VG_(arena_free)(VG_AR_SYMTAB, filename); -} - -void VG_(di_notify_munmap)( Addr a, SizeT len ) -{ - nuke_syms_in_range(a, len); -} - -void VG_(di_notify_mprotect)( Addr a, SizeT len, UInt prot ) -{ - Bool exe_ok = toBool(prot & VKI_PROT_EXEC); -# if defined(VGP_x86_linux) - exe_ok = exe_ok || toBool(prot & VKI_PROT_READ); -# endif - if (0 && !exe_ok) - nuke_syms_in_range(a, len); -} - - -/*------------------------------------------------------------*/ -/*--- Adding stuff ---*/ -/*------------------------------------------------------------*/ - -/* Add a str to the string table, including terminating zero, and - return pointer to the string in vg_strtab. Unless it's been seen - recently, in which case we find the old pointer and return that. - This avoids the most egregious duplications. - - JSGF: changed from returning an index to a pointer, and changed to - a chunking memory allocator rather than reallocating, so the - pointers are stable. 
-*/ - -Char* ML_(addStr) ( SegInfo* si, Char* str, Int len ) -{ - struct strchunk *chunk; - Int space_needed; - Char* p; - - if (len == -1) - len = VG_(strlen)(str); - - space_needed = 1 + len; - - // Allocate a new strtab chunk if necessary - if (si->strchunks == NULL || - (si->strchunks->strtab_used + space_needed) > STRCHUNKSIZE) { - chunk = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*chunk)); - chunk->strtab_used = 0; - chunk->next = si->strchunks; - si->strchunks = chunk; - } - chunk = si->strchunks; - - p = &chunk->strtab[chunk->strtab_used]; - VG_(memcpy)(p, str, len); - chunk->strtab[chunk->strtab_used+len] = '\0'; - chunk->strtab_used += space_needed; - - return p; -} - -/* Add a symbol to the symbol table. */ -static void addSym ( SegInfo* si, RiSym* sym ) -{ - UInt new_sz, i; - RiSym* new_tab; - - /* Ignore zero-sized syms. */ - if (sym->size == 0) return; - - if (si->symtab_used == si->symtab_size) { - new_sz = 2 * si->symtab_size; - if (new_sz == 0) new_sz = 500; - new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) ); - if (si->symtab != NULL) { - for (i = 0; i < si->symtab_used; i++) - new_tab[i] = si->symtab[i]; - VG_(arena_free)(VG_AR_SYMTAB, si->symtab); - } - si->symtab = new_tab; - si->symtab_size = new_sz; - } - - si->symtab[si->symtab_used] = *sym; - si->symtab_used++; - vg_assert(si->symtab_used <= si->symtab_size); -} - -/* Add a location to the location table. 
*/ - -static __inline__ -void addLoc ( SegInfo* si, RiLoc* loc ) -{ - UInt new_sz, i; - RiLoc* new_tab; - - /* Zero-sized locs should have been ignored earlier */ - vg_assert(loc->size > 0); - - if (si->loctab_used == si->loctab_size) { - new_sz = 2 * si->loctab_size; - if (new_sz == 0) new_sz = 500; - new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) ); - if (si->loctab != NULL) { - for (i = 0; i < si->loctab_used; i++) - new_tab[i] = si->loctab[i]; - VG_(arena_free)(VG_AR_SYMTAB, si->loctab); - } - si->loctab = new_tab; - si->loctab_size = new_sz; - } - - si->loctab[si->loctab_used] = *loc; - si->loctab_used++; - vg_assert(si->loctab_used <= si->loctab_size); -} - - -/* Top-level place to call to add a source-location mapping entry. */ - -void ML_(addLineInfo) ( SegInfo* si, - Char* filename, - Char* dirname, /* NULL == directory is unknown */ - Addr this, - Addr next, - Int lineno, - Int entry /* only needed for debug printing */ - ) -{ - static const Bool debug = False; - RiLoc loc; - Int size = next - this; - - /* Ignore zero-sized locs */ - if (this == next) return; - - if (debug) - VG_(printf)( " src %s %s line %d %p-%p\n", - dirname ? dirname : (Char*)"(unknown)", - filename, lineno, this, next ); - - /* Maximum sanity checking. Some versions of GNU as do a shabby - * job with stabs entries; if anything looks suspicious, revert to - * a size of 1. This should catch the instruction of interest - * (since if using asm-level debug info, one instruction will - * correspond to one line, unlike with C-level debug info where - * multiple instructions can map to the one line), but avoid - * catching any other instructions bogusly. 
*/ - if (this > next) { - if (VG_(clo_verbosity) > 2) { - VG_(message)(Vg_DebugMsg, - "warning: line info addresses out of order " - "at entry %d: 0x%x 0x%x", entry, this, next); - } - size = 1; - } - - if (size > MAX_LOC_SIZE) { - if (0) - VG_(message)(Vg_DebugMsg, - "warning: line info address range too large " - "at entry %d: %d", entry, size); - size = 1; - } - - /* vg_assert(this < si->start + si->size && next-1 >= si->start); */ - if (this >= si->start + si->size || next-1 < si->start) { - if (0) - VG_(message)(Vg_DebugMsg, - "warning: ignoring line info entry falling " - "outside current SegInfo: %p %p %p %p", - si->start, si->start + si->size, - this, next-1); - return; - } - - vg_assert(lineno >= 0); - if (lineno > MAX_LINENO) { - static Bool complained = False; - if (!complained) { - complained = True; - VG_(message)(Vg_UserMsg, - "warning: ignoring line info entry with " - "huge line number (%d)", lineno); - VG_(message)(Vg_UserMsg, - " Can't handle line numbers " - "greater than %d, sorry", MAX_LINENO); - VG_(message)(Vg_UserMsg, - "(Nb: this message is only shown once)"); - } - return; - } - - loc.addr = this; - loc.size = (UShort)size; - loc.lineno = lineno; - loc.filename = filename; - loc.dirname = dirname; - - if (0) VG_(message)(Vg_DebugMsg, - "addLoc: addr %p, size %d, line %d, file %s", - this,size,lineno,filename); - - addLoc ( si, &loc ); -} - -static __inline__ -void addScopeRange ( SegInfo* si, ScopeRange *range ) -{ - Int new_sz, i; - ScopeRange* new_tab; - - /* Zero-sized scopes should have been ignored earlier */ - vg_assert(range->size > 0); - - if (si->scopetab_used == si->scopetab_size) { - new_sz = 2 * si->scopetab_size; - if (new_sz == 0) new_sz = 500; - new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(*new_tab) ); - if (si->scopetab != NULL) { - for (i = 0; i < si->scopetab_used; i++) - new_tab[i] = si->scopetab[i]; - VG_(arena_free)(VG_AR_SYMTAB, si->scopetab); - } - si->scopetab = new_tab; - si->scopetab_size = new_sz; - 
} - - si->scopetab[si->scopetab_used] = *range; - si->scopetab_used++; - vg_assert(si->scopetab_used <= si->scopetab_size); -} - - -/* Top-level place to call to add a source-location mapping entry. */ - -void ML_(addScopeInfo) ( SegInfo* si, - Addr this, - Addr next, - Scope *scope) -{ - static const Bool debug = False; - Int size = next - this; - ScopeRange range; - - /* Ignore zero-sized or negative scopes */ - if (size <= 0) { - if (debug) - VG_(printf)("ignoring zero-sized range, scope %p at %p\n", scope, this); - return; - } - - if (debug) - VG_(printf)("adding scope range %p-%p (size=%d) scope %p (%d)\n", - this, next, next-this, scope, scope->depth); - - range.addr = this; - range.size = size; - range.scope = scope; - - addScopeRange ( si, &range ); -} - - -/* Top-level place to call to add a CFI summary record. The supplied - CfiSI is copied. */ -void ML_(addCfiSI) ( SegInfo* si, CfiSI* cfisi ) -{ - static const Bool debug = False; - UInt new_sz, i; - CfiSI* new_tab; - - if (debug) { - VG_(printf)("adding CfiSI: "); - ML_(ppCfiSI)(cfisi); - } - - vg_assert(cfisi->len > 0 && cfisi->len < 2000000); - - /* Rule out ones which are completely outside the segment. These - probably indicate some kind of bug, but for the meantime ignore - them. */ - if ( cfisi->base + cfisi->len - 1 < si->start - || si->start + si->size - 1 < cfisi->base ) { - static Int complaints = 3; - if (VG_(clo_trace_cfi) || complaints > 0) { - complaints--; - if (VG_(clo_verbosity) > 1) { - VG_(message)( - Vg_DebugMsg, - "warning: CfiSI %p .. %p outside segment %p .. 
%p", - cfisi->base, - cfisi->base + cfisi->len - 1, - si->start, - si->start + si->size - 1 - ); - } - if (VG_(clo_trace_cfi)) - ML_(ppCfiSI)(cfisi); - } - return; - } - - if (si->cfisi_used == si->cfisi_size) { - new_sz = 2 * si->cfisi_size; - if (new_sz == 0) new_sz = 20; - new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(CfiSI) ); - if (si->cfisi != NULL) { - for (i = 0; i < si->cfisi_used; i++) - new_tab[i] = si->cfisi[i]; - VG_(arena_free)(VG_AR_SYMTAB, si->cfisi); - } - si->cfisi = new_tab; - si->cfisi_size = new_sz; - } - - si->cfisi[si->cfisi_used] = *cfisi; - si->cfisi_used++; - vg_assert(si->cfisi_used <= si->cfisi_size); -} - - -/*------------------------------------------------------------*/ -/*--- Helpers ---*/ -/*------------------------------------------------------------*/ - -/* Non-fatal -- use vg_panic if terminal. */ -void ML_(symerr) ( Char* msg ) -{ - if (VG_(clo_verbosity) > 1) - VG_(message)(Vg_DebugMsg,"%s", msg ); -} - - -/* Print a symbol. */ -static -void printSym ( SegInfo* si, Int i ) -{ - VG_(printf)( "%5d: %8p .. %8p (%d) %s\n", - i, - si->symtab[i].addr, - si->symtab[i].addr + si->symtab[i].size - 1, si->symtab[i].size, - si->symtab[i].name ); -} - -#define TRACE_SYMTAB(format, args...) \ - if (VG_(clo_trace_symtab)) { VG_(printf)(format, ## args); } - - -#if 0 -/* Print the entire sym tab. */ -static __attribute__ ((unused)) -void printSymtab ( void ) -{ - Int i; - VG_(printf)("\n------ BEGIN vg_symtab ------\n"); - for (i = 0; i < vg_symtab_used; i++) - printSym(i); - VG_(printf)("------ BEGIN vg_symtab ------\n"); -} -#endif - -#if 0 -/* Paranoid strcat. 
*/ -static -void safeCopy ( UChar* dst, UInt maxlen, UChar* src ) -{ - UInt i = 0, j = 0; - while (True) { - if (i >= maxlen) return; - if (dst[i] == 0) break; - i++; - } - while (True) { - if (i >= maxlen) return; - dst[i] = src[j]; - if (src[j] == 0) return; - i++; j++; - } -} -#endif - - -/*------------------------------------------------------------*/ -/*--- Canonicalisers ---*/ -/*------------------------------------------------------------*/ - -/* Sort the symtab by starting address, and emit warnings if any - symbols have overlapping address ranges. We use that old chestnut, - shellsort. Mash the table around so as to establish the property - that addresses are in order and the ranges to not overlap. This - facilitates using binary search to map addresses to symbols when we - come to query the table. -*/ -static Int compare_RiSym(void *va, void *vb) { - RiSym *a = (RiSym *)va; - RiSym *b = (RiSym *)vb; - - if (a->addr < b->addr) return -1; - if (a->addr > b->addr) return 1; - return 0; -} - -/* Two symbols have the same address. Which name do we prefer? - - The general rule is to prefer the shorter symbol name. If the - symbol contains a '@', which means its versioned, then the length - up to the '@' is used for length comparison purposes (so - "foo@GLIBC_2.4.2" is considered shorter than "foobar"), but if two - symbols have the same length, the one with the version string is - preferred. If all else fails, use alphabetical ordering. - - Very occasionally this goes wrong (eg. 'memcmp' and 'bcmp' are aliases - in glibc, we choose the 'bcmp' symbol because it's shorter, so we - can misdescribe memcmp() as bcmp()). This is hard to avoid. It's - mentioned in the FAQ file. 
- */ -static RiSym *prefersym(RiSym *a, RiSym *b) -{ - Int lena, lenb; /* full length */ - Int vlena, vlenb; /* length without version */ - const Char *vpa, *vpb; - - vlena = lena = VG_(strlen)(a->name); - vlenb = lenb = VG_(strlen)(b->name); - - vpa = VG_(strchr)(a->name, '@'); - vpb = VG_(strchr)(b->name, '@'); - - if (vpa) - vlena = vpa - a->name; - if (vpb) - vlenb = vpb - b->name; - - TRACE_SYMTAB("choosing between '%s' and '%s'\n", a->name, b->name); - - /* MPI hack: prefer PMPI_Foo over MPI_Foo */ - if (0==VG_(strncmp)(a->name, "MPI_", 4) - && 0==VG_(strncmp)(b->name, "PMPI_", 5) - && 0==VG_(strcmp)(a->name, 1+b->name)) - return b; - else - if (0==VG_(strncmp)(b->name, "MPI_", 4) - && 0==VG_(strncmp)(a->name, "PMPI_", 5) - && 0==VG_(strcmp)(b->name, 1+a->name)) - return a; - - /* Select the shortest unversioned name */ - if (vlena < vlenb) - return a; - else if (vlenb < vlena) - return b; - - /* Equal lengths; select the versioned name */ - if (vpa && !vpb) - return a; - if (vpb && !vpa) - return b; - - /* Either both versioned or neither is versioned; select them - alphabetically */ - if (VG_(strcmp)(a->name, b->name) < 0) - return a; - else - return b; -} - -static -void canonicaliseSymtab ( SegInfo* si ) -{ - Int i, j, n_merged, n_truncated; - Addr s1, s2, e1, e2; - -# define SWAP(ty,aa,bb) \ - do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0) - - if (si->symtab_used == 0) - return; - - VG_(ssort)(si->symtab, si->symtab_used, sizeof(*si->symtab), compare_RiSym); - - cleanup_more: - - /* If two symbols have identical address ranges, we pick one - using prefersym() (see it for details). 
*/ - do { - n_merged = 0; - j = si->symtab_used; - si->symtab_used = 0; - for (i = 0; i < j; i++) { - if (i < j-1 - && si->symtab[i].addr == si->symtab[i+1].addr - && si->symtab[i].size == si->symtab[i+1].size) { - n_merged++; - /* merge the two into one */ - si->symtab[si->symtab_used++] = *prefersym(&si->symtab[i], &si->symtab[i+1]); - i++; - } else { - si->symtab[si->symtab_used++] = si->symtab[i]; - } - } - TRACE_SYMTAB( "%d merged\n", n_merged); - } - while (n_merged > 0); - - /* Detect and "fix" overlapping address ranges. */ - n_truncated = 0; - - for (i = 0; i < ((Int)si->symtab_used) -1; i++) { - - vg_assert(si->symtab[i].addr <= si->symtab[i+1].addr); - - /* Check for common (no overlap) case. */ - if (si->symtab[i].addr + si->symtab[i].size - <= si->symtab[i+1].addr) - continue; - - /* There's an overlap. Truncate one or the other. */ - if (VG_(clo_trace_symtab)) { - VG_(printf)("overlapping address ranges in symbol table\n\t"); - printSym(si,i); - VG_(printf)("\t"); - printSym(si,i+1); - VG_(printf)("\n"); - } - - /* Truncate one or the other. */ - s1 = si->symtab[i].addr; - s2 = si->symtab[i+1].addr; - e1 = s1 + si->symtab[i].size - 1; - e2 = s2 + si->symtab[i+1].size - 1; - if (s1 < s2) { - e1 = s2-1; - } else { - vg_assert(s1 == s2); - if (e1 > e2) { - s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2); - } else - if (e1 < e2) { - s2 = e1+1; - } else { - /* e1 == e2. Identical addr ranges. We'll eventually wind - up back at cleanup_more, which will take care of it. */ - } - } - si->symtab[i].addr = s1; - si->symtab[i+1].addr = s2; - si->symtab[i].size = e1 - s1 + 1; - si->symtab[i+1].size = e2 - s2 + 1; - vg_assert(s1 <= s2); - vg_assert(si->symtab[i].size > 0); - vg_assert(si->symtab[i+1].size > 0); - /* It may be that the i+1 entry now needs to be moved further - along to maintain the address order requirement. 
*/ - j = i+1; - while (j < ((Int)si->symtab_used)-1 - && si->symtab[j].addr > si->symtab[j+1].addr) { - SWAP(RiSym,si->symtab[j],si->symtab[j+1]); - j++; - } - n_truncated++; - } - - if (n_truncated > 0) goto cleanup_more; - - /* Ensure relevant postconditions hold. */ - for (i = 0; i < ((Int)si->symtab_used)-1; i++) { - /* No zero-sized symbols. */ - vg_assert(si->symtab[i].size > 0); - /* In order. */ - vg_assert(si->symtab[i].addr < si->symtab[i+1].addr); - /* No overlaps. */ - vg_assert(si->symtab[i].addr + si->symtab[i].size - 1 - < si->symtab[i+1].addr); - } -# undef SWAP -} - -/* Sort the scope range table by starting address. Mash the table - around so as to establish the property that addresses are in order - and the ranges do not overlap. This facilitates using binary - search to map addresses to scopes when we come to query the - table. -*/ -static Int compare_ScopeRange(void *va, void *vb) { - ScopeRange *a = (ScopeRange *)va; - ScopeRange *b = (ScopeRange *)vb; - - if (a->addr < b->addr) return -1; - if (a->addr > b->addr) return 1; - return 0; -} - -static -void canonicaliseScopetab ( SegInfo* si ) -{ - Int i,j; - - if (si->scopetab_used == 0) - return; - - /* Sort by start address. */ - VG_(ssort)(si->scopetab, si->scopetab_used, sizeof(*si->scopetab), - compare_ScopeRange); - - /* If two adjacent entries overlap, truncate the first. */ - for (i = 0; i < si->scopetab_used-1; i++) { - if (si->scopetab[i].addr + si->scopetab[i].size > si->scopetab[i+1].addr) { - Int new_size = si->scopetab[i+1].addr - si->scopetab[i].addr; - - if (new_size < 0) - si->scopetab[i].size = 0; - else - si->scopetab[i].size = new_size; - } - } - - /* Zap any zero-sized entries resulting from the truncation - process. */ - j = 0; - for (i = 0; i < si->scopetab_used; i++) { - if (si->scopetab[i].size > 0) { - if (j != i) - si->scopetab[j] = si->scopetab[i]; - j++; - } - } - si->scopetab_used = j; - - /* Ensure relevant postconditions hold. 
*/ - for (i = 0; i < si->scopetab_used-1; i++) { - /* - VG_(printf)("%d (%d) %d 0x%x\n", - i, si->scopetab[i+1].confident, - si->scopetab[i+1].size, si->scopetab[i+1].addr ); - */ - /* No zero-sized symbols. */ - vg_assert(si->scopetab[i].size > 0); - /* In order. */ - if (si->scopetab[i].addr >= si->scopetab[i+1].addr) - VG_(printf)("si->scopetab[%d] = %p,size=%d [%d] = %p,size=%d\n", - i, si->scopetab[i].addr, si->scopetab[i].size, - i+1, si->scopetab[i+1].addr, si->scopetab[i+1].size); - vg_assert(si->scopetab[i].addr < si->scopetab[i+1].addr); - /* No overlaps. */ - vg_assert(si->scopetab[i].addr + si->scopetab[i].size - 1 - < si->scopetab[i+1].addr); - } -} - - -/* Sort the location table by starting address. Mash the table around - so as to establish the property that addresses are in order and the - ranges do not overlap. This facilitates using binary search to map - addresses to locations when we come to query the table. -*/ -static Int compare_RiLoc(void *va, void *vb) { - RiLoc *a = (RiLoc *)va; - RiLoc *b = (RiLoc *)vb; - - if (a->addr < b->addr) return -1; - if (a->addr > b->addr) return 1; - return 0; -} - -static -void canonicaliseLoctab ( SegInfo* si ) -{ - Int i, j; - -# define SWAP(ty,aa,bb) \ - do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0); - - if (si->loctab_used == 0) - return; - - /* Sort by start address. */ - VG_(ssort)(si->loctab, si->loctab_used, sizeof(*si->loctab), compare_RiLoc); - - /* If two adjacent entries overlap, truncate the first. */ - for (i = 0; i < ((Int)si->loctab_used)-1; i++) { - vg_assert(si->loctab[i].size < 10000); - if (si->loctab[i].addr + si->loctab[i].size > si->loctab[i+1].addr) { - /* Do this in signed int32 because the actual .size fields - are only 12 bits. 
*/ - Int new_size = si->loctab[i+1].addr - si->loctab[i].addr; - if (new_size < 0) { - si->loctab[i].size = 0; - } else - if (new_size > MAX_LOC_SIZE) { - si->loctab[i].size = MAX_LOC_SIZE; - } else { - si->loctab[i].size = (UShort)new_size; - } - } - } - - /* Zap any zero-sized entries resulting from the truncation - process. */ - j = 0; - for (i = 0; i < (Int)si->loctab_used; i++) { - if (si->loctab[i].size > 0) { - if (j != i) - si->loctab[j] = si->loctab[i]; - j++; - } - } - si->loctab_used = j; - - /* Ensure relevant postconditions hold. */ - for (i = 0; i < ((Int)si->loctab_used)-1; i++) { - /* - VG_(printf)("%d (%d) %d 0x%x\n", - i, si->loctab[i+1].confident, - si->loctab[i+1].size, si->loctab[i+1].addr ); - */ - /* No zero-sized symbols. */ - vg_assert(si->loctab[i].size > 0); - /* In order. */ - vg_assert(si->loctab[i].addr < si->loctab[i+1].addr); - /* No overlaps. */ - vg_assert(si->loctab[i].addr + si->loctab[i].size - 1 - < si->loctab[i+1].addr); - } -# undef SWAP -} - - -/* Sort the call-frame-info table by starting address. Mash the table - around so as to establish the property that addresses are in order - and the ranges do not overlap. This facilitates using binary - search to map addresses to locations when we come to query the - table. - - Also, set cfisi_minaddr and cfisi_maxaddr to be the min and max of - any of the address ranges contained in cfisi[0 .. cfisi_used-1], so - as to facilitate rapidly skipping this SegInfo when looking for an - address which falls outside that range. -*/ -static Int compare_CfiSI(void *va, void *vb) { - CfiSI *a = (CfiSI*)va; - CfiSI *b = (CfiSI*)vb; - - if (a->base < b->base) return -1; - if (a->base > b->base) return 1; - return 0; -} - -static -void canonicaliseCfiSI ( SegInfo* si ) -{ - Int i, j; - const Addr minAddr = 0; - const Addr maxAddr = ~minAddr; - - /* Note: take care in here. si->cfisi can be NULL, in which - case _used and _size fields will be zero. 
*/ - if (si->cfisi == NULL) { - vg_assert(si->cfisi_used == 0); - vg_assert(si->cfisi_size == 0); - } - - /* Set cfisi_minaddr and cfisi_maxaddr to summarise the entire - address range contained in cfisi[0 .. cfisi_used-1]. */ - si->cfisi_minaddr = maxAddr; - si->cfisi_maxaddr = minAddr; - for (i = 0; i < (Int)si->cfisi_used; i++) { - Addr here_min = si->cfisi[i].base; - Addr here_max = si->cfisi[i].base + si->cfisi[i].len - 1; - if (here_min < si->cfisi_minaddr) - si->cfisi_minaddr = here_min; - if (here_max > si->cfisi_maxaddr) - si->cfisi_maxaddr = here_max; - } - - if (VG_(clo_trace_cfi)) - VG_(printf)("canonicaliseCfiSI: %d entries, %p .. %p\n", - si->cfisi_used, - si->cfisi_minaddr, si->cfisi_maxaddr); - - /* Sort the cfisi array by base address. */ - VG_(ssort)(si->cfisi, si->cfisi_used, sizeof(*si->cfisi), compare_CfiSI); - - /* If two adjacent entries overlap, truncate the first. */ - for (i = 0; i < (Int)si->cfisi_used-1; i++) { - if (si->cfisi[i].base + si->cfisi[i].len > si->cfisi[i+1].base) { - Int new_len = si->cfisi[i+1].base - si->cfisi[i].base; - /* how could it be otherwise? The entries are sorted by the - .base field. */ - vg_assert(new_len >= 0); - vg_assert(new_len <= si->cfisi[i].len); - si->cfisi[i].len = new_len; - } - } - - /* Zap any zero-sized entries resulting from the truncation - process. */ - j = 0; - for (i = 0; i < (Int)si->cfisi_used; i++) { - if (si->cfisi[i].len > 0) { - if (j != i) - si->cfisi[j] = si->cfisi[i]; - j++; - } - } - /* VG_(printf)("XXXXXXXXXXXXX %d %d\n", si->cfisi_used, j); */ - si->cfisi_used = j; - - /* Ensure relevant postconditions hold. */ - for (i = 0; i < (Int)si->cfisi_used; i++) { - /* No zero-length ranges. */ - vg_assert(si->cfisi[i].len > 0); - /* Makes sense w.r.t. 
summary address range */ - vg_assert(si->cfisi[i].base >= si->cfisi_minaddr); - vg_assert(si->cfisi[i].base + si->cfisi[i].len - 1 - <= si->cfisi_maxaddr); - - if (i < si->cfisi_used - 1) { - /* - if (!(si->cfisi[i].base < si->cfisi[i+1].base)) { - VG_(printf)("\nOOO cfisis:\n"); - ML_(ppCfiSI)(&si->cfisi[i]); - ML_(ppCfiSI)(&si->cfisi[i+1]); - } - */ - /* In order. */ - vg_assert(si->cfisi[i].base < si->cfisi[i+1].base); - /* No overlaps. */ - vg_assert(si->cfisi[i].base + si->cfisi[i].len - 1 - < si->cfisi[i+1].base); - } - } - -} - - -/*------------------------------------------------------------*/ -/*--- ---*/ -/*--- Read symbol table and line info from ELF files. ---*/ -/*--- ---*/ -/*------------------------------------------------------------*/ - -/* Identify an ELF object file. */ - -static Bool is_elf_object_file(const void *buf) -{ - ElfXX_Ehdr *ehdr = (ElfXX_Ehdr *)buf; - Int ok = 1; - - ok &= (ehdr->e_ident[EI_MAG0] == 0x7F - && ehdr->e_ident[EI_MAG1] == 'E' - && ehdr->e_ident[EI_MAG2] == 'L' - && ehdr->e_ident[EI_MAG3] == 'F'); - ok &= (ehdr->e_ident[EI_CLASS] == VG_ELF_CLASS - && ehdr->e_ident[EI_DATA] == VG_ELF_DATA2XXX - && ehdr->e_ident[EI_VERSION] == EV_CURRENT); - ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN); - ok &= (ehdr->e_machine == VG_ELF_MACHINE); - ok &= (ehdr->e_version == EV_CURRENT); - ok &= (ehdr->e_shstrndx != SHN_UNDEF); - ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0); - ok &= (ehdr->e_phoff != 0 && ehdr->e_phnum != 0); - - if (ok) - return True; - else - return False; -} - - -/* Show a raw ELF symbol, given its in-image address and name. */ - -static -void show_raw_elf_symbol ( Int i, - ElfXX_Sym* sym, Char* sym_name, Addr sym_addr, - Bool ppc64_linux_format ) -{ - HChar* space = ppc64_linux_format ? 
" " : ""; - VG_(printf)("raw symbol [%4d]: ", i); - switch (ELFXX_ST_BIND(sym->st_info)) { - case STB_LOCAL: VG_(printf)("LOC "); break; - case STB_GLOBAL: VG_(printf)("GLO "); break; - case STB_WEAK: VG_(printf)("WEA "); break; - case STB_LOPROC: VG_(printf)("lop "); break; - case STB_HIPROC: VG_(printf)("hip "); break; - default: VG_(printf)("??? "); break; - } - switch (ELFXX_ST_TYPE(sym->st_info)) { - case STT_NOTYPE: VG_(printf)("NOT "); break; - case STT_OBJECT: VG_(printf)("OBJ "); break; - case STT_FUNC: VG_(printf)("FUN "); break; - case STT_SECTION: VG_(printf)("SEC "); break; - case STT_FILE: VG_(printf)("FIL "); break; - case STT_LOPROC: VG_(printf)("lop "); break; - case STT_HIPROC: VG_(printf)("hip "); break; - default: VG_(printf)("??? "); break; - } - VG_(printf)(": val %010p, %ssz %4d %s\n", - sym_addr, space, sym->st_size, - ( sym->st_name ? sym_name : (Char*)"NONAME" ) ); -} - - -/* Decide whether SYM is something we should collect, and if so, copy - relevant info to the _OUT arguments. For {x86,amd64,ppc32}-linux - this is straightforward - the name, address, size are copied out - unchanged. - - For ppc64-linux it's more complex. If the symbol is seen to be in - the .opd section, it is taken to be a function descriptor, and so - a dereference is attempted, in order to get hold of the real entry - point address. Also as part of the dereference, there is an attempt - to calculate the TOC pointer (R2 value) associated with the symbol. - - To support the ppc64-linux pre-"dotless" ABI (prior to gcc 4.0.0), - if the symbol is seen to be outside the .opd section and its name - starts with a dot, an .opd deference is not attempted, and no TOC - pointer is calculated, but the the leading dot is removed from the - name. - - As a result, on ppc64-linux, the caller of this function may have - to piece together the real size, address, name of the symbol from - multiple calls to this function. Ugly and confusing. 
-*/ -static -Bool get_elf_symbol_info ( - /* INPUTS */ - SegInfo* si, /* containing SegInfo */ - ElfXX_Sym* sym, /* ELF symbol */ - Char* sym_name, /* name */ - Addr sym_addr, /* declared address */ - UChar* opd_filea, /* oimage of .opd sec (ppc64-linux only) */ - /* OUTPUTS */ - Char** sym_name_out, /* name we should record */ - Addr* sym_addr_out, /* addr we should record */ - Int* sym_size_out, /* symbol size */ - Addr* sym_tocptr_out, /* ppc64-linux only: R2 value to be - used on entry */ - Bool* from_opd_out /* ppc64-linux only: did we deref an - .opd entry? */ - ) -{ - Bool plausible, is_in_opd; - - /* Set defaults */ - *sym_name_out = sym_name; - *sym_addr_out = sym_addr; - *sym_size_out = (Int)sym->st_size; - *sym_tocptr_out = 0; /* unknown/inapplicable */ - *from_opd_out = False; - - /* Figure out if we're interested in the symbol. Firstly, is it of - the right flavour? */ - plausible - = (ELFXX_ST_BIND(sym->st_info) == STB_GLOBAL - || ELFXX_ST_BIND(sym->st_info) == STB_LOCAL - || ELFXX_ST_BIND(sym->st_info) == STB_WEAK - ) - && - (ELFXX_ST_TYPE(sym->st_info) == STT_FUNC - || (VG_(needs).data_syms - && ELFXX_ST_TYPE(sym->st_info) == STT_OBJECT) - ); - -# if defined(VGP_ppc64_linux) - /* Allow STT_NOTYPE in the very special case where we're running on - ppc64-linux and the symbol is one which the .opd-chasing hack - below will chase. */ - if (!plausible - && ELFXX_ST_TYPE(sym->st_info) == STT_NOTYPE - && sym->st_size > 0 - && si->opd_start_vma != 0 - && sym_addr >= si->opd_start_vma - && sym_addr < si->opd_start_vma + si->opd_size) - plausible = True; -# endif - - if (!plausible) - return False; - - /* Ignore if nameless, or zero-sized. */ - if (sym->st_name == (ElfXX_Word)NULL - || /* VG_(strlen)(sym_name) == 0 */ - /* equivalent but cheaper ... 
*/ - sym_name[0] == 0 - || sym->st_size == 0) { - TRACE_SYMTAB(" ignore -- size=0: %s\n", sym_name); - return False; - } - - /* This seems to significantly reduce the number of junk - symbols, and particularly reduces the number of - overlapping address ranges. Don't ask me why ... */ - if ((Int)sym->st_value == 0) { - TRACE_SYMTAB( " ignore -- valu=0: %s\n", sym_name); - return False; - } - - /* If it's apparently in a GOT or PLT, it's really a reference to a - symbol defined elsewhere, so ignore it. */ - if (si->got_start_vma != 0 - && sym_addr >= si->got_start_vma - && sym_addr < si->got_start_vma + si->got_size) { - TRACE_SYMTAB(" ignore -- in GOT: %s\n", sym_name); - return False; - } - if (si->plt_start_vma != 0 - && sym_addr >= si->plt_start_vma - && sym_addr < si->plt_start_vma + si->plt_size) { - TRACE_SYMTAB(" ignore -- in PLT: %s\n", sym_name); - return False; - } - - /* ppc64-linux nasty hack: if the symbol is in an .opd section, - then really what we have is the address of a function - descriptor. So use the first word of that as the function's - text. - - See thread starting at - http://gcc.gnu.org/ml/gcc-patches/2004-08/msg00557.html - */ - is_in_opd = False; - - if (si->opd_start_vma != 0 - && sym_addr >= si->opd_start_vma - && sym_addr < si->opd_start_vma + si->opd_size) { -# if !defined(VGP_ppc64_linux) - TRACE_SYMTAB(" ignore -- in OPD: %s\n", sym_name); - return False; -# else - Int offset_in_opd; - ULong* fn_descr; - - if (0) VG_(printf)("opdXXX: si->offset %p, sym_addr %p\n", - (void*)(si->offset), (void*)sym_addr); - - if (!VG_IS_8_ALIGNED(sym_addr)) { - TRACE_SYMTAB(" ignore -- not 8-aligned: %s\n", sym_name); - return False; - } - - /* sym_addr is a vma pointing into the .opd section. We know - the vma of the opd section start, so we can figure out how - far into the opd section this is. 
*/ - - offset_in_opd = (Addr)sym_addr - (Addr)(si->opd_start_vma); - if (offset_in_opd < 0 || offset_in_opd >= si->opd_size) { - TRACE_SYMTAB(" ignore -- invalid OPD offset: %s\n", sym_name); - return False; - } - - /* Now we want to know what's at that offset in the .opd - section. We can't look in the running image since it won't - necessarily have been mapped. But we can consult the oimage. - opd_filea is the start address of the .opd in the oimage. - Hence: */ - - fn_descr = (ULong*)(opd_filea + offset_in_opd); - - if (0) VG_(printf)("opdXXY: offset %d, fn_descr %p\n", - offset_in_opd, fn_descr); - if (0) VG_(printf)("opdXXZ: *fn_descr %p\n", (void*)(fn_descr[0])); - - sym_addr = fn_descr[0]; - - /* Hopefully sym_addr is now an offset into the text section. - Problem is, where did the text section get mapped? Well, - this SegInfo (si) exists because a text section got mapped, - and it got mapped to si->start. Hence add si->start to the - sym_addr to get the real vma. */ - - sym_addr += si->offset; - *sym_addr_out = sym_addr; - *sym_tocptr_out = fn_descr[1] + si->offset; - *from_opd_out = True; - is_in_opd = True; - - /* Do a final sanity check: if the symbol falls outside the - SegInfo's mapped range, ignore it. Since sym_addr has been - updated, that can be achieved simply by falling through to - the test below. */ - -# endif /* ppc64-linux nasty hack */ - } - - /* Here's yet another ppc64-linux hack. Get rid of leading dot if - the symbol is outside .opd. */ -# if defined(VGP_ppc64_linux) - if (si->opd_start_vma != 0 - && !is_in_opd - && sym_name[0] == '.') { - vg_assert(!(*from_opd_out)); - *sym_name_out = &sym_name[1]; - } -# endif - - /* If no part of the symbol falls within the mapped range, - ignore it. 
*/ - if (*sym_addr_out + *sym_size_out <= si->start - || *sym_addr_out >= si->start+si->size) { - TRACE_SYMTAB( " ignore -- outside mapped range\n" ); - return False; - } - -# if defined(VGP_ppc64_linux) - /* It's crucial that we never add symbol addresses in the .opd - section. This would completely mess up function redirection and - intercepting. This assert ensures that any symbols that make it - into the symbol table on ppc64-linux don't point into .opd. */ - if (si->opd_start_vma != 0) { - vg_assert(*sym_addr_out + *sym_size_out <= si->opd_start_vma - || *sym_addr_out >= si->opd_start_vma + si->opd_size); - } -# endif - - /* Acquire! */ - return True; -} - - -/* Read an ELF symbol table (normal or dynamic). This one is for the - "normal" case ({x86,amd64,ppc32}-linux). */ -static -__attribute__((unused)) /* not referred to on all targets */ -void read_elf_symtab__normal( - SegInfo* si, Char* tab_name, - ElfXX_Sym* o_symtab, UInt o_symtab_sz, - UChar* o_strtab, UInt o_strtab_sz, - UChar* opd_filea /* ppc64-linux only */ - ) -{ - Int i; - Addr sym_addr, sym_addr_really; - Char *sym_name, *sym_name_really; - Int sym_size; - Addr sym_tocptr; - Bool from_opd; - RiSym risym; - ElfXX_Sym *sym; - - if (o_strtab == NULL || o_symtab == NULL) { - Char buf[80]; - vg_assert(VG_(strlen)(tab_name) < 40); - VG_(sprintf)(buf, " object doesn't have a %s", tab_name); - ML_(symerr)(buf); - return; - } - - TRACE_SYMTAB("\nReading (ELF, standard) %s (%d entries)\n", tab_name, - o_symtab_sz/sizeof(ElfXX_Sym) ); - - /* Perhaps should start at i = 1; ELF docs suggest that entry - 0 always denotes 'unknown symbol'. 
*/ - for (i = 1; i < (Int)(o_symtab_sz/sizeof(ElfXX_Sym)); i++) { - sym = & o_symtab[i]; - sym_name = (Char*)(o_strtab + sym->st_name); - sym_addr = si->offset + sym->st_value; - - if (VG_(clo_trace_symtab)) - show_raw_elf_symbol(i, sym, sym_name, sym_addr, False); - - if (get_elf_symbol_info(si, sym, sym_name, sym_addr, opd_filea, - &sym_name_really, - &sym_addr_really, - &sym_size, - &sym_tocptr, - &from_opd)) { - - risym.addr = sym_addr_really; - risym.size = sym_size; - risym.name = ML_(addStr) ( si, sym_name_really, -1 ); - risym.tocptr = sym_tocptr; - vg_assert(risym.name != NULL); - vg_assert(risym.tocptr == 0); /* has no role except on ppc64-linux */ - addSym ( si, &risym ); - - if (VG_(clo_trace_symtab)) { - VG_(printf)(" record [%4d]: " - " val %010p, sz %4d %s\n", - i, (void*)risym.addr, (Int)risym.size, - (HChar*)risym.name - ); - } - - } - } -} - - -/* Read an ELF symbol table (normal or dynamic). This one is for - ppc64-linux, which requires special treatment. */ - -typedef - struct { - Addr addr; - Char* name; - } - TempSymKey; - -typedef - struct { - TempSymKey key; - Addr tocptr; - Int size; - Bool from_opd; - } - TempSym; - -static Word cmp_TempSymKey ( TempSymKey* key1, TempSym* elem2 ) { - if (key1->addr < elem2->key.addr) return -1; - if (key1->addr > elem2->key.addr) return 1; - return (Word)VG_(strcmp)(key1->name, elem2->key.name); -} -static void* oset_malloc ( SizeT szB ) { - return VG_(arena_malloc)(VG_AR_SYMTAB, szB); -} -static void oset_free ( void* p ) { - VG_(arena_free)(VG_AR_SYMTAB, p); -} - -static -__attribute__((unused)) /* not referred to on all targets */ -void read_elf_symtab__ppc64_linux( - SegInfo* si, Char* tab_name, - ElfXX_Sym* o_symtab, UInt o_symtab_sz, - UChar* o_strtab, UInt o_strtab_sz, - UChar* opd_filea /* ppc64-linux only */ - ) -{ - Int i, old_size; - Addr sym_addr, sym_addr_really; - Char *sym_name, *sym_name_really; - Int sym_size; - Addr sym_tocptr, old_tocptr; - Bool from_opd, modify_size, modify_tocptr; - 
RiSym risym; - ElfXX_Sym *sym; - OSet *oset; - TempSymKey key; - TempSym *elem; - TempSym *prev; - - if (o_strtab == NULL || o_symtab == NULL) { - Char buf[80]; - vg_assert(VG_(strlen)(tab_name) < 40); - VG_(sprintf)(buf, " object doesn't have a %s", tab_name); - ML_(symerr)(buf); - return; - } - - TRACE_SYMTAB("\nReading (ELF, ppc64-linux) %s (%d entries)\n", tab_name, - o_symtab_sz/sizeof(ElfXX_Sym) ); - - oset = VG_(OSet_Create)( offsetof(TempSym,key), - (OSetCmp_t)cmp_TempSymKey, - oset_malloc, oset_free ); - vg_assert(oset); - - /* Perhaps should start at i = 1; ELF docs suggest that entry - 0 always denotes 'unknown symbol'. */ - for (i = 1; i < (Int)(o_symtab_sz/sizeof(ElfXX_Sym)); i++) { - sym = & o_symtab[i]; - sym_name = (Char*)(o_strtab + sym->st_name); - sym_addr = si->offset + sym->st_value; - - if (VG_(clo_trace_symtab)) - show_raw_elf_symbol(i, sym, sym_name, sym_addr, True); - - if (get_elf_symbol_info(si, sym, sym_name, sym_addr, opd_filea, - &sym_name_really, - &sym_addr_really, - &sym_size, - &sym_tocptr, - &from_opd)) { - - /* Check if we've seen this (name,addr) key before. */ - key.addr = sym_addr_really; - key.name = sym_name_really; - prev = VG_(OSet_Lookup)( oset, &key ); - - if (prev) { - - /* Seen it before. Fold in whatever new info we can. */ - modify_size = False; - modify_tocptr = False; - old_size = 0; - old_tocptr = 0; - - if (prev->from_opd && !from_opd - && (prev->size == 24 || prev->size == 16) - && sym_size != prev->size) { - /* Existing one is an opd-redirect, with a bogus size, - so the only useful new fact we have is the real size - of the symbol. */ - modify_size = True; - old_size = prev->size; - prev->size = sym_size; - } - else - if (!prev->from_opd && from_opd - && (sym_size == 24 || sym_size == 16)) { - /* Existing one is non-opd, new one is opd. What we - can acquire from the new one is the TOC ptr to be - used. Since the existing sym is non-toc, it - shouldn't currently have an known TOC ptr. 
*/ - vg_assert(prev->tocptr == 0); - modify_tocptr = True; - old_tocptr = prev->tocptr; - prev->tocptr = sym_tocptr; - } - else { - /* ignore. can we do better here? */ - } - - /* Only one or the other is possible (I think) */ - vg_assert(!(modify_size && modify_tocptr)); - - if (modify_size && VG_(clo_trace_symtab)) { - VG_(printf)(" modify (old sz %4d) " - " val %010p, toc %010p, sz %4d %s\n", - old_size, - (void*) prev->key.addr, - (void*) prev->tocptr, - (Int) prev->size, - (HChar*)prev->key.name - ); - } - if (modify_tocptr && VG_(clo_trace_symtab)) { - VG_(printf)(" modify (upd tocptr) " - " val %010p, toc %010p, sz %4d %s\n", - (void*) prev->key.addr, - (void*) prev->tocptr, - (Int) prev->size, - (HChar*)prev->key.name - ); - } - - } else { - - /* A new (name,addr) key. Add and continue. */ - elem = VG_(OSet_AllocNode)(oset, sizeof(TempSym)); - vg_assert(elem); - elem->key = key; - elem->tocptr = sym_tocptr; - elem->size = sym_size; - elem->from_opd = from_opd; - VG_(OSet_Insert)(oset, elem); - if (VG_(clo_trace_symtab)) { - VG_(printf)(" to-oset [%4d]: " - " val %010p, toc %010p, sz %4d %s\n", - i, (void*) elem->key.addr, - (void*) elem->tocptr, - (Int) elem->size, - (HChar*)elem->key.name - ); - } - - } - } - } - - /* All the syms that matter are in the oset. Now pull them out, - build a "standard" symbol table, and nuke the oset. 
*/ - - i = 0; - VG_(OSet_ResetIter)( oset ); - - while ( (elem = VG_(OSet_Next)(oset)) ) { - risym.addr = elem->key.addr; - risym.size = elem->size; - risym.name = ML_(addStr) ( si, elem->key.name, -1 ); - risym.tocptr = elem->tocptr; - vg_assert(risym.name != NULL); - - addSym ( si, &risym ); - if (VG_(clo_trace_symtab)) { - VG_(printf)(" record [%4d]: " - " val %010p, toc %010p, sz %4d %s\n", - i, (void*) risym.addr, - (void*) risym.tocptr, - (Int) risym.size, - (HChar*)risym.name - ); - } - i++; - } - - VG_(OSet_Destroy)( oset, NULL ); -} - - -/* - * This routine for calculating the CRC for a separate debug file - * is GPLed code borrowed from binutils. - */ -static UInt -calc_gnu_debuglink_crc32(UInt crc, const UChar *buf, Int len) -{ - static const UInt crc32_table[256] = - { - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, - 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, - 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, - 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, - 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, - 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, - 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, - 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, - 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, - 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, - 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, - 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, - 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, - 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, - 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, - 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, - 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, - 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, - 0xfbd44c65, 0x4db26158, 
0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, - 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, - 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, - 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, - 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, - 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, - 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, - 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, - 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, - 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, - 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, - 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, - 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, - 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, - 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, - 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, - 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, - 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, - 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, - 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, - 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, - 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, - 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, - 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, - 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, - 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, - 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, - 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, - 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, - 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, - 0x2d02ef8d - }; - const UChar *end; - - 
crc = ~crc & 0xffffffff; - for (end = buf + len; buf < end; ++ buf) - crc = crc32_table[(crc ^ *buf) & 0xff] ^ (crc >> 8); - return ~crc & 0xffffffff;; -} - -/* - * Try and open a separate debug file, ignoring any where the CRC does - * not match the value from the main object file. - */ -static -Addr open_debug_file( Char* name, UInt crc, UInt* size ) -{ - SysRes fd, sres; - struct vki_stat stat_buf; - UInt calccrc; - - fd = VG_(open)(name, VKI_O_RDONLY, 0); - if (fd.isError) - return 0; - - if (VG_(fstat)(fd.val, &stat_buf) != 0) { - VG_(close)(fd.val); - return 0; - } - - if (VG_(clo_verbosity) > 1) - VG_(message)(Vg_DebugMsg, "Reading debug info from %s...", name); - - *size = stat_buf.st_size; - - sres = VG_(am_mmap_file_float_valgrind) - ( *size, VKI_PROT_READ, fd.val, 0 ); - - VG_(close)(fd.val); - - if (sres.isError) - return 0; - - calccrc = calc_gnu_debuglink_crc32(0, (UChar*)sres.val, *size); - if (calccrc != crc) { - SysRes res = VG_(am_munmap_valgrind)(sres.val, *size); - vg_assert(!res.isError); - if (VG_(clo_verbosity) > 1) - VG_(message)(Vg_DebugMsg, "... CRC mismatch (computed %08x wanted %08x)", calccrc, crc); - return 0; - } - - return sres.val; -} - -/* - * Try to find a separate debug file for a given object file. 
- */ -static -Addr find_debug_file( Char* objpath, Char* debugname, UInt crc, UInt* size ) -{ - Char *objdir = VG_(arena_strdup)(VG_AR_SYMTAB, objpath); - Char *objdirptr; - Char *debugpath; - Addr addr = 0; - - if ((objdirptr = VG_(strrchr)(objdir, '/')) != NULL) - *objdirptr = '\0'; - - debugpath = VG_(arena_malloc)(VG_AR_SYMTAB, VG_(strlen)(objdir) + VG_(strlen)(debugname) + 16); - - VG_(sprintf)(debugpath, "%s/%s", objdir, debugname); - - if ((addr = open_debug_file(debugpath, crc, size)) == 0) { - VG_(sprintf)(debugpath, "%s/.debug/%s", objdir, debugname); - if ((addr = open_debug_file(debugpath, crc, size)) == 0) { - VG_(sprintf)(debugpath, "/usr/lib/debug%s/%s", objdir, debugname); - addr = open_debug_file(debugpath, crc, size); - } - } - - VG_(arena_free)(VG_AR_SYMTAB, debugpath); - VG_(arena_free)(VG_AR_SYMTAB, objdir); - - return addr; -} - - -/* The central function for reading ELF debug info. For the - object/exe specified by the SegInfo, find ELF sections, then read - the symbols, line number info, file name info, CFA (stack-unwind - info) and anything else we want, into the tables within the - supplied SegInfo. -*/ -static -Bool read_elf_debug_info ( SegInfo* si ) -{ - Bool res; - ElfXX_Ehdr* ehdr; /* The ELF header */ - ElfXX_Shdr* shdr; /* The section table */ - UChar* sh_strtab; /* The section table's string table */ - SysRes fd, sres; - Int i; - Bool ok; - Addr oimage; - UInt n_oimage; - Addr dimage = 0; - UInt n_dimage = 0; - struct vki_stat stat_buf; - - oimage = (Addr)NULL; - if (VG_(clo_verbosity) > 1 || VG_(clo_trace_redir)) - VG_(message)(Vg_DebugMsg, "Reading syms from %s (%p)", - si->filename, si->start ); - - /* mmap the object image aboard, so that we can read symbols and - line number info out of it. It will be munmapped immediately - thereafter; it is only aboard transiently. 
*/ - - fd = VG_(stat)(si->filename, &stat_buf); - if (fd.isError) { - ML_(symerr)("Can't stat .so/.exe (to determine its size)?!"); - return False; - } - n_oimage = stat_buf.st_size; - - fd = VG_(open)(si->filename, VKI_O_RDONLY, 0); - if (fd.isError) { - ML_(symerr)("Can't open .so/.exe to read symbols?!"); - return False; - } - - sres = VG_(am_mmap_file_float_valgrind) - ( n_oimage, VKI_PROT_READ, fd.val, 0 ); - - VG_(close)(fd.val); - - if (sres.isError) { - VG_(message)(Vg_UserMsg, "warning: mmap failed on %s", si->filename ); - VG_(message)(Vg_UserMsg, " no symbols or debug info loaded" ); - return False; - } - - oimage = sres.val; - - /* Ok, the object image is safely in oimage[0 .. n_oimage-1]. - Now verify that it is a valid ELF .so or executable image. - */ - res = False; - ok = (n_oimage >= sizeof(ElfXX_Ehdr)); - ehdr = (ElfXX_Ehdr*)oimage; - - if (ok) - ok &= is_elf_object_file(ehdr); - - if (!ok) { - ML_(symerr)("Invalid ELF header, or missing stringtab/sectiontab."); - goto out; - } - - /* Walk the LOAD headers in the phdr and update the SegInfo to - include them all, so that this segment also contains data and - bss memory. Also computes correct symbol offset value for this - ELF file. */ - if (ehdr->e_phoff + ehdr->e_phnum*sizeof(ElfXX_Phdr) > n_oimage) { - ML_(symerr)("ELF program header is beyond image end?!"); - goto out; - } - { - Bool offset_set = False; - ElfXX_Addr prev_addr = 0; - Addr baseaddr = 0; - - si->offset = 0; - - vg_assert(si->soname == NULL); - - for (i = 0; i < ehdr->e_phnum; i++) { - ElfXX_Phdr *o_phdr; - ElfXX_Addr mapped, mapped_end; - - o_phdr = &((ElfXX_Phdr *)(oimage + ehdr->e_phoff))[i]; - - /* Try to get the soname. If there isn't one, use "NONE". - The seginfo needs to have some kind of soname in order to - facilitate writing redirect functions, since all redirect - specifications require a soname (pattern). 
*/ - if (o_phdr->p_type == PT_DYNAMIC && si->soname == NULL) { - const ElfXX_Dyn *dyn = (const ElfXX_Dyn *)(oimage + o_phdr->p_offset); - Int stroff = -1; - Char *strtab = NULL; - Int j; - - for(j = 0; dyn[j].d_tag != DT_NULL; j++) { - switch(dyn[j].d_tag) { - case DT_SONAME: - stroff = dyn[j].d_un.d_val; - break; - - case DT_STRTAB: - strtab = (Char *)oimage + dyn[j].d_un.d_ptr - baseaddr; - break; - } - } - - if (stroff != -1 && strtab != 0) { - TRACE_SYMTAB("soname=%s\n", strtab+stroff); - si->soname = VG_(arena_strdup)(VG_AR_SYMTAB, strtab+stroff); - } - } - - if (o_phdr->p_type != PT_LOAD) - continue; - - if (!offset_set) { - offset_set = True; - si->offset = si->start - o_phdr->p_vaddr; - baseaddr = o_phdr->p_vaddr; - } - - // Make sure the Phdrs are in order - if (o_phdr->p_vaddr < prev_addr) { - ML_(symerr)("ELF Phdrs are out of order!?"); - goto out; - } - prev_addr = o_phdr->p_vaddr; - - // Get the data and bss start/size if appropriate - mapped = o_phdr->p_vaddr + si->offset; - mapped_end = mapped + o_phdr->p_memsz; - if (si->data_start_vma == 0 && - (o_phdr->p_flags & (PF_R|PF_W|PF_X)) == (PF_R|PF_W)) { - si->data_start_vma = mapped; - si->data_size = o_phdr->p_filesz; - si->bss_start_vma = mapped + o_phdr->p_filesz; - if (o_phdr->p_memsz > o_phdr->p_filesz) - si->bss_size = o_phdr->p_memsz - o_phdr->p_filesz; - else - si->bss_size = 0; - } - - mapped = mapped & ~(VKI_PAGE_SIZE-1); - mapped_end = (mapped_end + VKI_PAGE_SIZE - 1) & ~(VKI_PAGE_SIZE-1); - - if (VG_(needs).data_syms && - (mapped >= si->start && mapped <= (si->start+si->size)) && - (mapped_end > (si->start+si->size))) { - UInt newsz = mapped_end - si->start; - if (newsz > si->size) { - if (0) - VG_(printf)("extending mapping %p..%p %d -> ..%p %d\n", - si->start, si->start+si->size, si->size, - si->start+newsz, newsz); - - si->size = newsz; - } - } - } - } - - /* If, after looking at all the program headers, we still didn't - find a soname, add a fake one. 
*/ - if (si->soname == NULL) { - TRACE_SYMTAB("soname(fake)=\"NONE\"\n"); - si->soname = "NONE"; - } - - TRACE_SYMTAB("shoff = %d, shnum = %d, size = %d, n_vg_oimage = %d\n", - ehdr->e_shoff, ehdr->e_shnum, sizeof(ElfXX_Shdr), n_oimage ); - - if (ehdr->e_shoff + ehdr->e_shnum*sizeof(ElfXX_Shdr) > n_oimage) { - ML_(symerr)("ELF section header is beyond image end?!"); - goto out; - } - - shdr = (ElfXX_Shdr*)(oimage + ehdr->e_shoff); - sh_strtab = (UChar*)(oimage + shdr[ehdr->e_shstrndx].sh_offset); - - /* Find interesting sections, read the symbol table(s), read any debug - information */ - { - /* Pointers to start of sections (in the oimage, not in the - running image) */ - UChar* o_strtab = NULL; /* .strtab */ - ElfXX_Sym* o_symtab = NULL; /* .symtab */ - UChar* o_dynstr = NULL; /* .dynstr */ - ElfXX_Sym* o_dynsym = NULL; /* .dynsym */ - Char* debuglink = NULL; /* .gnu_debuglink */ - UChar* stab = NULL; /* .stab (stabs) */ - UChar* stabstr = NULL; /* .stabstr (stabs) */ - UChar* debug_line = NULL; /* .debug_line (dwarf2) */ - UChar* debug_info = NULL; /* .debug_info (dwarf2) */ - UChar* debug_abbv = NULL; /* .debug_abbrev (dwarf2) */ - UChar* debug_str = NULL; /* .debug_str (dwarf2) */ - UChar* dwarf1d = NULL; /* .debug (dwarf1) */ - UChar* dwarf1l = NULL; /* .line (dwarf1) */ - UChar* ehframe = NULL; /* .eh_frame (dwarf2) */ - UChar* opd_filea = NULL; /* .opd (dwarf2, ppc64-linux) */ - UChar* dummy_filea = NULL; - - /* Section sizes, in bytes */ - UInt o_strtab_sz = 0; - UInt o_symtab_sz = 0; - UInt o_dynstr_sz = 0; - UInt o_dynsym_sz = 0; - UInt debuglink_sz = 0; - UInt stab_sz = 0; - UInt stabstr_sz = 0; - UInt debug_line_sz = 0; - UInt debug_info_sz = 0; - UInt debug_abbv_sz = 0; - UInt debug_str_sz = 0; - UInt dwarf1d_sz = 0; - UInt dwarf1l_sz = 0; - UInt ehframe_sz = 0; - - /* Section virtual addresses */ - Addr dummy_vma = 0; - Addr ehframe_vma = 0; - - /* Find all interesting sections */ - - /* What FIND does: it finds the section called SEC_NAME. 
The - size of it is assigned to SEC_SIZE. The address that it will - appear in the running image is assigned to SEC_VMA (note, - this will be meaningless for sections which are not marked - loadable. Even for sections which are marked loadable, the - client's ld.so may not have loaded them yet, so there is no - guarantee that we can safely prod around in any such area) - The address of the section in the transiently loaded oimage - is assigned to SEC_FILEA. Because the entire object file is - transiently mapped aboard for inspection, it's always safe to - inspect that area. */ - - for (i = 0; i < ehdr->e_shnum; i++) { - -# define FIND(sec_name, sec_size, sec_filea, sec_vma) \ - if (0 == VG_(strcmp)(sec_name, sh_strtab + shdr[i].sh_name)) { \ - Bool nobits; \ - sec_vma = (Addr)(si->offset + shdr[i].sh_addr); \ - sec_filea = (void*)(oimage + shdr[i].sh_offset); \ - sec_size = shdr[i].sh_size; \ - nobits = shdr[i].sh_type == SHT_NOBITS; \ - TRACE_SYMTAB( "%18s: filea %p .. %p, vma %p .. %p\n", \ - sec_name, (UChar*)sec_filea, \ - ((UChar*)sec_filea) + sec_size - 1, \ - sec_vma, sec_vma + sec_size - 1); \ - /* SHT_NOBITS sections have zero size in the file. */ \ - if ( shdr[i].sh_offset + (nobits ? 0 : sec_size) > n_oimage ) { \ - ML_(symerr)(" section beyond image end?!"); \ - goto out; \ - } \ - } - - /* Nb: must find where .got and .plt sections will be in the - * executable image, not in the object image transiently loaded. 
*/ - /* NAME SIZE ADDR_IN_OIMAGE ADDR_WHEN_MAPPED */ - FIND(".dynsym", o_dynsym_sz, o_dynsym, dummy_vma) - FIND(".dynstr", o_dynstr_sz, o_dynstr, dummy_vma) - FIND(".symtab", o_symtab_sz, o_symtab, dummy_vma) - FIND(".strtab", o_strtab_sz, o_strtab, dummy_vma) - - FIND(".gnu_debuglink", debuglink_sz, debuglink, dummy_vma) - - FIND(".stab", stab_sz, stab, dummy_vma) - FIND(".stabstr", stabstr_sz, stabstr, dummy_vma) - - FIND(".debug_line", debug_line_sz, debug_line, dummy_vma) - FIND(".debug_info", debug_info_sz, debug_info, dummy_vma) - FIND(".debug_abbrev", debug_abbv_sz, debug_abbv, dummy_vma) - FIND(".debug_str", debug_str_sz, debug_str, dummy_vma) - - FIND(".debug", dwarf1d_sz, dwarf1d, dummy_vma) - FIND(".line", dwarf1l_sz, dwarf1l, dummy_vma) - FIND(".eh_frame", ehframe_sz, ehframe, ehframe_vma) - - FIND(".got", si->got_size, dummy_filea, si->got_start_vma) - FIND(".plt", si->plt_size, dummy_filea, si->plt_start_vma) - FIND(".opd", si->opd_size, opd_filea, si->opd_start_vma) - -# undef FIND - } - - /* Check some sizes */ - vg_assert((o_dynsym_sz % sizeof(ElfXX_Sym)) == 0); - vg_assert((o_symtab_sz % sizeof(ElfXX_Sym)) == 0); - - /* Did we find a debuglink section? */ - if (debuglink != NULL) { - UInt crc_offset = VG_ROUNDUP(VG_(strlen)(debuglink)+1, 4); - UInt crc; - - vg_assert(crc_offset + sizeof(UInt) <= debuglink_sz); - - /* Extract the CRC from the debuglink section */ - crc = *(UInt *)(debuglink + crc_offset); - - /* See if we can find a matching debug file */ - if ((dimage = find_debug_file(si->filename, debuglink, crc, &n_dimage)) != 0) { - ehdr = (ElfXX_Ehdr*)dimage; - - if (n_dimage >= sizeof(ElfXX_Ehdr) && is_elf_object_file(ehdr)) { - shdr = (ElfXX_Shdr*)(dimage + ehdr->e_shoff); - sh_strtab = (UChar*)(dimage + shdr[ehdr->e_shstrndx].sh_offset); - - /* Same deal as previous FIND, except simpler - doesn't - look for vma, only oimage address. 
*/ - - /* Find all interesting sections */ - for (i = 0; i < ehdr->e_shnum; i++) { - -# define FIND(sec_name, sec_size, sec_filea) \ - if (0 == VG_(strcmp)(sec_name, sh_strtab + shdr[i].sh_name)) { \ - Bool nobits; \ - if (0 != sec_filea) \ - VG_(core_panic)("repeated section!\n"); \ - sec_filea = (void*)(dimage + shdr[i].sh_offset); \ - sec_size = shdr[i].sh_size; \ - nobits = shdr[i].sh_type == SHT_NOBITS; \ - TRACE_SYMTAB( "%18s: filea %p .. %p\n", \ - sec_name, (UChar*)sec_filea, \ - ((UChar*)sec_filea) + sec_size - 1); \ - /* SHT_NOBITS sections have zero size in the file. */ \ - if ( shdr[i].sh_offset + (nobits ? 0 : sec_size) > n_dimage ) { \ - ML_(symerr)(" section beyond image end?!"); \ - goto out; \ - } \ - } - - FIND(".stab", stab_sz, stab) - FIND(".stabstr", stabstr_sz, stabstr) - FIND(".debug_line", debug_line_sz, debug_line) - FIND(".debug_info", debug_info_sz, debug_info) - FIND(".debug_abbrev", debug_abbv_sz, debug_abbv) - FIND(".debug_str", debug_str_sz, debug_str) - FIND(".debug", dwarf1d_sz, dwarf1d) - FIND(".line", dwarf1l_sz, dwarf1l) - -# undef FIND - } - } - } - } - - /* Read symbols */ - { - void (*read_elf_symtab)(SegInfo*,Char*,ElfXX_Sym*, - UInt,UChar*,UInt,UChar*); -# if defined(VGP_ppc64_linux) - read_elf_symtab = read_elf_symtab__ppc64_linux; -# else - read_elf_symtab = read_elf_symtab__normal; -# endif - read_elf_symtab(si, "symbol table", - o_symtab, o_symtab_sz, - o_strtab, o_strtab_sz, opd_filea); - - read_elf_symtab(si, "dynamic symbol table", - o_dynsym, o_dynsym_sz, - o_dynstr, o_dynstr_sz, opd_filea); - } - - /* Read .eh_frame (call-frame-info) if any */ - if (ehframe) { - ML_(read_callframe_info_dwarf2) ( si, ehframe, ehframe_sz, ehframe_vma ); - } - - /* Read the stabs and/or dwarf2 debug information, if any. It - appears reading stabs stuff on amd64-linux doesn't work, so - we ignore it. 
*/ -# if !defined(VGP_amd64_linux) - if (stab && stabstr) { - ML_(read_debuginfo_stabs) ( si, stab, stab_sz, - stabstr, stabstr_sz ); - } -# endif - /* jrs 2006-01-01: icc-8.1 has been observed to generate - binaries without debug_str sections. Don't preclude - debuginfo reading for that reason, but, in - read_unitinfo_dwarf2, do check that debugstr is non-NULL - before using it. */ - if (debug_info && debug_abbv && debug_line /* && debug_str */) { - ML_(read_debuginfo_dwarf2) ( si, - debug_info, debug_info_sz, - debug_abbv, - debug_line, debug_line_sz, - debug_str ); - } - if (dwarf1d && dwarf1l) { - ML_(read_debuginfo_dwarf1) ( si, dwarf1d, dwarf1d_sz, - dwarf1l, dwarf1l_sz ); - } - } - res = True; - - out: { - SysRes m_res; - /* Last, but not least, heave the image(s) back overboard. */ - if (dimage) { - m_res = VG_(am_munmap_valgrind) ( dimage, n_dimage ); - vg_assert(!m_res.isError); - } - m_res = VG_(am_munmap_valgrind) ( oimage, n_oimage ); - vg_assert(!m_res.isError); - return res; - } -} - -/*------------------------------------------------------------*/ -/*--- Main entry point for symbols table reading. ---*/ -/*------------------------------------------------------------*/ - -static SegInfo* -alloc_SegInfo(Addr start, SizeT size, OffT foffset, const Char* filename) -{ - SegInfo* si = VG_(arena_calloc)(VG_AR_SYMTAB, 1, sizeof(SegInfo)); - - si->start = start; - si->size = size; - si->foffset = foffset; - si->filename = VG_(arena_strdup)(VG_AR_SYMTAB, filename); - - si->ref = 1; - - // Everything else -- pointers, sizes, arrays -- is zeroed by calloc. 
- - return si; -} - -static void freeSegInfo ( SegInfo* si ) -{ - struct strchunk *chunk, *next; - vg_assert(si != NULL); - if (si->filename) VG_(arena_free)(VG_AR_SYMTAB, si->filename); - if (si->symtab) VG_(arena_free)(VG_AR_SYMTAB, si->symtab); - if (si->loctab) VG_(arena_free)(VG_AR_SYMTAB, si->loctab); - if (si->scopetab) VG_(arena_free)(VG_AR_SYMTAB, si->scopetab); - if (si->cfisi) VG_(arena_free)(VG_AR_SYMTAB, si->cfisi); - - for(chunk = si->strchunks; chunk != NULL; chunk = next) { - next = chunk->next; - VG_(arena_free)(VG_AR_SYMTAB, chunk); - } - VG_(arena_free)(VG_AR_SYMTAB, si); -} - - -SegInfo *VG_(read_seg_symbols) ( Addr seg_addr, SizeT seg_len, - OffT seg_offset, const Char* seg_filename) -{ - SegInfo* si = alloc_SegInfo(seg_addr, seg_len, seg_offset, seg_filename); - - if (!read_elf_debug_info ( si )) { - // Something went wrong (eg. bad ELF file). - freeSegInfo( si ); - si = NULL; - - } else { - // Prepend si to segInfo_list - si->next = segInfo_list; - segInfo_list = si; - - canonicaliseSymtab ( si ); - canonicaliseLoctab ( si ); - canonicaliseScopetab ( si ); - canonicaliseCfiSI ( si ); - - /* notify m_redir about it */ - VG_(redir_notify_new_SegInfo)( si ); - } - - return si; -} - - -/* When an munmap() call happens, check to see whether it corresponds - to a segment for a .so, and if so discard the relevant SegInfo. - This might not be a very clever idea from the point of view of - accuracy of error messages, but we need to do it in order to - maintain the no-overlapping invariant. -*/ -static void unload_symbols ( Addr start, SizeT length ) -{ - SegInfo** prev_next_ptr = &segInfo_list; - SegInfo* curr = segInfo_list; - - while (curr) { - if (start == curr->start) { - // Found it; remove from list and free it. - if (VG_(clo_verbosity) > 1 || VG_(clo_trace_redir)) - VG_(message)(Vg_DebugMsg, - "Discarding syms at %p-%p in %s due to munmap()", - start, start+length, - curr->filename ? 
curr->filename : (Char *)"???"); - vg_assert(*prev_next_ptr == curr); - *prev_next_ptr = curr->next; - VG_(redir_notify_delete_SegInfo)( curr ); - freeSegInfo(curr); - return; - } - prev_next_ptr = &curr->next; - curr = curr->next; - } - - // Not found. -} - -/*------------------------------------------------------------*/ -/*--- Use of symbol table & location info to create ---*/ -/*--- plausible-looking stack dumps. ---*/ -/*------------------------------------------------------------*/ - -/* Find a symbol-table index containing the specified pointer, or -1 - if not found. Binary search. */ - -static Int search_one_symtab ( SegInfo* si, Addr ptr, - Bool match_anywhere_in_fun ) -{ - Addr a_mid_lo, a_mid_hi; - Int mid, size, - lo = 0, - hi = si->symtab_used-1; - while (True) { - /* current unsearched space is from lo to hi, inclusive. */ - if (lo > hi) return -1; /* not found */ - mid = (lo + hi) / 2; - a_mid_lo = si->symtab[mid].addr; - size = ( match_anywhere_in_fun - ? si->symtab[mid].size - : 1); - a_mid_hi = ((Addr)si->symtab[mid].addr) + size - 1; - - if (ptr < a_mid_lo) { hi = mid-1; continue; } - if (ptr > a_mid_hi) { lo = mid+1; continue; } - vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); - return mid; - } -} - - -/* Search all symtabs that we know about to locate ptr. If found, set - *psi to the relevant SegInfo, and *symno to the symtab entry number - within that. If not found, *psi is set to NULL. */ -static void search_all_symtabs ( Addr ptr, /*OUT*/SegInfo** psi, - /*OUT*/Int* symno, - Bool match_anywhere_in_fun ) -{ - Int sno; - SegInfo* si; - - for (si = segInfo_list; si != NULL; si = si->next) { - if (si->start <= ptr && ptr < si->start+si->size) { - sno = search_one_symtab ( si, ptr, match_anywhere_in_fun ); - if (sno == -1) goto not_found; - *symno = sno; - *psi = si; - return; - } - } - not_found: - *psi = NULL; -} - - -/* Find a location-table index containing the specified pointer, or -1 - if not found. Binary search. 
*/ - -static Int search_one_loctab ( SegInfo* si, Addr ptr ) -{ - Addr a_mid_lo, a_mid_hi; - Int mid, - lo = 0, - hi = si->loctab_used-1; - while (True) { - /* current unsearched space is from lo to hi, inclusive. */ - if (lo > hi) return -1; /* not found */ - mid = (lo + hi) / 2; - a_mid_lo = si->loctab[mid].addr; - a_mid_hi = ((Addr)si->loctab[mid].addr) + si->loctab[mid].size - 1; - - if (ptr < a_mid_lo) { hi = mid-1; continue; } - if (ptr > a_mid_hi) { lo = mid+1; continue; } - vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); - return mid; - } -} - - -/* Search all loctabs that we know about to locate ptr. If found, set - *psi to the relevant SegInfo, and *locno to the loctab entry number - within that. If not found, *psi is set to NULL. -*/ -static void search_all_loctabs ( Addr ptr, /*OUT*/SegInfo** psi, - /*OUT*/Int* locno ) -{ - Int lno; - SegInfo* si; - - for (si = segInfo_list; si != NULL; si = si->next) { - if (si->start <= ptr && ptr < si->start+si->size) { - lno = search_one_loctab ( si, ptr ); - if (lno == -1) goto not_found; - *locno = lno; - *psi = si; - return; - } - } - not_found: - *psi = NULL; -} - - -/* Find a scope-table index containing the specified pointer, or -1 - if not found. Binary search. */ - -static Int search_one_scopetab ( SegInfo* si, Addr ptr ) -{ - Addr a_mid_lo, a_mid_hi; - Int mid, - lo = 0, - hi = si->scopetab_used-1; - while (True) { - /* current unsearched space is from lo to hi, inclusive. */ - if (lo > hi) return -1; /* not found */ - mid = (lo + hi) / 2; - a_mid_lo = si->scopetab[mid].addr; - a_mid_hi = ((Addr)si->scopetab[mid].addr) + si->scopetab[mid].size - 1; - - if (ptr < a_mid_lo) { hi = mid-1; continue; } - if (ptr > a_mid_hi) { lo = mid+1; continue; } - vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); - return mid; - } -} - - -/* Search all scopetabs that we know about to locate ptr. If found, set - *psi to the relevant SegInfo, and *locno to the scopetab entry number - within that. If not found, *psi is set to NULL. 
-*/ -static void search_all_scopetabs ( Addr ptr, - /*OUT*/SegInfo** psi, - /*OUT*/Int* scopeno ) -{ - Int scno; - SegInfo* si; - - for (si = segInfo_list; si != NULL; si = si->next) { - if (si->start <= ptr && ptr < si->start+si->size) { - scno = search_one_scopetab ( si, ptr ); - if (scno == -1) goto not_found; - *scopeno = scno; - *psi = si; - return; - } - } - not_found: - *psi = NULL; -} - - -/* Find a CFI-table index containing the specified pointer, or -1 - if not found. Binary search. */ - -static Int search_one_cfitab ( SegInfo* si, Addr ptr ) -{ - Addr a_mid_lo, a_mid_hi; - Int mid, size, - lo = 0, - hi = si->cfisi_used-1; - while (True) { - /* current unsearched space is from lo to hi, inclusive. */ - if (lo > hi) return -1; /* not found */ - mid = (lo + hi) / 2; - a_mid_lo = si->cfisi[mid].base; - size = si->cfisi[mid].len; - a_mid_hi = a_mid_lo + size - 1; - vg_assert(a_mid_hi >= a_mid_lo); - if (ptr < a_mid_lo) { hi = mid-1; continue; } - if (ptr > a_mid_hi) { lo = mid+1; continue; } - vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); - return mid; - } -} - - -/* The whole point of this whole big deal: map a code address to a - plausible symbol name. Returns False if no idea; otherwise True. - Caller supplies buf and nbuf. If demangle is False, don't do - demangling, regardless of VG_(clo_demangle) -- probably because the - call has come from VG_(get_fnname_nodemangle)(). 
*/ -static -Bool get_fnname ( Bool demangle, Addr a, Char* buf, Int nbuf, - Bool match_anywhere_in_fun, Bool show_offset) -{ - SegInfo* si; - Int sno; - Int offset; - - search_all_symtabs ( a, &si, &sno, match_anywhere_in_fun ); - if (si == NULL) - return False; - if (demangle) { - VG_(demangle) ( True/*do C++ demangle*/, - si->symtab[sno].name, buf, nbuf ); - } else { - VG_(strncpy_safely) ( buf, si->symtab[sno].name, nbuf ); - } - - offset = a - si->symtab[sno].addr; - if (show_offset && offset != 0) { - Char buf2[12]; - Char* symend = buf + VG_(strlen)(buf); - Char* end = buf + nbuf; - Int len; - - len = VG_(sprintf)(buf2, "%c%d", - offset < 0 ? '-' : '+', - offset < 0 ? -offset : offset); - vg_assert(len < (Int)sizeof(buf2)); - - if (len < (end - symend)) { - Char *cp = buf2; - VG_(memcpy)(symend, cp, len+1); - } - } - - return True; -} - -/* ppc64-linux only: find the TOC pointer (R2 value) that should be in - force at the entry point address of the function containing - guest_code_addr. Returns 0 if not known. */ -Addr VG_(get_tocptr) ( Addr guest_code_addr ) -{ - SegInfo* si; - Int sno; - search_all_symtabs ( guest_code_addr, - &si, &sno, True/*match_anywhere_in_fun*/ ); - if (si == NULL) - return 0; - else - return si->symtab[sno].tocptr; -} - -/* This is available to tools... always demangle C++ names, - match anywhere in function, but don't show offsets. */ -Bool VG_(get_fnname) ( Addr a, Char* buf, Int nbuf ) -{ - return get_fnname ( /*demangle*/True, a, buf, nbuf, - /*match_anywhere_in_fun*/True, - /*show offset?*/False ); -} - -/* This is available to tools... always demangle C++ names, - match anywhere in function, and show offset if nonzero. */ -Bool VG_(get_fnname_w_offset) ( Addr a, Char* buf, Int nbuf ) -{ - return get_fnname ( /*demangle*/True, a, buf, nbuf, - /*match_anywhere_in_fun*/True, - /*show offset?*/True ); -} - -/* This is available to tools... 
always demangle C++ names, - only succeed if 'a' matches first instruction of function, - and don't show offsets. */ -Bool VG_(get_fnname_if_entry) ( Addr a, Char* buf, Int nbuf ) -{ - return get_fnname ( /*demangle*/True, a, buf, nbuf, - /*match_anywhere_in_fun*/False, - /*show offset?*/False ); -} - -/* This is only available to core... don't demangle C++ names, - match anywhere in function, and don't show offsets. */ -Bool VG_(get_fnname_nodemangle) ( Addr a, Char* buf, Int nbuf ) -{ - return get_fnname ( /*demangle*/False, a, buf, nbuf, - /*match_anywhere_in_fun*/True, - /*show offset?*/False ); -} - -/* This is only available to core... don't demangle C++ names, but do - do Z-demangling, match anywhere in function, and don't show - offsets. */ -Bool VG_(get_fnname_Z_demangle_only) ( Addr a, Char* buf, Int nbuf ) -{ -# define N_TMPBUF 4096 /* arbitrary, 4096 == ERRTXT_LEN */ - Char tmpbuf[N_TMPBUF]; - Bool ok; - vg_assert(nbuf > 0); - ok = get_fnname ( /*demangle*/False, a, tmpbuf, N_TMPBUF, - /*match_anywhere_in_fun*/True, - /*show offset?*/False ); - tmpbuf[N_TMPBUF-1] = 0; /* paranoia */ - if (!ok) - return False; - - /* We have something, at least. Try to Z-demangle it. */ - VG_(demangle)( False/*don't do C++ demangling*/, tmpbuf, buf, nbuf); - - buf[nbuf-1] = 0; /* paranoia */ - return True; -# undef N_TMPBUF -} - -/* Map a code address to the name of a shared object file or the executable. - Returns False if no idea; otherwise True. Doesn't require debug info. - Caller supplies buf and nbuf. */ -Bool VG_(get_objname) ( Addr a, Char* buf, Int nbuf ) -{ - SegInfo* si; - - for (si = segInfo_list; si != NULL; si = si->next) { - if (si->start <= a && a < si->start+si->size) { - VG_(strncpy_safely)(buf, si->filename, nbuf); - return True; - } - } - return False; -} - -/* Map a code address to its SegInfo. Returns NULL if not found. Doesn't - require debug info. 
*/ -SegInfo* VG_(find_seginfo) ( Addr a ) -{ - SegInfo* si; - - for (si = segInfo_list; si != NULL; si = si->next) { - if (si->start <= a && a < si->start+si->size) { - return si; - } - } - return NULL; -} - - -/* Map a code address to a filename. Returns True if successful. */ -Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename ) -{ - SegInfo* si; - Int locno; - search_all_loctabs ( a, &si, &locno ); - if (si == NULL) - return False; - VG_(strncpy_safely)(filename, si->loctab[locno].filename, n_filename); - return True; -} - -/* Map a code address to a line number. Returns True if successful. */ -Bool VG_(get_linenum)( Addr a, UInt* lineno ) -{ - SegInfo* si; - Int locno; - search_all_loctabs ( a, &si, &locno ); - if (si == NULL) - return False; - *lineno = si->loctab[locno].lineno; - - return True; -} - -/* Map a code address to a filename/line number/dir name info. - See prototype for detailed description of behaviour. -*/ -Bool VG_(get_filename_linenum) ( Addr a, - /*OUT*/Char* filename, Int n_filename, - /*OUT*/Char* dirname, Int n_dirname, - /*OUT*/Bool* dirname_available, - /*OUT*/UInt* lineno ) -{ - SegInfo* si; - Int locno; - - vg_assert( (dirname == NULL && dirname_available == NULL) - || - (dirname != NULL && dirname_available != NULL) ); - - search_all_loctabs ( a, &si, &locno ); - if (si == NULL) - return False; - VG_(strncpy_safely)(filename, si->loctab[locno].filename, n_filename); - *lineno = si->loctab[locno].lineno; - - if (dirname) { - /* caller wants directory info too .. */ - vg_assert(n_dirname > 0); - if (si->loctab[locno].dirname) { - /* .. and we have some */ - *dirname_available = True; - VG_(strncpy_safely)(dirname, si->loctab[locno].dirname, - n_dirname); - } else { - /* .. but we don't have any */ - *dirname_available = False; - *dirname = 0; - } - } - - return True; -} - -#ifndef TEST - -// Note that R_STACK_PTR and R_FRAME_PTR are used again further below, -// which is why they get a named constant. 
-static Addr regaddr_from_tst(Int regno, ThreadArchState *arch) -{ -#if defined(VGA_x86) - /* This is the Intel register encoding -- integer regs. */ -# define R_STACK_PTR 4 -# define R_FRAME_PTR 5 - switch (regno) { - case 0: return (Addr) & arch->vex.guest_EAX; - case 1: return (Addr) & arch->vex.guest_ECX; - case 2: return (Addr) & arch->vex.guest_EDX; - case 3: return (Addr) & arch->vex.guest_EBX; - case R_STACK_PTR: return (Addr) & arch->vex.guest_ESP; - case R_FRAME_PTR: return (Addr) & arch->vex.guest_EBP; - case 6: return (Addr) & arch->vex.guest_ESI; - case 7: return (Addr) & arch->vex.guest_EDI; - default: return 0; - } -#elif defined(VGA_amd64) - /* This is the AMD64 register encoding -- integer regs. */ -# define R_STACK_PTR 7 -# define R_FRAME_PTR 6 - switch (regno) { - case 0: return (Addr) & arch->vex.guest_RAX; - case 1: return (Addr) & arch->vex.guest_RDX; - case 2: return (Addr) & arch->vex.guest_RCX; - case 3: return (Addr) & arch->vex.guest_RBX; - case 4: return (Addr) & arch->vex.guest_RSI; - case 5: return (Addr) & arch->vex.guest_RDI; - case R_FRAME_PTR: return (Addr) & arch->vex.guest_RBP; - case R_STACK_PTR: return (Addr) & arch->vex.guest_RSP; - case 8: return (Addr) & arch->vex.guest_R8; - case 9: return (Addr) & arch->vex.guest_R9; - case 10: return (Addr) & arch->vex.guest_R10; - case 11: return (Addr) & arch->vex.guest_R11; - case 12: return (Addr) & arch->vex.guest_R12; - case 13: return (Addr) & arch->vex.guest_R13; - case 14: return (Addr) & arch->vex.guest_R14; - case 15: return (Addr) & arch->vex.guest_R15; - default: return 0; - } -#elif defined(VGA_ppc32) || defined(VGA_ppc64) - /* This is the PPC register encoding -- integer regs. 
*/ -# define R_STACK_PTR 1 -# define R_FRAME_PTR 1 - switch (regno) { - case 0: return (Addr) & arch->vex.guest_GPR0; - case R_STACK_PTR: return (Addr) & arch->vex.guest_GPR1; - case 2: return (Addr) & arch->vex.guest_GPR2; - case 3: return (Addr) & arch->vex.guest_GPR3; - case 4: return (Addr) & arch->vex.guest_GPR4; - case 5: return (Addr) & arch->vex.guest_GPR5; - case 6: return (Addr) & arch->vex.guest_GPR6; - case 7: return (Addr) & arch->vex.guest_GPR7; - case 8: return (Addr) & arch->vex.guest_GPR8; - case 9: return (Addr) & arch->vex.guest_GPR9; - case 10: return (Addr) & arch->vex.guest_GPR10; - case 11: return (Addr) & arch->vex.guest_GPR11; - case 12: return (Addr) & arch->vex.guest_GPR12; - case 13: return (Addr) & arch->vex.guest_GPR13; - case 14: return (Addr) & arch->vex.guest_GPR14; - case 15: return (Addr) & arch->vex.guest_GPR15; - case 16: return (Addr) & arch->vex.guest_GPR16; - case 17: return (Addr) & arch->vex.guest_GPR17; - case 18: return (Addr) & arch->vex.guest_GPR18; - case 19: return (Addr) & arch->vex.guest_GPR19; - case 20: return (Addr) & arch->vex.guest_GPR20; - case 21: return (Addr) & arch->vex.guest_GPR21; - case 22: return (Addr) & arch->vex.guest_GPR22; - case 23: return (Addr) & arch->vex.guest_GPR23; - case 24: return (Addr) & arch->vex.guest_GPR24; - case 25: return (Addr) & arch->vex.guest_GPR25; - case 26: return (Addr) & arch->vex.guest_GPR26; - case 27: return (Addr) & arch->vex.guest_GPR27; - case 28: return (Addr) & arch->vex.guest_GPR28; - case 29: return (Addr) & arch->vex.guest_GPR29; - case 30: return (Addr) & arch->vex.guest_GPR30; - case 31: return (Addr) & arch->vex.guest_GPR31; - default: return 0; - } -#else -# error Unknown platform -#endif -} - - -/* return a pointer to a register (now for 5 other impossible things - before breakfast) */ -static Addr regaddr(ThreadId tid, Int regno) -{ - Addr ret = regaddr_from_tst(regno, &VG_(threads)[tid].arch); - - if (ret == 0) { - Char buf[100]; - VG_(describe_IP)( 
VG_(get_IP)(tid), buf, 100 ); - VG_(printf)("mysterious register %d used at %s\n", regno, buf); - } - - return ret; -} - -/* Get a list of all variables in scope, working out from the directly - current one */ -Variable* ML_(get_scope_variables)(ThreadId tid) -{ - static const Bool debug = False; - Variable *list, *end; - Addr eip; - SegInfo *si; - Int scopeidx; - Scope *scope; - Int distance; - static const Int maxsyms = 1000; - Int nsyms = maxsyms; - - list = end = NULL; - - eip = VG_(get_IP)(tid); - - search_all_scopetabs(eip, &si, &scopeidx); - - if (debug) - VG_(printf)("eip=%p si=%p (%s; offset=%p) scopeidx=%d\n", - eip, si, si ? si->filename : (Char *)"???", - si ? si->offset : 0x99999, scopeidx); - - if (si == NULL) - return NULL; /* nothing in scope (should use global scope at least) */ - - if (debug) { - ScopeRange *sr = &si->scopetab[scopeidx]; - Char file[100]; - Int line; - - if (!VG_(get_filename_linenum)(sr->addr, file, sizeof(file), - NULL, 0, NULL, &line)) - file[0] = 0; - - VG_(printf)("found scope range %p: eip=%p (%s:%d) size=%d scope=%p\n", - sr, sr->addr, file, line, sr->size, sr->scope); - } - - distance = 0; - for (scope = si->scopetab[scopeidx].scope; - scope != NULL; - scope = scope->outer, distance++) { - UInt i; - - for(i = 0; i < scope->nsyms; i++) { - Sym *sym = &scope->syms[i]; - Variable *v; - - if (nsyms-- == 0) { - VG_(printf)("max %d syms reached\n", maxsyms); - return list; - } - - v = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*v)); - - v->next = NULL; - v->distance = distance; - v->type = ML_(st_basetype)(sym->type, False); - v->name = VG_(arena_strdup)(VG_AR_SYMTAB, sym->name); - v->container = NULL; - v->size = ML_(st_sizeof)(sym->type); - - if (debug && 0) - VG_(printf)("sym->name=%s sym->kind=%d offset=%d\n", sym->name, sym->kind, sym->u.offset); - switch(sym->kind) { - - case SyGlobal: - case SyStatic: - if (sym->u.addr == 0) { - /* XXX lookup value */ - } - v->valuep = sym->u.addr; - break; - - case SyReg: - v->valuep = 
regaddr(tid, sym->u.regno); - break; - - case SyEBPrel: - case SyESPrel: { - Addr reg = *(Addr*)regaddr(tid, sym->kind == SyESPrel - ? R_STACK_PTR : R_FRAME_PTR); - if (debug) - VG_(printf)("reg=%p+%d=%p\n", reg, sym->u.offset, reg+sym->u.offset); - v->valuep = reg + sym->u.offset; - break; - } - - case SyType: - VG_(core_panic)("unexpected typedef in scope"); - } - - if (v->valuep == 0) { - /* not interesting or useful */ - VG_(arena_free)(VG_AR_SYMTAB, v); - continue; - } - - /* append to end of list */ - if (list == NULL) - list = end = v; - else { - end->next = v; - end = v; - } - } - } - - return list; -} - -# undef R_STACK_PTR -# undef R_FRAME_PTR - -#endif /* TEST */ - -/* Print into buf info on code address, function name and filename */ - -static Int putStr ( Int n, Int n_buf, Char* buf, Char* str ) -{ - for (; n < n_buf-1 && *str != 0; n++,str++) - buf[n] = *str; - buf[n] = '\0'; - return n; -} -static Int putStrEsc ( Int n, Int n_buf, Char* buf, Char* str ) -{ - Char alt[2]; - for (; *str != 0; str++) { - switch (*str) { - case '&': n = putStr( n, n_buf, buf, "&"); break; - case '<': n = putStr( n, n_buf, buf, "<"); break; - case '>': n = putStr( n, n_buf, buf, ">"); break; - default: alt[0] = *str; - alt[1] = 0; - n = putStr( n, n_buf, buf, alt ); - break; - } - } - return n; -} - -Char* VG_(describe_IP)(Addr eip, Char* buf, Int n_buf) -{ -# define APPEND(_str) \ - n = putStr(n, n_buf, buf, _str); -# define APPEND_ESC(_str) \ - n = putStrEsc(n, n_buf, buf, _str); -# define BUF_LEN 4096 - - UInt lineno; - UChar ibuf[50]; - Int n = 0; - static UChar buf_fn[BUF_LEN]; - static UChar buf_obj[BUF_LEN]; - static UChar buf_srcloc[BUF_LEN]; - static UChar buf_dirname[BUF_LEN]; - Bool know_dirinfo = False; - Bool know_fnname = VG_(get_fnname) (eip, buf_fn, BUF_LEN); - Bool know_objname = VG_(get_objname)(eip, buf_obj, BUF_LEN); - Bool know_srcloc = VG_(get_filename_linenum)( - eip, - buf_srcloc, BUF_LEN, - buf_dirname, BUF_LEN, &know_dirinfo, - &lineno - ); - if 
(VG_(clo_xml)) { - - Bool human_readable = True; - HChar* maybe_newline = human_readable ? "\n " : ""; - HChar* maybe_newline2 = human_readable ? "\n " : ""; - - /* Print in XML format, dumping in as much info as we know. */ - APPEND(""); - VG_(sprintf)(ibuf,"0x%llx", (ULong)eip); - APPEND(maybe_newline); - APPEND(ibuf); - if (know_objname) { - APPEND(maybe_newline); - APPEND(""); - APPEND_ESC(buf_obj); - APPEND(""); - } - if (know_fnname) { - APPEND(maybe_newline); - APPEND(""); - APPEND_ESC(buf_fn); - APPEND(""); - } - if (know_srcloc) { - if (know_dirinfo) { - APPEND(maybe_newline); - APPEND(""); - APPEND(buf_dirname); - APPEND(""); - } - APPEND(maybe_newline); - APPEND(""); - APPEND_ESC(buf_srcloc); - APPEND(""); - APPEND(maybe_newline); - APPEND(""); - VG_(sprintf)(ibuf,"%d",lineno); - APPEND(ibuf); - APPEND(""); - } - APPEND(maybe_newline2); - APPEND(""); - - } else { - - /* Print for humans to read */ - VG_(sprintf)(ibuf,"0x%llx: ", (ULong)eip); - APPEND(ibuf); - if (know_fnname) { - APPEND(buf_fn); - if (!know_srcloc && know_objname) { - APPEND(" (in "); - APPEND(buf_obj); - APPEND(")"); - } - } else if (know_objname && !know_srcloc) { - APPEND("(within "); - APPEND(buf_obj); - APPEND(")"); - } else { - APPEND("???"); - } - if (know_srcloc) { - APPEND(" ("); - APPEND(buf_srcloc); - APPEND(":"); - VG_(sprintf)(ibuf,"%d",lineno); - APPEND(ibuf); - APPEND(")"); - } - - } - return buf; - -# undef APPEND -# undef APPEND_ESC -# undef BUF_LEN -} - -/* Returns True if OK. If not OK, *{ip,sp,fp}P are not changed. */ -/* NOTE: this function may rearrange the order of entries in the - SegInfo list. 
*/ -Bool VG_(use_CFI_info) ( /*MOD*/Addr* ipP, - /*MOD*/Addr* spP, - /*MOD*/Addr* fpP, - Addr min_accessible, - Addr max_accessible ) -{ - Int i; - SegInfo* si; - CfiSI* cfisi = NULL; - Addr cfa, ipHere, spHere, fpHere, ipPrev, spPrev, fpPrev; - - static UInt n_search = 0; - static UInt n_steps = 0; - n_search++; - - if (0) VG_(printf)("search for %p\n", *ipP); - - for (si = segInfo_list; si != NULL; si = si->next) { - n_steps++; - - /* Use the per-SegInfo summary address ranges to skip - inapplicable SegInfos quickly. */ - if (si->cfisi_used == 0) - continue; - if (*ipP < si->cfisi_minaddr || *ipP > si->cfisi_maxaddr) - continue; - - i = search_one_cfitab( si, *ipP ); - if (i != -1) { - vg_assert(i >= 0 && i < si->cfisi_used); - cfisi = &si->cfisi[i]; - break; - } - } - - if (cfisi == NULL) - return False; - - if (0 && ((n_search & 0xFFFFF) == 0)) - VG_(printf)("%u %u\n", n_search, n_steps); - - /* Start of performance-enhancing hack: once every 16 (chosen - hackily after profiling) successful searchs, move the found - SegInfo one step closer to the start of the list. This makes - future searches cheaper. For starting konqueror on amd64, this - in fact reduces the total amount of searching done by the above - find-the-right-SegInfo loop by more than a factor of 20. */ - if ((n_search & 0xF) == 0) { - /* Move si one step closer to the start of the list. */ - SegInfo* si0 = segInfo_list; - SegInfo* si1 = NULL; - SegInfo* si2 = NULL; - SegInfo* tmp; - while (True) { - if (si0 == NULL) break; - if (si0 == si) break; - si2 = si1; - si1 = si0; - si0 = si0->next; - } - if (si0 == si && si0 != NULL && si1 != NULL && si2 != NULL) { - /* si0 points to si, si1 to its predecessor, and si2 to si1's - predecessor. Swap si0 and si1, that is, move si0 one step - closer to the start of the list. */ - tmp = si0->next; - si2->next = si0; - si0->next = si1; - si1->next = tmp; - } - } - /* End of performance-enhancing hack. 
*/ - - if (0) { - VG_(printf)("found cfisi: "); - ML_(ppCfiSI)(cfisi); - } - - ipPrev = spPrev = fpPrev = 0; - - ipHere = *ipP; - spHere = *spP; - fpHere = *fpP; - - cfa = cfisi->cfa_off + (cfisi->cfa_sprel ? spHere : fpHere); - -# define COMPUTE(_prev, _here, _how, _off) \ - do { \ - switch (_how) { \ - case CFIR_UNKNOWN: \ - return False; \ - case CFIR_SAME: \ - _prev = _here; break; \ - case CFIR_MEMCFAREL: { \ - Addr a = cfa + (Word)_off; \ - if (a < min_accessible \ - || a+sizeof(Addr) > max_accessible) \ - return False; \ - _prev = *(Addr*)a; \ - break; \ - } \ - case CFIR_CFAREL: \ - _prev = cfa + (Word)_off; \ - break; \ - } \ - } while (0) - - COMPUTE(ipPrev, ipHere, cfisi->ra_how, cfisi->ra_off); - COMPUTE(spPrev, spHere, cfisi->sp_how, cfisi->sp_off); - COMPUTE(fpPrev, fpHere, cfisi->fp_how, cfisi->fp_off); - -# undef COMPUTE - - *ipP = ipPrev; - *spP = spPrev; - *fpP = fpPrev; - return True; -} - - -/*------------------------------------------------------------*/ -/*--- SegInfo accessor functions ---*/ -/*------------------------------------------------------------*/ - -const SegInfo* VG_(next_seginfo)(const SegInfo* si) -{ - if (si == NULL) - return segInfo_list; - return si->next; -} - -Addr VG_(seginfo_start)(const SegInfo* si) -{ - return si->start; -} - -SizeT VG_(seginfo_size)(const SegInfo* si) -{ - return si->size; -} - -const UChar* VG_(seginfo_soname)(const SegInfo* si) -{ - return si->soname; -} - -const UChar* VG_(seginfo_filename)(const SegInfo* si) -{ - return si->filename; -} - -ULong VG_(seginfo_sym_offset)(const SegInfo* si) -{ - return si->offset; -} - -VgSectKind VG_(seginfo_sect_kind)(Addr a) -{ - SegInfo* si; - VgSectKind ret = Vg_SectUnknown; - - for(si = segInfo_list; si != NULL; si = si->next) { - if (a >= si->start && a < (si->start + si->size)) { - - if (0) - VG_(printf)( - "addr=%p si=%p %s got=%p %d plt=%p %d data=%p %d bss=%p %d\n", - a, si, si->filename, - si->got_start_vma, si->got_size, - si->plt_start_vma, si->plt_size, 
- si->data_start_vma, si->data_size, - si->bss_start_vma, si->bss_size); - - ret = Vg_SectText; - - if (a >= si->data_start_vma && a < (si->data_start_vma + si->data_size)) - ret = Vg_SectData; - else - if (a >= si->bss_start_vma && a < (si->bss_start_vma + si->bss_size)) - ret = Vg_SectBSS; - else - if (a >= si->plt_start_vma && a < (si->plt_start_vma + si->plt_size)) - ret = Vg_SectPLT; - else - if (a >= si->got_start_vma && a < (si->got_start_vma + si->got_size)) - ret = Vg_SectGOT; - } - } - - return ret; -} - -Int VG_(seginfo_syms_howmany) ( const SegInfo *si ) -{ - return si->symtab_used; -} - -void VG_(seginfo_syms_getidx) ( const SegInfo *si, - Int idx, - /*OUT*/Addr* addr, - /*OUT*/UInt* size, - /*OUT*/HChar** name ) -{ - vg_assert(idx >= 0 && idx < si->symtab_used); - if (addr) *addr = si->symtab[idx].addr; - if (size) *size = si->symtab[idx].size; - if (name) *name = (HChar*)si->symtab[idx].name; -} - - -/*--------------------------------------------------------------------*/ -/*--- end ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_stacktrace.c b/coregrind/m_stacktrace.c index ce0b3facaa..3d307fc45a 100644 --- a/coregrind/m_stacktrace.c +++ b/coregrind/m_stacktrace.c @@ -154,7 +154,7 @@ UInt VG_(get_StackTrace2) ( ThreadId tid_if_known, /* That didn't work out, so see if there is any CFI info to hand which can be used. */ - if ( VG_(use_CFI_info)( &ip, &sp, &fp, fp_min, fp_max ) ) { + if ( VG_(use_CF_info)( &ip, &sp, &fp, fp_min, fp_max ) ) { ips[i++] = ip; if (debug) VG_(printf)(" ipsC[%d]=%08p\n", i-1, ips[i-1]); @@ -200,7 +200,7 @@ UInt VG_(get_StackTrace2) ( ThreadId tid_if_known, /* First off, see if there is any CFI info to hand which can be used. 
*/ - if ( VG_(use_CFI_info)( &ip, &sp, &fp, fp_min, fp_max ) ) { + if ( VG_(use_CF_info)( &ip, &sp, &fp, fp_min, fp_max ) ) { ips[i++] = ip; if (debug) VG_(printf)(" ipsC[%d]=%08p\n", i-1, ips[i-1]); @@ -208,7 +208,7 @@ UInt VG_(get_StackTrace2) ( ThreadId tid_if_known, continue; } - /* If VG_(use_CFI_info) fails, it won't modify ip/sp/fp, so + /* If VG_(use_CF_info) fails, it won't modify ip/sp/fp, so we can safely try the old-fashioned method. */ /* This bit is supposed to deal with frames resulting from functions which begin "pushq %rbp ; movq %rsp, %rbp". diff --git a/coregrind/pub_core_debuginfo.h b/coregrind/pub_core_debuginfo.h index 887aba00c3..ce6e4f2875 100644 --- a/coregrind/pub_core_debuginfo.h +++ b/coregrind/pub_core_debuginfo.h @@ -35,8 +35,6 @@ // PURPOSE: This module deals with reading debug info and symbol tables // to get file and function names, line numbers, variable types, and // to help stack unwinding. -// -// And its internals are currently a mess. Its interface is ugly, too. //-------------------------------------------------------------------- #include "pub_tool_debuginfo.h" @@ -53,16 +51,15 @@ extern void VG_(di_notify_munmap)( Addr a, SizeT len ); extern void VG_(di_notify_mprotect)( Addr a, SizeT len, UInt prot ); -extern SegInfo *VG_(read_seg_symbols) ( Addr addr, SizeT len, - OffT offset, const Char* filename); +extern Bool VG_(get_fnname_nodemangle)( Addr a, + Char* fnname, Int n_fnname ); -extern Bool VG_(get_fnname_nodemangle)( Addr a, Char* fnname, Int n_fnname ); - -extern Bool VG_(use_CFI_info) ( /*MOD*/Addr* ipP, - /*MOD*/Addr* spP, - /*MOD*/Addr* fpP, - Addr min_accessible, - Addr max_accessible ); +/* Use DWARF2/3 CFA information to do one step of stack unwinding. 
*/ +extern Bool VG_(use_CF_info) ( /*MOD*/Addr* ipP, + /*MOD*/Addr* spP, + /*MOD*/Addr* fpP, + Addr min_accessible, + Addr max_accessible ); /* ppc64-linux only: find the TOC pointer (R2 value) that should be in force at the entry point address of the function containing diff --git a/helgrind/hg_main.c b/helgrind/hg_main.c index 7164c7b90c..0c86e62363 100644 --- a/helgrind/hg_main.c +++ b/helgrind/hg_main.c @@ -2521,7 +2521,9 @@ static void record_race_error ( ThreadId tid, Addr a, Bool is_write, err_extra.prevstate = prevstate; if (clo_execontext) err_extra.lasttouched = getExeContext(a); - err_extra.addrinfo.expr = VG_(describe_addr)(tid, a); + /* JRS 4 Apr 06: VG_(describe_addr) disappeared from m_debuginfo, + at least for the time being. */ + err_extra.addrinfo.expr = "???"; /* VG_(describe_addr)(tid, a); */ VG_(maybe_record_error)( tid, RaceErr, a, (is_write ? "writing" : "reading"), diff --git a/include/pub_tool_debuginfo.h b/include/pub_tool_debuginfo.h index f82fbfeece..44ca628cb3 100644 --- a/include/pub_tool_debuginfo.h +++ b/include/pub_tool_debuginfo.h @@ -87,9 +87,6 @@ extern Bool VG_(get_objname) ( Addr a, Char* objname, Int n_objname ); */ extern Char* VG_(describe_IP)(Addr eip, Char* buf, Int n_buf); -/* Returns a string containing an expression for the given - address. String is malloced with VG_(malloc)() */ -Char *VG_(describe_addr)(ThreadId, Addr); /*====================================================================*/ /*=== Obtaining segment information ===*/