]> git.ipfire.org Git - thirdparty/binutils-gdb.git/blame - gold/dwarf_reader.cc
From Craig Silverstein: Track_relocs doesn't need to hold onto the
[thirdparty/binutils-gdb.git] / gold / dwarf_reader.cc
CommitLineData
5c2c6c95
ILT
1// dwarf_reader.cc -- parse dwarf2/3 debug information
2
3// Copyright 2007 Free Software Foundation, Inc.
4// Written by Ian Lance Taylor <iant@google.com>.
5
6// This file is part of gold.
7
8// This program is free software; you can redistribute it and/or modify
9// it under the terms of the GNU General Public License as published by
10// the Free Software Foundation; either version 3 of the License, or
11// (at your option) any later version.
12
13// This program is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16// GNU General Public License for more details.
17
18// You should have received a copy of the GNU General Public License
19// along with this program; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
21// MA 02110-1301, USA.
22
23#include "gold.h"
24
25#include "elfcpp_swap.h"
26#include "dwarf.h"
24badc65 27#include "object.h"
4c50553d 28#include "reloc.h"
5c2c6c95
ILT
29#include "dwarf_reader.h"
30
31namespace {
32
// Read an unsigned LEB128 number.  Each byte contains 7 bits of
// information, plus one bit saying whether the number continues or
// not.  BUFFER points at the first byte of the encoding and the
// number of bytes consumed is stored in *LEN.  Bits that would land
// above bit 63 of the result are discarded: an over-long encoding is
// still consumed in full, but it is never shifted by 64 or more,
// which would be undefined.

uint64_t
read_unsigned_LEB_128(const unsigned char* buffer, size_t* len)
{
  uint64_t result = 0;
  size_t num_read = 0;
  unsigned int shift = 0;
  unsigned char byte;

  do
    {
      byte = *buffer++;
      num_read++;
      // Only groups that still fit in the 64-bit result contribute;
      // SHIFT stops growing once the result is full.
      if (shift < 64)
        {
          result |= (static_cast<uint64_t>(byte & 0x7f)) << shift;
          shift += 7;
        }
    }
  while (byte & 0x80);

  *len = num_read;

  return result;
}
58
// Read a signed LEB128 number.  These are like regular LEB128
// numbers, except the last byte may have a sign bit set.  BUFFER
// points at the first byte of the encoding and the number of bytes
// consumed is stored in *LEN.  The value is assembled and
// sign-extended in unsigned arithmetic so that no signed overflow or
// over-wide shift can occur; bits beyond bit 63 are discarded.

int64_t
read_signed_LEB_128(const unsigned char* buffer, size_t* len)
{
  uint64_t bits = 0;
  unsigned int shift = 0;
  size_t num_read = 0;
  unsigned char byte;

  do
    {
      byte = *buffer++;
      num_read++;
      // Only groups that still fit in the 64-bit result contribute;
      // SHIFT stops growing once the result is full.
      if (shift < 64)
        {
          bits |= static_cast<uint64_t>(byte & 0x7f) << shift;
          shift += 7;
        }
    }
  while (byte & 0x80);

  // Bit 6 of the final byte is the sign.  If the encoding did not
  // fill all 64 bits, propagate it through the remaining high bits.
  if (shift < 64 && (byte & 0x40))
    bits |= ~static_cast<uint64_t>(0) << shift;

  *len = num_read;
  return static_cast<int64_t>(bits);
}
84
85} // End anonymous namespace.
86
87
88namespace gold {
89
// Registers of the DWARF2/3 line number state machine that the
// opcode processor updates.  This is private to the lineinfo
// processor; nothing outside it needs to know the layout.

struct LineStateMachine
{
  // Index of the current source file.
  int file_num;
  // Current address.
  uint64_t address;
  // Current source line.
  int line_num;
  // Current source column.
  int column_num;
  // The section ADDRESS refers to.
  unsigned int shndx;
  // Statement flag ("stmt" means statement).
  bool is_stmt;
  // Basic block flag.
  bool basic_block;
  // Set once the end of a sequence has been reached.
  bool end_sequence;
};
105
106static void
107ResetLineStateMachine(struct LineStateMachine* lsm, bool default_is_stmt)
108{
109 lsm->file_num = 1;
110 lsm->address = 0;
111 lsm->line_num = 1;
112 lsm->column_num = 0;
338f2eba 113 lsm->shndx = -1U;
5c2c6c95
ILT
114 lsm->is_stmt = default_is_stmt;
115 lsm->basic_block = false;
116 lsm->end_sequence = false;
117}
118
24badc65
ILT
119template<int size, bool big_endian>
120Dwarf_line_info<size, big_endian>::Dwarf_line_info(
121 Sized_relobj<size, big_endian>* object)
122 : data_valid_(true), buffer_(NULL), symtab_buffer_(NULL),
123 directories_(1), files_(1)
124{
125 unsigned int debug_shndx;
126 for (debug_shndx = 0; debug_shndx < object->shnum(); ++debug_shndx)
127 if (object->section_name(debug_shndx) == ".debug_line")
128 {
129 off_t buffer_size;
130 this->buffer_ = object->section_contents(
131 debug_shndx, &buffer_size, false);
132 this->buffer_end_ = this->buffer_ + buffer_size;
133 break;
134 }
135 if (this->buffer_ == NULL)
136 {
137 this->data_valid_ = false;
138 return;
139 }
140
141 // Find the relocation section for ".debug_line".
142 bool got_relocs = false;
143 for (unsigned int reloc_shndx = 0;
144 reloc_shndx < object->shnum();
145 ++reloc_shndx)
146 {
147 unsigned int reloc_sh_type = object->section_type(reloc_shndx);
148 if ((reloc_sh_type == elfcpp::SHT_REL
149 || reloc_sh_type == elfcpp::SHT_RELA)
150 && object->section_info(reloc_shndx) == debug_shndx)
151 {
152 got_relocs = this->track_relocs_.initialize(object, reloc_shndx,
153 reloc_sh_type);
154 break;
155 }
156 }
157 if (!got_relocs)
158 {
159 this->data_valid_ = false;
160 return;
161 }
162
163 // Finally, we need the symtab section to interpret the relocs.
164 unsigned int symtab_shndx;
165 for (symtab_shndx = 0; symtab_shndx < object->shnum(); ++symtab_shndx)
166 if (object->section_type(symtab_shndx) == elfcpp::SHT_SYMTAB)
167 {
168 off_t symtab_size;
169 this->symtab_buffer_ = object->section_contents(
170 symtab_shndx, &symtab_size, false);
171 this->symtab_buffer_end_ = this->symtab_buffer_ + symtab_size;
172 break;
173 }
174 if (this->symtab_buffer_ == NULL)
175 {
176 this->data_valid_ = false;
177 return;
178 }
179
180 // Now that we have successfully read all the data, parse the debug
181 // info.
182 this->read_line_mappings();
183}
184
5c2c6c95
ILT
185// Read the DWARF header.
186
187template<int size, bool big_endian>
188const unsigned char*
e43872e9
ILT
189Dwarf_line_info<size, big_endian>::read_header_prolog(
190 const unsigned char* lineptr)
5c2c6c95
ILT
191{
192 uint32_t initial_length = elfcpp::Swap<32, big_endian>::readval(lineptr);
193 lineptr += 4;
194
195 // In DWARF2/3, if the initial length is all 1 bits, then the offset
196 // size is 8 and we need to read the next 8 bytes for the real length.
197 if (initial_length == 0xffffffff)
198 {
199 header_.offset_size = 8;
200 initial_length = elfcpp::Swap<64, big_endian>::readval(lineptr);
201 lineptr += 8;
202 }
203 else
204 header_.offset_size = 4;
205
206 header_.total_length = initial_length;
207
208 gold_assert(lineptr + header_.total_length <= buffer_end_);
209
210 header_.version = elfcpp::Swap<16, big_endian>::readval(lineptr);
211 lineptr += 2;
212
213 if (header_.offset_size == 4)
214 header_.prologue_length = elfcpp::Swap<32, big_endian>::readval(lineptr);
215 else
216 header_.prologue_length = elfcpp::Swap<64, big_endian>::readval(lineptr);
217 lineptr += header_.offset_size;
218
219 header_.min_insn_length = *lineptr;
220 lineptr += 1;
221
222 header_.default_is_stmt = *lineptr;
223 lineptr += 1;
224
225 header_.line_base = *reinterpret_cast<const signed char*>(lineptr);
226 lineptr += 1;
227
228 header_.line_range = *lineptr;
229 lineptr += 1;
230
231 header_.opcode_base = *lineptr;
232 lineptr += 1;
233
234 header_.std_opcode_lengths.reserve(header_.opcode_base + 1);
235 header_.std_opcode_lengths[0] = 0;
236 for (int i = 1; i < header_.opcode_base; i++)
237 {
238 header_.std_opcode_lengths[i] = *lineptr;
239 lineptr += 1;
240 }
241
242 return lineptr;
243}
244
245// The header for a debug_line section is mildly complicated, because
246// the line info is very tightly encoded.
247
e43872e9 248template<int size, bool big_endian>
5c2c6c95 249const unsigned char*
e43872e9
ILT
250Dwarf_line_info<size, big_endian>::read_header_tables(
251 const unsigned char* lineptr)
5c2c6c95
ILT
252{
253 // It is legal for the directory entry table to be empty.
254 if (*lineptr)
255 {
256 int dirindex = 1;
257 while (*lineptr)
258 {
259 const unsigned char* dirname = lineptr;
260 gold_assert(dirindex == static_cast<int>(directories_.size()));
261 directories_.push_back(reinterpret_cast<const char*>(dirname));
262 lineptr += directories_.back().size() + 1;
263 dirindex++;
264 }
265 }
266 lineptr++;
267
268 // It is also legal for the file entry table to be empty.
269 if (*lineptr)
270 {
271 int fileindex = 1;
272 size_t len;
273 while (*lineptr)
274 {
275 const char* filename = reinterpret_cast<const char*>(lineptr);
276 lineptr += strlen(filename) + 1;
277
278 uint64_t dirindex = read_unsigned_LEB_128(lineptr, &len);
279 if (dirindex >= directories_.size())
280 dirindex = 0;
281 lineptr += len;
282
283 read_unsigned_LEB_128(lineptr, &len); // mod_time
284 lineptr += len;
285
286 read_unsigned_LEB_128(lineptr, &len); // filelength
287 lineptr += len;
288
289 gold_assert(fileindex == static_cast<int>(files_.size()));
290 files_.push_back(std::pair<int, std::string>(dirindex, filename));
291 fileindex++;
292 }
293 }
294 lineptr++;
295
296 return lineptr;
297}
298
299// Process a single opcode in the .debug.line structure.
300
301// Templating on size and big_endian would yield more efficient (and
302// simpler) code, but would bloat the binary. Speed isn't important
303// here.
304
e43872e9 305template<int size, bool big_endian>
5c2c6c95 306bool
e43872e9
ILT
307Dwarf_line_info<size, big_endian>::process_one_opcode(
308 const unsigned char* start, struct LineStateMachine* lsm, size_t* len)
5c2c6c95
ILT
309{
310 size_t oplen = 0;
311 size_t templen;
312 unsigned char opcode = *start;
313 oplen++;
314 start++;
315
316 // If the opcode is great than the opcode_base, it is a special
317 // opcode. Most line programs consist mainly of special opcodes.
318 if (opcode >= header_.opcode_base)
319 {
320 opcode -= header_.opcode_base;
321 const int advance_address = ((opcode / header_.line_range)
322 * header_.min_insn_length);
323 lsm->address += advance_address;
324
325 const int advance_line = ((opcode % header_.line_range)
326 + header_.line_base);
327 lsm->line_num += advance_line;
328 lsm->basic_block = true;
329 *len = oplen;
330 return true;
331 }
332
333 // Otherwise, we have the regular opcodes
334 switch (opcode)
335 {
336 case elfcpp::DW_LNS_copy:
337 lsm->basic_block = false;
338 *len = oplen;
339 return true;
340
341 case elfcpp::DW_LNS_advance_pc:
342 {
343 const uint64_t advance_address
344 = read_unsigned_LEB_128(start, &templen);
345 oplen += templen;
346 lsm->address += header_.min_insn_length * advance_address;
347 }
348 break;
349
350 case elfcpp::DW_LNS_advance_line:
351 {
352 const uint64_t advance_line = read_signed_LEB_128(start, &templen);
353 oplen += templen;
354 lsm->line_num += advance_line;
355 }
356 break;
357
358 case elfcpp::DW_LNS_set_file:
359 {
360 const uint64_t fileno = read_unsigned_LEB_128(start, &templen);
361 oplen += templen;
362 lsm->file_num = fileno;
363 }
364 break;
365
366 case elfcpp::DW_LNS_set_column:
367 {
368 const uint64_t colno = read_unsigned_LEB_128(start, &templen);
369 oplen += templen;
370 lsm->column_num = colno;
371 }
372 break;
373
374 case elfcpp::DW_LNS_negate_stmt:
375 lsm->is_stmt = !lsm->is_stmt;
376 break;
377
378 case elfcpp::DW_LNS_set_basic_block:
379 lsm->basic_block = true;
380 break;
381
382 case elfcpp::DW_LNS_fixed_advance_pc:
383 {
384 int advance_address;
e43872e9 385 advance_address = elfcpp::Swap<16, big_endian>::readval(start);
5c2c6c95
ILT
386 oplen += 2;
387 lsm->address += advance_address;
388 }
389 break;
390
391 case elfcpp::DW_LNS_const_add_pc:
392 {
393 const int advance_address = (header_.min_insn_length
394 * ((255 - header_.opcode_base)
395 / header_.line_range));
396 lsm->address += advance_address;
397 }
398 break;
399
400 case elfcpp::DW_LNS_extended_op:
401 {
402 const uint64_t extended_op_len
403 = read_unsigned_LEB_128(start, &templen);
404 start += templen;
405 oplen += templen + extended_op_len;
406
407 const unsigned char extended_op = *start;
408 start++;
409
410 switch (extended_op)
411 {
412 case elfcpp::DW_LNE_end_sequence:
413 lsm->end_sequence = true;
414 *len = oplen;
415 return true;
416
417 case elfcpp::DW_LNE_set_address:
4c50553d
ILT
418 {
419 typename Reloc_map::const_iterator it
420 = reloc_map_.find(start - this->buffer_);
421 if (it != reloc_map_.end())
422 {
423 // value + addend.
424 lsm->address =
425 (elfcpp::Swap<size, big_endian>::readval(start)
426 + it->second.second);
427 lsm->shndx = it->second.first;
428 }
429 else
430 {
431 // Every set_address should have an associated
432 // relocation.
433 this->data_valid_ = false;
434 }
435 break;
24badc65 436 }
5c2c6c95
ILT
437 case elfcpp::DW_LNE_define_file:
438 {
439 const char* filename = reinterpret_cast<const char*>(start);
440 templen = strlen(filename) + 1;
441 start += templen;
442
443 uint64_t dirindex = read_unsigned_LEB_128(start, &templen);
444 if (dirindex >= directories_.size())
445 dirindex = 0;
446 oplen += templen;
447
448 read_unsigned_LEB_128(start, &templen); // mod_time
449 oplen += templen;
450
451 read_unsigned_LEB_128(start, &templen); // filelength
452 oplen += templen;
453
454 files_.push_back(std::pair<int, std::string>(dirindex,
455 filename));
456 }
457 break;
458 }
459 }
460 break;
461
462 default:
463 {
464 // Ignore unknown opcode silently
465 for (int i = 0; i < header_.std_opcode_lengths[opcode]; i++)
466 {
467 size_t templen;
468 read_unsigned_LEB_128(start, &templen);
469 start += templen;
470 oplen += templen;
471 }
472 }
473 break;
474 }
475 *len = oplen;
476 return false;
477}
478
479// Read the debug information at LINEPTR and store it in the line
480// number map.
481
e43872e9 482template<int size, bool big_endian>
5c2c6c95 483unsigned const char*
e43872e9 484Dwarf_line_info<size, big_endian>::read_lines(unsigned const char* lineptr)
5c2c6c95
ILT
485{
486 struct LineStateMachine lsm;
487
488 // LENGTHSTART is the place the length field is based on. It is the
489 // point in the header after the initial length field.
490 const unsigned char* lengthstart = buffer_;
491
492 // In 64 bit dwarf, the initial length is 12 bytes, because of the
493 // 0xffffffff at the start.
494 if (header_.offset_size == 8)
495 lengthstart += 12;
496 else
497 lengthstart += 4;
498
499 while (lineptr < lengthstart + header_.total_length)
500 {
501 ResetLineStateMachine(&lsm, header_.default_is_stmt);
502 while (!lsm.end_sequence)
503 {
504 size_t oplength;
e43872e9 505 bool add_line = this->process_one_opcode(lineptr, &lsm, &oplength);
5c2c6c95
ILT
506 if (add_line)
507 {
508 Offset_to_lineno_entry entry
509 = { lsm.address, lsm.file_num, lsm.line_num };
510 line_number_map_[lsm.shndx].push_back(entry);
511 }
512 lineptr += oplength;
513 }
514 }
515
516 return lengthstart + header_.total_length;
517}
518
4c50553d
ILT
519// Looks in the symtab to see what section a symbol is in.
520
521template<int size, bool big_endian>
522unsigned int
523Dwarf_line_info<size, big_endian>::symbol_section(
524 unsigned int sym,
525 typename elfcpp::Elf_types<size>::Elf_Addr* value)
526{
527 const int symsize = elfcpp::Elf_sizes<size>::sym_size;
528 gold_assert(this->symtab_buffer_ + sym * symsize < this->symtab_buffer_end_);
529 elfcpp::Sym<size, big_endian> elfsym(this->symtab_buffer_ + sym * symsize);
530 *value = elfsym.get_st_value();
531 return elfsym.get_st_shndx();
532}
533
534// Read the relocations into a Reloc_map.
535
536template<int size, bool big_endian>
537void
538Dwarf_line_info<size, big_endian>::read_relocs()
539{
540 if (this->symtab_buffer_ == NULL)
541 return;
542
543 typename elfcpp::Elf_types<size>::Elf_Addr value;
544 off_t reloc_offset;
24badc65 545 while ((reloc_offset = this->track_relocs_.next_offset()) != -1)
4c50553d 546 {
24badc65 547 const unsigned int sym = this->track_relocs_.next_symndx();
4c50553d
ILT
548 const unsigned int shndx = this->symbol_section(sym, &value);
549 this->reloc_map_[reloc_offset] = std::make_pair(shndx, value);
24badc65 550 this->track_relocs_.advance(reloc_offset + 1);
4c50553d
ILT
551 }
552}
553
554// Read the line number info.
555
e43872e9 556template<int size, bool big_endian>
5c2c6c95 557void
e43872e9 558Dwarf_line_info<size, big_endian>::read_line_mappings()
5c2c6c95 559{
24badc65
ILT
560 if (this->data_valid_ == false)
561 return;
562
4c50553d
ILT
563 read_relocs();
564 while (this->buffer_ < this->buffer_end_)
e43872e9 565 {
4c50553d 566 const unsigned char* lineptr = this->buffer_;
e43872e9
ILT
567 lineptr = this->read_header_prolog(lineptr);
568 lineptr = this->read_header_tables(lineptr);
569 lineptr = this->read_lines(lineptr);
4c50553d 570 this->buffer_ = lineptr;
e43872e9
ILT
571 }
572
573 // Sort the lines numbers, so addr2line can use binary search.
574 for (typename Lineno_map::iterator it = line_number_map_.begin();
5c2c6c95
ILT
575 it != line_number_map_.end();
576 ++it)
577 // Each vector needs to be sorted by offset.
4c50553d 578 std::sort(it->second.begin(), it->second.end());
5c2c6c95
ILT
579}
580
581// Return a string for a file name and line number.
582
e43872e9 583template<int size, bool big_endian>
5c2c6c95 584std::string
e43872e9 585Dwarf_line_info<size, big_endian>::addr2line(unsigned int shndx, off_t offset)
5c2c6c95 586{
4c50553d
ILT
587 if (this->data_valid_ == false)
588 return "";
589
5c2c6c95 590 const Offset_to_lineno_entry lookup_key = { offset, 0, 0 };
4c50553d
ILT
591 std::vector<Offset_to_lineno_entry>& offsets = this->line_number_map_[shndx];
592 if (offsets.empty())
593 return "";
594
e43872e9 595 typename std::vector<Offset_to_lineno_entry>::const_iterator it
5c2c6c95
ILT
596 = std::lower_bound(offsets.begin(), offsets.end(), lookup_key);
597
598 // If we found an exact match, great, otherwise find the last entry
599 // before the passed-in offset.
600 if (it->offset > offset)
601 {
602 if (it == offsets.begin())
603 return "";
604 --it;
605 gold_assert(it->offset < offset);
606 }
607
608 // Convert the file_num + line_num into a string.
609 std::string ret;
610 gold_assert(it->file_num < static_cast<int>(files_.size()));
611 const std::pair<int, std::string>& filename_pair = files_[it->file_num];
612 gold_assert(filename_pair.first < static_cast<int>(directories_.size()));
613 const std::string& dirname = directories_[filename_pair.first];
614 const std::string& filename = filename_pair.second;
615 if (!dirname.empty())
616 {
617 ret += dirname;
618 ret += "/";
619 }
620 ret += filename;
621 if (ret.empty())
622 ret = "(unknown)";
623
624 char buffer[64]; // enough to hold a line number
625 snprintf(buffer, sizeof(buffer), "%d", it->line_num);
626 ret += ":";
627 ret += buffer;
628
629 return ret;
630}
631
// Explicit instantiations, one per size/endianness combination for
// which the corresponding HAVE_TARGET_* macro is defined.

#ifdef HAVE_TARGET_32_LITTLE
template class Dwarf_line_info<32, false>;
#endif

#ifdef HAVE_TARGET_32_BIG
template class Dwarf_line_info<32, true>;
#endif

#ifdef HAVE_TARGET_64_LITTLE
template class Dwarf_line_info<64, false>;
#endif

#ifdef HAVE_TARGET_64_BIG
template class Dwarf_line_info<64, true>;
#endif
651
652} // End namespace gold.