]> git.ipfire.org Git - thirdparty/glibc.git/blame - scripts/glibcelf.py
Fix all the remaining misspellings -- BZ 25337
[thirdparty/glibc.git] / scripts / glibcelf.py
CommitLineData
30035d67
FW
1#!/usr/bin/python3
2# ELF support functionality for Python.
6d7e8eda 3# Copyright (C) 2022-2023 Free Software Foundation, Inc.
30035d67
FW
4# This file is part of the GNU C Library.
5#
6# The GNU C Library is free software; you can redistribute it and/or
7# modify it under the terms of the GNU Lesser General Public
8# License as published by the Free Software Foundation; either
9# version 2.1 of the License, or (at your option) any later version.
10#
11# The GNU C Library is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14# Lesser General Public License for more details.
15#
16# You should have received a copy of the GNU Lesser General Public
17# License along with the GNU C Library; if not, see
18# <https://www.gnu.org/licenses/>.
19
20"""Basic ELF parser.
21
22Use Image.readfile(path) to read an ELF file into memory and begin
23parsing it.
24
25"""
26
27import collections
340097d0
FW
28import functools
29import os
30035d67
FW
30import struct
31
340097d0
FW
32import glibcpp
33
34class _MetaNamedValue(type):
35 """Used to set up _NamedValue subclasses."""
b571f3ad 36
30035d67 37 @classmethod
340097d0
FW
38 def __prepare__(metacls, cls, bases, **kwds):
39 # Indicates an int-based class. Needed for types like Shn.
40 int_based = False
41 for base in bases:
42 if issubclass(base, int):
43 int_based = int
44 break
45 return dict(by_value={},
46 by_name={},
47 prefix=None,
48 _int_based=int_based)
49
50 def __contains__(self, other):
51 return other in self.by_value
52
class _NamedValue(metaclass=_MetaNamedValue):
    """Typed, named integer constants.

    Constants have the following instance attributes:

    name: The full name of the constant (e.g., "PT_NULL"), or None for
      unnamed constants.
    short_name: The name of the constant without the prefix ("NULL").
      Only set for named constants.
    value: The integer value of the constant.

    The following class attributes are available:

    by_value: A dict mapping integers to constants.
    by_name: A dict mapping strings to constants.
    prefix: A string that is removed from the start of short names, or None.

    """

    def __new__(cls, arg0, arg1=None):
        """Instance creation.

        For the one-argument form, the argument must be a string, an
        int, or an instance of this class.  Strings are looked up via
        by_name (ValueError is raised for unknown names).  Values are
        looked up via by_value; if value lookup fails, a new unnamed
        instance is returned.  Instances of this class are returned
        as-is.

        The two-argument form expects the name (a string) and the
        value (an integer); ValueError is raised for other types.  A
        new instance is created in this case.  The instance is not
        registered in the by_value/by_name dictionaries (but the
        caller can do that).

        """

        typ0 = type(arg0)
        if arg1 is None:
            # Check the object itself, not its type: type(arg0) is
            # never an instance of cls.
            if isinstance(arg0, cls):
                # Re-use the existing object.
                return arg0
            if typ0 is int:
                by_value = cls.by_value
                try:
                    return by_value[arg0]
                except KeyError:
                    # Create a new object of the requested value.
                    if cls._int_based:
                        result = int.__new__(cls, arg0)
                    else:
                        result = object.__new__(cls)
                    result.value = arg0
                    result.name = None
                    return result
            if typ0 is str:
                by_name = cls.by_name
                try:
                    return by_name[arg0]
                except KeyError:
                    raise ValueError('unknown {} constant: {!r}'.format(
                        cls.__name__, arg0))
            # Any other argument type falls through and yields None.
        else:
            # Types for the two-argument form are rigid.
            if typ0 is not str:
                raise ValueError('type {} of name {!r} should be str'.format(
                    typ0.__name__, arg0))
            if type(arg1) is not int:
                raise ValueError('type {} of value {!r} should be int'.format(
                    type(arg1).__name__, arg1))
            # Create a new named constant.
            if cls._int_based:
                result = int.__new__(cls, arg1)
            else:
                result = object.__new__(cls)
            result.value = arg1
            result.name = arg0
            # Set up the short_name attribute.
            prefix = cls.prefix
            if prefix and arg0.startswith(prefix):
                result.short_name = arg0[len(prefix):]
            else:
                result.short_name = arg0
            return result

    def __str__(self):
        """Return the name if there is one, the decimal value otherwise."""
        name = self.name
        if name:
            return name
        else:
            return str(self.value)

    def __repr__(self):
        """Return the name, or 'ClassName(value)' for unnamed constants."""
        name = self.name
        if name:
            return name
        else:
            return '{}({})'.format(self.__class__.__name__, self.value)

    def __setattr__(self, name, value):
        # Prevent modification of the critical attributes once they
        # have been set.
        if name in ('name', 'value', 'short_name') and hasattr(self, name):
            raise AttributeError('can\'t set attribute {}'.format(name))
        object.__setattr__(self, name, value)
154
@functools.total_ordering
class _TypedConstant(_NamedValue):
    """Base class for integer-valued optionally named constants.

    This type is not an integer type.  Instances only compare equal to,
    and are only ordered against, instances of their own class (or of a
    subclass of it); the comparison is based on the value attribute.

    """

    def __eq__(self, other):
        return isinstance(other, self.__class__) and self.value == other.value

    def __lt__(self, other):
        # Must be a strict comparison: functools.total_ordering derives
        # the remaining operators from __lt__ and __eq__, and a
        # non-strict __lt__ would make x < x true.
        return isinstance(other, self.__class__) and self.value < other.value

    def __hash__(self):
        # Consistent with __eq__, which compares by value.
        return hash(self.value)
171
class _IntConstant(_NamedValue, int):
    """Base class for integer-like optionally named constants.

    Because int is among the bases, instances are real integers: they
    compare equal to the integer of the same value, and can be used in
    integer arithmetic.

    """
181
class _FlagConstant(_TypedConstant, int):
    """Base class for flag constants.

    Combines the comparison methods of _TypedConstant with an int base
    class.

    """
184
def _parse_elf_h():
    """Read ../elf/elf.h and return a dict with the constants in it.

    The header is located relative to the directory of this script.
    It is tokenized with glibcpp.tokenize_c, and its macro definitions
    are evaluated with glibcpp.macro_eval.  Diagnostics are printed to
    standard output.  IOError is raised if either step reports an
    error.

    """

    script_dir = os.path.dirname(os.path.realpath(__file__))
    path = os.path.join(script_dir, '..', 'elf', 'elf.h')

    class TokenizerReporter:
        """Report tokenizer errors to standard output."""

        def __init__(self):
            self.errors = 0

        def error(self, token, message):
            self.errors += 1
            location = '{}:{}:{}'.format(path, token.line, token.column)
            print('{}: error: {}'.format(location, message))

    class MacroReporter:
        """Report macro errors to standard output."""

        def __init__(self):
            self.errors = 0

        def error(self, line, message):
            self.errors += 1
            print('{}:{}: error: {}'.format(path, line, message))

        def note(self, line, message):
            # Notes are informational and do not count as errors.
            print('{}:{}: note: {}'.format(path, line, message))

    # Step 1: tokenize the header.
    tokenizer_reporter = TokenizerReporter()
    with open(path) as inp:
        tokens = glibcpp.tokenize_c(inp.read(), tokenizer_reporter)
    if tokenizer_reporter.errors:
        raise IOError('parse error in elf.h')

    # Step 2: collect the macro definitions and evaluate them.
    macro_reporter = MacroReporter()
    definitions = glibcpp.macro_definitions(tokens)
    result = glibcpp.macro_eval(definitions, macro_reporter)
    if macro_reporter.errors:
        raise IOError('parse error in elf.h')

    return result
226_elf_h = _parse_elf_h()
227del _parse_elf_h
228_elf_h_processed = set()
229
def _register_elf_h(cls, prefix=None, skip=(), ranges=False, parent=None):
    """Register the elf.h constants starting with a prefix in cls.

    cls: the class that receives the constants (as attributes and in
      its by_value and by_name dictionaries).
    prefix: the name prefix to match; defaults to cls.prefix.
    skip: names that must not be registered.  prefix + 'NUM' is always
      skipped.
    ranges: if true, the LOOS/HIOS and LOPROC/HIPROC names are skipped
      as well, and their values are stored in cls.os_range and
      cls.proc_range instead.
    parent: optional class through which the names are made available
      as well (as attributes and in its by_name dictionary only).

    Names that were registered by an earlier call are ignored.
    ValueError is raised if there is no prefix, if two names have the
    same value within cls, or if no name was registered at all.

    """
    if not prefix:
        prefix = cls.prefix
        if not prefix:
            raise ValueError('missing prefix for {}'.format(cls.__name__))
    values = cls.by_value
    names = cls.by_name
    seen = _elf_h_processed

    excluded = set(skip)
    excluded.add(prefix + 'NUM')
    if ranges:
        lo_os, hi_os, lo_proc, hi_proc = (
            prefix + suffix for suffix in ('LOOS', 'HIOS', 'LOPROC', 'HIPROC'))
        excluded.update((lo_os, hi_os, lo_proc, hi_proc))
        cls.os_range = (_elf_h[lo_os], _elf_h[hi_os])
        cls.proc_range = (_elf_h[lo_proc], _elf_h[hi_proc])

    # Inherit the prefix from the parent if not set.
    if parent and cls.prefix is None and parent.prefix is not None:
        cls.prefix = parent.prefix

    matched_any = False
    for name, value in _elf_h.items():
        if name in excluded or name in seen:
            continue
        if not name.startswith(prefix):
            continue
        seen.add(name)
        matched_any = True
        if value in values:
            raise ValueError('duplicate value {}: {}, {}'.format(
                value, name, values[value]))
        constant = cls(name, value)
        values[value] = constant
        names[name] = constant
        setattr(cls, name, constant)
        if parent:
            # Make the symbolic name available through the parent as well.
            parent.by_name[name] = constant
            setattr(parent, name, constant)

    if not matched_any:
        raise ValueError('nothing matched prefix {!r}'.format(prefix))
272
class ElfClass(_TypedConstant):
    """ELF word size.  Type of EI_CLASS values.

    Holds the ELFCLASS* constants.  The class prefix stays unset, so
    short names are identical to the full names.

    """
_register_elf_h(ElfClass, 'ELFCLASS')
30035d67 276
340097d0 277class ElfData(_TypedConstant):
7f0d9e61 278 """ELF endianness. Type of EI_DATA values."""
340097d0 279_register_elf_h(ElfData, prefix='ELFDATA')
30035d67 280
class Machine(_TypedConstant):
    """ELF machine type.  Type of values in Ehdr.e_machine field."""
    prefix = 'EM_'
# EM_ARC_A5 is deliberately left out of the registration.
_register_elf_h(cls=Machine, skip=['EM_ARC_A5'])
285
class Et(_TypedConstant):
    """ELF file type.  Type of ET_* values and the Ehdr.e_type field.

    The OS-specific and processor-specific ranges are available as
    Et.os_range and Et.proc_range.

    """
    prefix = 'ET_'
_register_elf_h(cls=Et, ranges=True)
30035d67 290
class Shn(_IntConstant):
    """ELF reserved section indices."""
    prefix = 'SHN_'
class ShnMIPS(Shn):
    """Supplemental SHN_* constants for EM_MIPS."""
class ShnPARISC(Shn):
    """Supplemental SHN_* constants for EM_PARISC."""
# The machine-specific prefixes have to be registered before the
# generic SHN_ prefix, which would match their names as well: names
# that have already been processed are not registered again.
for _cls, _prefix in (
        (ShnMIPS, 'SHN_MIPS_'),
        (ShnPARISC, 'SHN_PARISC_'),
):
    _register_elf_h(_cls, prefix=_prefix, parent=Shn)
del _cls, _prefix
_register_elf_h(Shn, skip=['SHN_LORESERVE', 'SHN_HIRESERVE'], ranges=True)
30035d67 301
class Sht(_TypedConstant):
    """ELF section types.  Type of SHT_* values."""
    prefix = 'SHT_'
class ShtALPHA(Sht):
    """Supplemental SHT_* constants for EM_ALPHA."""
class ShtARC(Sht):
    """Supplemental SHT_* constants for EM_ARC."""
class ShtARM(Sht):
    """Supplemental SHT_* constants for EM_ARM."""
class ShtCSKY(Sht):
    """Supplemental SHT_* constants for EM_CSKY."""
class ShtIA_64(Sht):
    """Supplemental SHT_* constants for EM_IA_64."""
class ShtMIPS(Sht):
    """Supplemental SHT_* constants for EM_MIPS."""
class ShtPARISC(Sht):
    """Supplemental SHT_* constants for EM_PARISC."""
class ShtRISCV(Sht):
    """Supplemental SHT_* constants for EM_RISCV."""
# The machine-specific prefixes have to be registered before the
# generic SHT_ prefix, which would match their names as well: names
# that have already been processed are not registered again.
for _cls, _prefix in (
        (ShtALPHA, 'SHT_ALPHA_'),
        (ShtARC, 'SHT_ARC_'),
        (ShtARM, 'SHT_ARM_'),
        (ShtCSKY, 'SHT_CSKY_'),
        (ShtIA_64, 'SHT_IA_64_'),
        (ShtMIPS, 'SHT_MIPS_'),
        (ShtPARISC, 'SHT_PARISC_'),
        (ShtRISCV, 'SHT_RISCV_'),
):
    _register_elf_h(_cls, prefix=_prefix, parent=Sht)
del _cls, _prefix
_register_elf_h(Sht, ranges=True,
                skip=['SHT_LOSUNW', 'SHT_HISUNW', 'SHT_LOUSER', 'SHT_HIUSER'])
331
class Pf(_FlagConstant):
    """Program header flags.  Type of Phdr.p_flags values."""
    prefix = 'PF_'
class PfARM(Pf):
    """Supplemental PF_* flags for EM_ARM."""
class PfHP(Pf):
    """Supplemental PF_* flags for HP-UX."""
class PfIA_64(Pf):
    """Supplemental PF_* flags for EM_IA_64."""
class PfMIPS(Pf):
    """Supplemental PF_* flags for EM_MIPS."""
class PfPARISC(Pf):
    """Supplemental PF_* flags for EM_PARISC."""
# The specific prefixes have to be registered before the generic PF_
# prefix, which would match their names as well: names that have
# already been processed are not registered again.
for _cls, _prefix in (
        (PfARM, 'PF_ARM_'),
        (PfHP, 'PF_HP_'),
        (PfIA_64, 'PF_IA_64_'),
        (PfMIPS, 'PF_MIPS_'),
        (PfPARISC, 'PF_PARISC_'),
):
    _register_elf_h(_cls, prefix=_prefix, parent=Pf)
del _cls, _prefix
_register_elf_h(Pf, skip=['PF_MASKOS', 'PF_MASKPROC'])
351
class Shf(_FlagConstant):
    """Section flags.  Type of Shdr.sh_flags values."""
    prefix = 'SHF_'
class ShfALPHA(Shf):
    """Supplemental SHF_* constants for EM_ALPHA."""
class ShfARM(Shf):
    """Supplemental SHF_* constants for EM_ARM."""
class ShfIA_64(Shf):
    """Supplemental SHF_* constants for EM_IA_64."""
class ShfMIPS(Shf):
    """Supplemental SHF_* constants for EM_MIPS."""
class ShfPARISC(Shf):
    """Supplemental SHF_* constants for EM_PARISC."""
# The machine-specific prefixes have to be registered before the
# generic SHF_ prefix, which would match their names as well: names
# that have already been processed are not registered again.
for _cls, _prefix in (
        (ShfALPHA, 'SHF_ALPHA_'),
        (ShfARM, 'SHF_ARM_'),
        (ShfIA_64, 'SHF_IA_64_'),
        (ShfMIPS, 'SHF_MIPS_'),
        (ShfPARISC, 'SHF_PARISC_'),
):
    _register_elf_h(_cls, prefix=_prefix, parent=Shf)
del _cls, _prefix
_register_elf_h(Shf, skip=['SHF_MASKOS', 'SHF_MASKPROC'])
371
class Stb(_TypedConstant):
    """ELF symbol binding type.

    The OS-specific and processor-specific ranges are available as
    Stb.os_range and Stb.proc_range.

    """
    prefix = 'STB_'
_register_elf_h(cls=Stb, ranges=True)
30035d67 376
class Stt(_TypedConstant):
    """ELF symbol type."""
    prefix = 'STT_'
# The supplemental classes derive from Stt (not from Sht, the section
# type class), so that their constants are instances of Stt and
# compare like other symbol types.
class SttARM(Stt):
    """Supplemental STT_* constants for EM_ARM."""
class SttPARISC(Stt):
    """Supplemental STT_* constants for EM_PARISC."""
class SttSPARC(Stt):
    """Supplemental STT_* constants for EM_SPARC."""
    # Replaced by a constant object if elf.h defines the same name.
    STT_SPARC_REGISTER = 13
class SttX86_64(Stt):
    """Supplemental STT_* constants for EM_X86_64."""
# The machine-specific prefixes have to be registered before the
# generic STT_ prefix, which would match their names as well.  No
# constants are registered for SttX86_64.
_register_elf_h(SttARM, prefix='STT_ARM_', parent=Stt)
_register_elf_h(SttPARISC, prefix='STT_PARISC_', parent=Stt)
_register_elf_h(SttSPARC, prefix='STT_SPARC_', parent=Stt)
_register_elf_h(Stt, ranges=True)
393
30035d67 394
class Pt(_TypedConstant):
    """ELF program header types.  Type of Phdr.p_type."""
    prefix = 'PT_'
class PtAARCH64(Pt):
    """Supplemental PT_* constants for EM_AARCH64."""
class PtARM(Pt):
    """Supplemental PT_* constants for EM_ARM."""
class PtHP(Pt):
    """Supplemental PT_* constants for HP-UX."""
class PtIA_64(Pt):
    """Supplemental PT_* constants for EM_IA_64."""
class PtMIPS(Pt):
    """Supplemental PT_* constants for EM_MIPS."""
class PtPARISC(Pt):
    """Supplemental PT_* constants for EM_PARISC."""
class PtRISCV(Pt):
    """Supplemental PT_* constants for EM_RISCV."""
# The specific prefixes have to be registered before the generic PT_
# prefix, which would match their names as well: names that have
# already been processed are not registered again.
for _cls, _prefix in (
        (PtAARCH64, 'PT_AARCH64_'),
        (PtARM, 'PT_ARM_'),
        (PtHP, 'PT_HP_'),
        (PtIA_64, 'PT_IA_64_'),
        (PtMIPS, 'PT_MIPS_'),
        (PtPARISC, 'PT_PARISC_'),
        (PtRISCV, 'PT_RISCV_'),
):
    _register_elf_h(_cls, prefix=_prefix, parent=Pt)
del _cls, _prefix
_register_elf_h(Pt, skip=['PT_LOSUNW', 'PT_HISUNW'], ranges=True)
420
class Dt(_TypedConstant):
    """ELF dynamic segment tags.  Type of Dyn.d_tag."""
    prefix = 'DT_'
class DtAARCH64(Dt):
    """Supplemental DT_* constants for EM_AARCH64."""
class DtALPHA(Dt):
    """Supplemental DT_* constants for EM_ALPHA."""
class DtALTERA_NIOS2(Dt):
    """Supplemental DT_* constants for EM_ALTERA_NIOS2."""
class DtIA_64(Dt):
    """Supplemental DT_* constants for EM_IA_64."""
class DtMIPS(Dt):
    """Supplemental DT_* constants for EM_MIPS."""
class DtPPC(Dt):
    """Supplemental DT_* constants for EM_PPC."""
class DtPPC64(Dt):
    """Supplemental DT_* constants for EM_PPC64."""
class DtRISCV(Dt):
    """Supplemental DT_* constants for EM_RISCV."""
class DtSPARC(Dt):
    """Supplemental DT_* constants for EM_SPARC."""
# Names that are not registered as tags by any of the calls below.
_dt_skip = [
    'DT_ENCODING', 'DT_PROCNUM',
    'DT_ADDRRNGLO', 'DT_ADDRRNGHI', 'DT_ADDRNUM',
    'DT_VALRNGLO', 'DT_VALRNGHI', 'DT_VALNUM',
    'DT_VERSIONTAGNUM', 'DT_EXTRANUM',
    'DT_AARCH64_NUM',
    'DT_ALPHA_NUM',
    'DT_IA_64_NUM',
    'DT_MIPS_NUM',
    'DT_PPC_NUM',
    'DT_PPC64_NUM',
    'DT_SPARC_NUM',
]
# The machine-specific prefixes have to be registered before the
# generic DT_ prefix, which would match their names as well: names
# that have already been processed are not registered again.
for _cls, _prefix in (
        (DtAARCH64, 'DT_AARCH64_'),
        (DtALPHA, 'DT_ALPHA_'),
        (DtALTERA_NIOS2, 'DT_NIOS2_'),
        (DtIA_64, 'DT_IA_64_'),
        (DtMIPS, 'DT_MIPS_'),
        (DtPPC, 'DT_PPC_'),
        (DtPPC64, 'DT_PPC64_'),
        (DtRISCV, 'DT_RISCV_'),
        (DtSPARC, 'DT_SPARC_'),
):
    _register_elf_h(_cls, prefix=_prefix, skip=_dt_skip, parent=Dt)
del _cls, _prefix
_register_elf_h(Dt, skip=_dt_skip, ranges=True)
del _dt_skip
466
467# Constant extraction is complete.
468del _register_elf_h
469del _elf_h
30035d67
FW
470
class StInfo:
    """ELF symbol binding and type.  Type of the Sym.st_info field."""
    def __init__(self, arg0, arg1=None):
        """Create the object from a raw value or from its two parts.

        If arg0 is an int and arg1 is None, arg0 is the raw st_info
        value: the binding is taken from the bits above the low four
        bits, and the type from the low four bits.  Otherwise, arg0 is
        the binding and arg1 is the type; both are converted using Stb
        and Stt.

        """
        if isinstance(arg0, int) and arg1 is None:
            self.bind = Stb(arg0 >> 4)
            self.type = Stt(arg0 & 15)
        else:
            self.bind = Stb(arg0)
            self.type = Stt(arg1)

    def value(self):
        """Returns the raw value for the bind/type combination."""
        # The value of a constant is an attribute, not a method.
        return (self.bind.value << 4) | self.type.value
484
485# Type in an ELF file. Used for deserialization.
486_Layout = collections.namedtuple('_Layout', 'unpack size')
487
def _define_layouts(baseclass: type, layout32: str, layout64: str,
                    types=None, fields32=None):
    """Assign variants dict to baseclass.

    The variants dict is stored in baseclass.layouts.  It is indexed
    by (ElfClass, ElfData) pairs, and its values are _Layout instances
    (an unpack function turning a blob into a baseclass instance, and
    the size of the blob in bytes).

    baseclass: a named tuple class.
    layout32, layout64: struct format strings for the 32-bit and
      64-bit variants, without the byte order prefix.
    types: optional dict mapping field names to callables that convert
      the unpacked value of that field.
    fields32: optional field order of the 32-bit variant, if it
      differs from baseclass._fields (which is always used for the
      64-bit variant).

    ValueError is raised if the arguments are inconsistent with the
    fields of baseclass.

    """
    # Check that the struct formats yield the right number of components.
    for s in (struct.Struct(layout32), struct.Struct(layout64)):
        example = s.unpack(b' ' * s.size)
        if len(example) != len(baseclass._fields):
            raise ValueError('{!r} yields wrong field count: {} != {}'.format(
                s.format, len(example), len(baseclass._fields)))

    # Check that field names in types are correct.
    types = types or {}
    for n in types:
        if n not in baseclass._fields:
            raise ValueError('{} does not have field {!r}'.format(
                baseclass.__name__, n))

    if fields32 is not None \
       and set(fields32) != set(baseclass._fields):
        raise ValueError('{!r} is not a permutation of the fields {!r}'.format(
            fields32, baseclass._fields))

    # Names that are already taken in the generated code.
    used_names = {baseclass.__name__}
    used_names.update(baseclass._fields)
    used_names.update(cls.__name__ for cls in types.values())

    def unique_name(name):
        """Find a name that is not used for a class or field name."""
        candidate = name
        n = 0
        while candidate in used_names:
            n += 1
            candidate = '{}{}'.format(name, n)
        used_names.add(candidate)
        return candidate

    blob_name = unique_name('blob')
    struct_unpack_name = unique_name('struct_unpack')

    indent = ' ' * 4
    field_names = ', '.join(baseclass._fields)

    layouts = {}
    for (bits, elfclass, layout, fields) in (
            (32, ElfClass.ELFCLASS32, layout32, fields32),
            (64, ElfClass.ELFCLASS64, layout64, None),
    ):
        for (elfdata, structprefix, funcsuffix) in (
                (ElfData.ELFDATA2LSB, '<', 'LE'),
                (ElfData.ELFDATA2MSB, '>', 'BE'),
        ):
            # Names visible to the generated function.
            env = {
                baseclass.__name__: baseclass,
                struct_unpack_name: struct.unpack,
            }

            # Add the type converters.
            for cls in types.values():
                env[cls.__name__] = cls

            funcname = ''.join(
                ('unpack_', baseclass.__name__, str(bits), funcsuffix))
            struct_format = structprefix + layout

            lines = ['def {}({}):'.format(funcname, blob_name)]

            # Destructuring tuple assignment, using the custom field
            # order if there is one.
            if fields is None:
                targets = field_names
            else:
                targets = ', '.join(fields)
            lines.append('{}{} = {}({!r}, {})'.format(
                indent, targets, struct_unpack_name, struct_format,
                blob_name))

            # Perform the type conversions.
            for n in baseclass._fields:
                if n in types:
                    lines.append('{}{} = {}({})'.format(
                        indent, n, types[n].__name__, n))

            # Create the named tuple.
            lines.append('{}return {}({})'.format(
                indent, baseclass.__name__, field_names))

            # The code is generated from the arguments of this
            # function only, not from data read from ELF files.
            exec('\n'.join(lines) + '\n', env)

            # Compute the size from the format that is used for
            # unpacking: without the byte order prefix, calcsize would
            # apply native sizes and alignment.
            layouts[(elfclass, elfdata)] = _Layout(
                env[funcname], struct.calcsize(struct_format))
    baseclass.layouts = layouts
592
593
# Corresponds to EI_* indices into Elf*_Ehdr.e_ident.
class Ident(collections.namedtuple('Ident', [
        'ei_mag', 'ei_class', 'ei_data', 'ei_version', 'ei_osabi',
        'ei_abiversion', 'ei_pad'])):
    """ELF identification data, parsed from 16 bytes."""

    # Number of bytes consumed by unpack.
    size = 16

    def __new__(cls, *args):
        """Construct an object from a blob or its constituent fields."""
        if len(args) != 1:
            # The fields were passed individually.
            return cls.__base__.__new__(cls, *args)
        # A single argument is a blob that needs to be parsed.
        return cls.unpack(args[0])

    @staticmethod
    def unpack(blob: memoryview) -> 'Ident':
        """Parse raw data into a tuple.

        ei_class and ei_data are converted to ElfClass and ElfData.

        """
        fields = list(struct.unpack('4s5B7s', blob))
        fields[1] = ElfClass(fields[1])
        fields[2] = ElfData(fields[2])
        return Ident(*fields)
612
# Corresponds to Elf32_Ehdr and Elf64_Ehdr.  Both variants use the
# same field order.
Ehdr = collections.namedtuple('Ehdr', [
    'e_ident', 'e_type', 'e_machine', 'e_version', 'e_entry', 'e_phoff',
    'e_shoff', 'e_flags', 'e_ehsize', 'e_phentsize', 'e_phnum',
    'e_shentsize', 'e_shnum', 'e_shstrndx',
])
_define_layouts(Ehdr,
                layout32='16s2H5I6H',
                layout64='16s2HI3QI6H',
                types={
                    'e_ident': Ident,
                    'e_machine': Machine,
                    'e_type': Et,
                    'e_shstrndx': Shn,
                })
624
# Corresponds to Elf32_Phdr and Elf64_Phdr.  Order follows the latter;
# the 32-bit variant stores p_flags after p_memsz, which is described
# by fields32.
Phdr = collections.namedtuple('Phdr', [
    'p_type', 'p_flags', 'p_offset', 'p_vaddr', 'p_paddr', 'p_filesz',
    'p_memsz', 'p_align',
])
_define_layouts(Phdr,
                layout32='8I',
                fields32=('p_type', 'p_offset', 'p_vaddr', 'p_paddr',
                          'p_filesz', 'p_memsz', 'p_flags', 'p_align'),
                layout64='2I6Q',
                types={'p_type': Pt, 'p_flags': Pf})
634
635
# Corresponds to Elf32_Shdr and Elf64_Shdr.
class Shdr(collections.namedtuple('Shdr', [
        'sh_name', 'sh_type', 'sh_flags', 'sh_addr', 'sh_offset', 'sh_size',
        'sh_link', 'sh_info', 'sh_addralign', 'sh_entsize'])):
    def resolve(self, strtab: 'StringTable') -> 'Shdr':
        """Resolve sh_name using a string table.

        Returns a new object in which the first field (sh_name) has
        been replaced with the result of strtab.get for it.

        """
        name_offset, *remaining = self
        return type(self)(strtab.get(name_offset), *remaining)
_define_layouts(Shdr,
                layout32='10I',
                layout64='2I4Q2I2Q',
                types={'sh_type': Sht, 'sh_flags': Shf, 'sh_link': Shn})
649
# Corresponds to Elf32_Dyn and Elf64_Dyn.  The nesting through the
# d_un union is skipped, and d_ptr is missing (its representation in
# Python would be identical to d_val).
Dyn = collections.namedtuple('Dyn', ['d_tag', 'd_val'])
_define_layouts(Dyn,
                layout32='2i',
                layout64='2q',
                types={'d_tag': Dt})
658
# Corresponds to Elf32_Sym and Elf64_Sym.  Order follows the latter;
# the field order of the 32-bit variant is described by fields32.
class Sym(collections.namedtuple('Sym', [
        'st_name', 'st_info', 'st_other', 'st_shndx', 'st_value',
        'st_size'])):
    def resolve(self, strtab: 'StringTable') -> 'Sym':
        """Resolve st_name using a string table.

        Returns a new object in which the first field (st_name) has
        been replaced with the result of strtab.get for it.

        """
        name_offset, *remaining = self
        return type(self)(strtab.get(name_offset), *remaining)
_define_layouts(Sym,
                layout32='3I2BH',
                layout64='I2BH2Q',
                fields32=('st_name', 'st_value', 'st_size', 'st_info',
                          'st_other', 'st_shndx'),
                types={'st_shndx': Shn, 'st_info': StInfo})
672
# Corresponds to Elf32_Rel and Elf64_Rel.  No field is converted.
Rel = collections.namedtuple('Rel', ['r_offset', 'r_info'])
_define_layouts(Rel, layout32='2I', layout64='2Q')
678
# Corresponds to Elf32_Rela and Elf64_Rela.  r_addend is a signed
# field (Elf32_Sword/Elf64_Sxword), unlike r_offset and r_info, so it
# is unpacked with a signed format code.
Rela = collections.namedtuple('Rela', 'r_offset r_info r_addend')
_define_layouts(Rela,
                layout32='2Ii',
                layout64='2Qq')
684
class StringTable:
    """ELF string table."""
    def __init__(self, blob):
        """Create a new string table backed by the data in the blob.

        blob: a memoryview-like object

        """
        self.blob = blob

    def get(self, index) -> bytes:
        """Returns the null-terminated byte string at the index.

        The terminating null byte is not part of the result.

        """
        data = self.blob
        stop = index
        # Scan forward to the terminating null byte.
        while data[stop] != 0:
            stop += 1
        return bytes(data[index:stop])
703
class Image:
    """ELF image parser."""
    def __init__(self, image):
        """Create an ELF image from binary image data.

        image: a memoryview-like object that supports efficient range
          subscripting.

        """
        self.image = image
        ident = self.read(Ident, 0)
        classdata = (ident.ei_class, ident.ei_data)
        # Set self.Ehdr etc. to the layouts with the right parsers, or
        # to None if there is no layout for this class/data pair.
        for typ in (Ehdr, Phdr, Shdr, Dyn, Sym, Rel, Rela):
            setattr(self, typ.__name__, typ.layouts.get(classdata, None))

        if self.Ehdr is not None:
            self.ehdr = self.read(self.Ehdr, 0)
            self._shdr_num = self._compute_shdr_num()
        else:
            self.ehdr = None
            self._shdr_num = 0

        # Caches, indexed by section index.
        self._section = {}
        self._stringtab = {}

        if self._shdr_num > 0:
            self._shdr_strtab = self._find_shdr_strtab()
        else:
            self._shdr_strtab = None

    @staticmethod
    def readfile(path: str) -> 'Image':
        """Reads the ELF file at the specified path."""
        with open(path, 'rb') as inp:
            return Image(memoryview(inp.read()))

    def _compute_shdr_num(self) -> int:
        """Computes the actual number of section headers."""
        shnum = self.ehdr.e_shnum
        if shnum == 0:
            if self.ehdr.e_shoff == 0 or self.ehdr.e_shentsize == 0:
                # No section headers.
                return 0
            # Otherwise the extension mechanism is used (which may be
            # needed because e_shnum is just 16 bits): the count is in
            # the sh_size field of the first section header.
            return self.read(self.Shdr, self.ehdr.e_shoff).sh_size
        return shnum

    def _find_shdr_strtab(self) -> 'StringTable':
        """Finds the section header string table (maybe via extensions)."""
        shstrndx = self.ehdr.e_shstrndx
        if shstrndx == Shn.SHN_XINDEX:
            # The index is in the sh_link field of the first section
            # header.
            shstrndx = self.read(self.Shdr, self.ehdr.e_shoff).sh_link
        return self._find_stringtab(shstrndx)

    def read(self, typ: type, offset: int):
        """Reads an object at a specific offset.

        The type must provide unpack and size attributes, like Ident
        and the layouts created by _define_layouts (self.Ehdr etc.).

        """
        return typ.unpack(self.image[offset: offset + typ.size])

    def phdrs(self) -> 'Phdr':
        """Generator iterating over the program headers.

        Raises ValueError if e_phentsize does not match the size of
        the Phdr layout.

        """
        if self.ehdr is None:
            return
        size = self.ehdr.e_phentsize
        if size != self.Phdr.size:
            raise ValueError('Unexpected Phdr size in ELF header: {} != {}'
                             .format(size, self.Phdr.size))

        offset = self.ehdr.e_phoff
        for _ in range(self.ehdr.e_phnum):
            yield self.read(self.Phdr, offset)
            offset += size

    def shdrs(self, resolve: bool=True) -> 'Shdr':
        """Generator iterating over the section headers.

        If resolve, section names are automatically translated
        using the section header string table.

        Raises ValueError if e_shentsize does not match the size of
        the Shdr layout.

        """
        if self._shdr_num == 0:
            return

        size = self.ehdr.e_shentsize
        if size != self.Shdr.size:
            raise ValueError('Unexpected Shdr size in ELF header: {} != {}'
                             .format(size, self.Shdr.size))

        offset = self.ehdr.e_shoff
        for _ in range(self._shdr_num):
            shdr = self.read(self.Shdr, offset)
            if resolve:
                shdr = shdr.resolve(self._shdr_strtab)
            yield shdr
            offset += size

    def dynamic(self) -> 'Dyn':
        """Generator iterating over the dynamic segment.

        Raises ValueError if the segment size is not a multiple of
        the size of the Dyn layout.

        """
        for phdr in self.phdrs():
            if phdr.p_type == Pt.PT_DYNAMIC:
                # Pick the first dynamic segment, like the loader.
                if phdr.p_filesz == 0:
                    # Probably separated debuginfo.
                    return
                offset = phdr.p_offset
                end = offset + phdr.p_memsz
                size = self.Dyn.size
                while True:
                    next_offset = offset + size
                    if next_offset > end:
                        raise ValueError(
                            'Dynamic segment size {} is not a multiple of Dyn size {}'.format(
                                phdr.p_memsz, size))
                    yield self.read(self.Dyn, offset)
                    if next_offset == end:
                        return
                    offset = next_offset

    def syms(self, shdr: 'Shdr', resolve: bool=True) -> 'Sym':
        """A generator iterating over a symbol table.

        If resolve, symbol names are automatically translated using
        the string table for the symbol table.

        Raises ValueError if the entry size or the section size does
        not fit the Sym layout.

        """
        assert shdr.sh_type == Sht.SHT_SYMTAB
        size = shdr.sh_entsize
        if size != self.Sym.size:
            raise ValueError('Invalid symbol table entry size {}'.format(size))
        offset = shdr.sh_offset
        end = shdr.sh_offset + shdr.sh_size
        if resolve:
            strtab = self._find_stringtab(shdr.sh_link)
        while offset < end:
            sym = self.read(self.Sym, offset)
            if resolve:
                sym = sym.resolve(strtab)
            yield sym
            offset += size
        if offset != end:
            raise ValueError('Symbol table is not a multiple of entry size')

    def lookup_string(self, strtab_index: int, strtab_offset: int) -> bytes:
        """Looks up a string in a string table identified by its link index."""
        try:
            strtab = self._stringtab[strtab_index]
        except KeyError:
            strtab = self._find_stringtab(strtab_index)
        return strtab.get(strtab_offset)

    def find_section(self, shndx: 'Shn') -> 'Shdr':
        """Returns the section header for the indexed section.

        The section name is not resolved.  Raises ValueError for
        reserved section indices and for indices that are out of
        range.

        """
        try:
            return self._section[shndx]
        except KeyError:
            pass
        if shndx in Shn:
            raise ValueError('Reserved section index {}'.format(shndx))
        idx = shndx.value
        # Valid indices are 0 to self._shdr_num - 1.
        if idx < 0 or idx >= self._shdr_num:
            raise ValueError('Section index {} out of range [0, {})'.format(
                idx, self._shdr_num))
        shdr = self.read(
            self.Shdr, self.ehdr.e_shoff + idx * self.Shdr.size)
        self._section[shndx] = shdr
        return shdr

    def _find_stringtab(self, sh_link: int) -> 'StringTable':
        """Returns the string table stored in the section sh_link.

        String tables are cached.  Raises ValueError if the index is
        out of range, or if the section is not a string table.

        """
        try:
            # Return the cached table itself, not the cache.
            return self._stringtab[sh_link]
        except KeyError:
            pass
        if sh_link < 0 or sh_link >= self._shdr_num:
            raise ValueError('Section index {} out of range [0, {})'.format(
                sh_link, self._shdr_num))
        shdr = self.read(
            self.Shdr, self.ehdr.e_shoff + sh_link * self.Shdr.size)
        if shdr.sh_type != Sht.SHT_STRTAB:
            raise ValueError(
                'Section {} is not a string table: {}'.format(
                    sh_link, shdr.sh_type))
        strtab = StringTable(
            self.image[shdr.sh_offset:shdr.sh_offset + shdr.sh_size])
        # This could retain essentially arbitrary amounts of data,
        # but caching string tables seems important for performance.
        self._stringtab[sh_link] = strtab
        return strtab
897
bd13cb19
FW
def elf_hash(s):
    """Computes the ELF hash of the string.

    s can be a str or a bytes-like object: elements that are not
    integers are converted using ord.  The result is a non-negative
    integer of at most 28 bits.

    """
    h = 0
    for item in s:
        code = item if type(item) is int else ord(item)
        h = ((h << 4) + code) & 0xffffffff
        # Fold the top four bits into the lower bits, then clear them.
        high = h & 0xf0000000
        h = (h ^ (high >> 24)) & ~high
    return h
908
def gnu_hash(s):
    """Computes the GNU hash of the string.

    s can be a str or a bytes-like object: elements that are not
    integers are converted using ord.  The result is reduced to 32
    bits.

    """
    acc = 5381
    for item in s:
        code = item if type(item) is int else ord(item)
        # acc * 33 + code, written as shift and add.
        acc = ((acc << 5) + acc + code) & 0xffffffff
    return acc
30035d67
FW
917
918__all__ = [name for name in dir() if name[0].isupper()]