]>
Commit | Line | Data |
---|---|---|
30035d67 FW |
1 | #!/usr/bin/python3 |
2 | # ELF support functionality for Python. | |
6d7e8eda | 3 | # Copyright (C) 2022-2023 Free Software Foundation, Inc. |
30035d67 FW |
4 | # This file is part of the GNU C Library. |
5 | # | |
6 | # The GNU C Library is free software; you can redistribute it and/or | |
7 | # modify it under the terms of the GNU Lesser General Public | |
8 | # License as published by the Free Software Foundation; either | |
9 | # version 2.1 of the License, or (at your option) any later version. | |
10 | # | |
11 | # The GNU C Library is distributed in the hope that it will be useful, | |
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | # Lesser General Public License for more details. | |
15 | # | |
16 | # You should have received a copy of the GNU Lesser General Public | |
17 | # License along with the GNU C Library; if not, see | |
18 | # <https://www.gnu.org/licenses/>. | |
19 | ||
20 | """Basic ELF parser. | |
21 | ||
22 | Use Image.readfile(path) to read an ELF file into memory and begin | |
23 | parsing it. | |
24 | ||
25 | """ | |
26 | ||
27 | import collections | |
340097d0 FW |
28 | import functools |
29 | import os | |
30035d67 FW |
30 | import struct |
31 | ||
340097d0 FW |
32 | import glibcpp |
33 | ||
34 | class _MetaNamedValue(type): | |
35 | """Used to set up _NamedValue subclasses.""" | |
b571f3ad | 36 | |
30035d67 | 37 | @classmethod |
340097d0 FW |
38 | def __prepare__(metacls, cls, bases, **kwds): |
39 | # Indicates an int-based class. Needed for types like Shn. | |
40 | int_based = False | |
41 | for base in bases: | |
42 | if issubclass(base, int): | |
43 | int_based = int | |
44 | break | |
45 | return dict(by_value={}, | |
46 | by_name={}, | |
47 | prefix=None, | |
48 | _int_based=int_based) | |
49 | ||
50 | def __contains__(self, other): | |
51 | return other in self.by_value | |
52 | ||
53 | class _NamedValue(metaclass=_MetaNamedValue): | |
54 | """Typed, named integer constants. | |
55 | ||
56 | Constants have the following instance attributes: | |
57 | ||
58 | name: The full name of the constant (e.g., "PT_NULL"). | |
59 | short_name: The name with of the constant without the prefix ("NULL"). | |
60 | value: The integer value of the constant. | |
61 | ||
62 | The following class attributes are available: | |
63 | ||
64 | by_value: A dict mapping integers to constants. | |
65 | by_name: A dict mapping strings to constants. | |
66 | prefix: A string that is removed from the start of short names, or None. | |
67 | ||
68 | """ | |
69 | ||
70 | def __new__(cls, arg0, arg1=None): | |
71 | """Instance creation. | |
72 | ||
73 | For the one-argument form, the argument must be a string, an | |
74 | int, or an instance of this class. Strings are looked up via | |
75 | by_name. Values are looked up via by_value; if value lookup | |
76 | fails, a new unnamed instance is returned. Instances of this | |
77 | class a re returned as-is. | |
78 | ||
79 | The two-argument form expects the name (a string) and the | |
80 | value (an integer). A new instance is created in this case. | |
81 | The instance is not registered in the by_value/by_name | |
82 | dictionaries (but the caller can do that). | |
83 | ||
84 | """ | |
85 | ||
86 | typ0 = type(arg0) | |
87 | if arg1 is None: | |
88 | if isinstance(typ0, cls): | |
89 | # Re-use the existing object. | |
90 | return arg0 | |
91 | if typ0 is int: | |
92 | by_value = cls.by_value | |
93 | try: | |
94 | return by_value[arg0] | |
95 | except KeyError: | |
96 | # Create a new object of the requested value. | |
97 | if cls._int_based: | |
98 | result = int.__new__(cls, arg0) | |
99 | else: | |
100 | result = object.__new__(cls) | |
101 | result.value = arg0 | |
102 | result.name = None | |
103 | return result | |
104 | if typ0 is str: | |
105 | by_name = cls.by_name | |
106 | try: | |
107 | return by_name[arg0] | |
108 | except KeyError: | |
109 | raise ValueError('unknown {} constant: {!r}'.format( | |
110 | cls.__name__, arg0)) | |
111 | else: | |
112 | # Types for the two-argument form are rigid. | |
113 | if typ0 is not str and typ0 is not None: | |
114 | raise ValueError('type {} of name {!r} should be str'.format( | |
115 | typ0.__name__, arg0)) | |
116 | if type(arg1) is not int: | |
117 | raise ValueError('type {} of value {!r} should be int'.format( | |
118 | type(arg1).__name__, arg1)) | |
119 | # Create a new named constants. | |
120 | if cls._int_based: | |
121 | result = int.__new__(cls, arg1) | |
122 | else: | |
123 | result = object.__new__(cls) | |
124 | result.value = arg1 | |
125 | result.name = arg0 | |
126 | # Set up the short_name attribute. | |
127 | prefix = cls.prefix | |
128 | if prefix and arg0.startswith(prefix): | |
129 | result.short_name = arg0[len(prefix):] | |
130 | else: | |
131 | result.short_name = arg0 | |
132 | return result | |
30035d67 | 133 | |
340097d0 FW |
134 | def __str__(self): |
135 | name = self.name | |
136 | if name: | |
30035d67 | 137 | return name |
340097d0 FW |
138 | else: |
139 | return str(self.value) | |
30035d67 | 140 | |
340097d0 FW |
141 | def __repr__(self): |
142 | name = self.name | |
143 | if name: | |
30035d67 | 144 | return name |
340097d0 FW |
145 | else: |
146 | return '{}({})'.format(self.__class__.__name__, self.value) | |
30035d67 | 147 | |
340097d0 FW |
148 | def __setattr__(self, name, value): |
149 | # Prevent modification of the critical attributes once they | |
150 | # have been set. | |
151 | if name in ('name', 'value', 'short_name') and hasattr(self, name): | |
152 | raise AttributeError('can\'t set attribute {}'.format(name)) | |
153 | object.__setattr__(self, name, value) | |
154 | ||
@functools.total_ordering
class _TypedConstant(_NamedValue):
    """Base class for integer-valued optionally named constants.

    This type is not an integer type.  Two constants compare equal
    only if the right-hand operand is an instance of the left-hand
    operand's class and both carry the same value; ordering follows
    the values.  The remaining comparison operators are derived by
    functools.total_ordering.

    """

    def __eq__(self, other):
        return isinstance(other, self.__class__) and self.value == other.value

    def __lt__(self, other):
        # Strictly less-than: using <= here would make "x < x" true and
        # would let total_ordering derive inconsistent operators.
        return isinstance(other, self.__class__) and self.value < other.value

    def __hash__(self):
        return hash(self.value)
171 | ||
class _IntConstant(_NamedValue, int):
    """Base class for integer-like optionally named constants.

    Instances compare equal to the integer of the same value, and can
    be used in integer arithmetic.

    Because int is among the bases, instances are allocated as int
    objects by _NamedValue.__new__ and use int's comparison operators.

    """

    pass
181 | ||
class _FlagConstant(_TypedConstant, int):
    """Base class for flag constants.

    Instances are int objects, but _TypedConstant precedes int in the
    method resolution order, so equality, ordering and hashing are
    those defined by _TypedConstant.

    """
    pass
184 | ||
def _parse_elf_h():
    """Read ../elf/elf.h and return a dict with the constants in it.

    The header is located relative to the real path of this file.  It
    is tokenized with glibcpp.tokenize_c, and its macro definitions are
    evaluated with glibcpp.macro_eval.  Diagnostics are printed to
    standard output; IOError is raised if any error was reported.

    """

    header = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                          '..', 'elf', 'elf.h')

    class TokenizerReporter:
        """Report tokenizer errors to standard output."""

        def __init__(self):
            self.errors = 0

        def error(self, token, message):
            self.errors += 1
            print('{}:{}:{}: error: {}'.format(
                header, token.line, token.column, message))

    class MacroReporter:
        """Report macro errors to standard output."""

        def __init__(self):
            self.errors = 0

        def error(self, line, message):
            self.errors += 1
            print('{}:{}: error: {}'.format(header, line, message))

        def note(self, line, message):
            print('{}:{}: note: {}'.format(header, line, message))

    # Stage 1: tokenize the complete header text.
    token_reporter = TokenizerReporter()
    with open(header) as inp:
        text = inp.read()
    tokens = glibcpp.tokenize_c(text, token_reporter)
    if token_reporter.errors:
        raise IOError('parse error in elf.h')

    # Stage 2: collect the macro definitions and evaluate them.
    macro_reporter = MacroReporter()
    definitions = glibcpp.macro_definitions(tokens)
    constants = glibcpp.macro_eval(definitions, macro_reporter)
    if macro_reporter.errors:
        raise IOError('parse error in elf.h')

    return constants
# Evaluate elf.h once while the module is being imported.  The parser
# function is deleted right away because nothing else needs it.
_elf_h = _parse_elf_h()
del _parse_elf_h
# Names of elf.h constants that a _register_elf_h call has already
# claimed; later calls skip them.
_elf_h_processed = set()
229 | ||
def _register_elf_h(cls, prefix=None, skip=(), ranges=False, parent=None):
    """Turn the elf.h constants that start with a prefix into cls instances.

    cls: the _NamedValue subclass that receives the constants.
    prefix: name prefix to match; defaults to cls.prefix.
    skip: names that must not be registered.  The name prefix + 'NUM'
      is always skipped.
    ranges: if true, the LOOS/HIOS/LOPROC/HIPROC names are skipped as
      well, and their values are stored as the cls.os_range and
      cls.proc_range pairs.
    parent: optional class through which the constants are made
      available by name as well (attribute and by_name entry).  cls
      inherits the parent's prefix if it has none of its own.

    Names claimed by an earlier call are ignored.  Raises ValueError if
    no prefix is available, if a value is already present in
    cls.by_value, or if no constant matched.

    """
    name_prefix = prefix or cls.prefix
    if not name_prefix:
        raise ValueError('missing prefix for {}'.format(cls.__name__))

    excluded = set(skip)
    excluded.add(name_prefix + 'NUM')
    if ranges:
        excluded.update(name_prefix + suffix
                        for suffix in ('LOOS', 'HIOS', 'LOPROC', 'HIPROC'))
        cls.os_range = tuple(_elf_h[name_prefix + suffix]
                             for suffix in ('LOOS', 'HIOS'))
        cls.proc_range = tuple(_elf_h[name_prefix + suffix]
                               for suffix in ('LOPROC', 'HIPROC'))

    # Inherit the prefix from the parent if not set.
    if parent and cls.prefix is None and parent.prefix is not None:
        cls.prefix = parent.prefix

    registered = 0
    for name, value in _elf_h.items():
        if name in excluded or name in _elf_h_processed:
            continue
        if not name.startswith(name_prefix):
            continue
        _elf_h_processed.add(name)
        registered += 1
        if value in cls.by_value:
            raise ValueError('duplicate value {}: {}, {}'.format(
                value, name, cls.by_value[value]))
        constant = cls(name, value)
        cls.by_value[value] = constant
        cls.by_name[name] = constant
        setattr(cls, name, constant)
        if parent:
            # Make the symbolic name available through the parent as well.
            parent.by_name[name] = constant
            setattr(parent, name, constant)

    if not registered:
        raise ValueError('nothing matched prefix {!r}'.format(name_prefix))
272 | ||
# Each class below is filled with the matching elf.h constants by the
# _register_elf_h call that follows its definition.

class ElfClass(_TypedConstant):
    """ELF word size.  Type of EI_CLASS values."""
_register_elf_h(ElfClass, prefix='ELFCLASS')

class ElfData(_TypedConstant):
    """ELF endianness.  Type of EI_DATA values."""
_register_elf_h(ElfData, prefix='ELFDATA')

class Machine(_TypedConstant):
    """ELF machine type.  Type of values in Ehdr.e_machine field."""
    prefix = 'EM_'
# NOTE(review): EM_ARC_A5 is skipped, presumably because its value
# duplicates another EM_* constant -- confirm against elf.h.
_register_elf_h(Machine, skip=('EM_ARC_A5',))

class Et(_TypedConstant):
    """ELF file type.  Type of ET_* values and the Ehdr.e_type field."""
    prefix = 'ET_'
# ranges=True stores ET_LOOS/ET_HIOS and ET_LOPROC/ET_HIPROC as
# Et.os_range and Et.proc_range instead of registering them.
_register_elf_h(Et, ranges=True)
30035d67 | 290 | |
class Shn(_IntConstant):
    """ELF reserved section indices."""
    prefix = 'SHN_'
class ShnMIPS(Shn):
    """Supplemental SHN_* constants for EM_MIPS."""
class ShnPARISC(Shn):
    """Supplemental SHN_* constants for EM_PARISC."""
# The supplemental classes must be registered before Shn itself:
# names claimed by an earlier call are skipped by later ones, so the
# generic SHN_ registration only picks up what is left.
_register_elf_h(ShnMIPS, prefix='SHN_MIPS_', parent=Shn)
_register_elf_h(ShnPARISC, prefix='SHN_PARISC_', parent=Shn)
_register_elf_h(Shn, skip='SHN_LORESERVE SHN_HIRESERVE'.split(), ranges=True)
30035d67 | 301 | |
class Sht(_TypedConstant):
    """ELF section types.  Type of SHT_* values."""
    prefix = 'SHT_'
class ShtALPHA(Sht):
    """Supplemental SHT_* constants for EM_ALPHA."""
class ShtARC(Sht):
    """Supplemental SHT_* constants for EM_ARC."""
class ShtARM(Sht):
    """Supplemental SHT_* constants for EM_ARM."""
class ShtCSKY(Sht):
    """Supplemental SHT_* constants for EM_CSKY."""
class ShtIA_64(Sht):
    """Supplemental SHT_* constants for EM_IA_64."""
class ShtMIPS(Sht):
    """Supplemental SHT_* constants for EM_MIPS."""
class ShtPARISC(Sht):
    """Supplemental SHT_* constants for EM_PARISC."""
class ShtRISCV(Sht):
    """Supplemental SHT_* constants for EM_RISCV."""
# Machine-specific prefixes first; the generic SHT_ registration at the
# end only sees the names that no earlier call has claimed.
_register_elf_h(ShtALPHA, prefix='SHT_ALPHA_', parent=Sht)
_register_elf_h(ShtARC, prefix='SHT_ARC_', parent=Sht)
_register_elf_h(ShtARM, prefix='SHT_ARM_', parent=Sht)
_register_elf_h(ShtCSKY, prefix='SHT_CSKY_', parent=Sht)
_register_elf_h(ShtIA_64, prefix='SHT_IA_64_', parent=Sht)
_register_elf_h(ShtMIPS, prefix='SHT_MIPS_', parent=Sht)
_register_elf_h(ShtPARISC, prefix='SHT_PARISC_', parent=Sht)
_register_elf_h(ShtRISCV, prefix='SHT_RISCV_', parent=Sht)
_register_elf_h(Sht, ranges=True,
                skip='SHT_LOSUNW SHT_HISUNW SHT_LOUSER SHT_HIUSER'.split())
331 | ||
class Pf(_FlagConstant):
    """Program header flags.  Type of Phdr.p_flags values."""
    prefix = 'PF_'
class PfARM(Pf):
    """Supplemental PF_* flags for EM_ARM."""
class PfHP(Pf):
    """Supplemental PF_* flags for HP-UX."""
class PfIA_64(Pf):
    """Supplemental PF_* flags for EM_IA_64."""
class PfMIPS(Pf):
    """Supplemental PF_* flags for EM_MIPS."""
class PfPARISC(Pf):
    """Supplemental PF_* flags for EM_PARISC."""
# Machine-specific prefixes first; the generic PF_ registration at the
# end only sees the names that no earlier call has claimed.
_register_elf_h(PfARM, prefix='PF_ARM_', parent=Pf)
_register_elf_h(PfHP, prefix='PF_HP_', parent=Pf)
_register_elf_h(PfIA_64, prefix='PF_IA_64_', parent=Pf)
_register_elf_h(PfMIPS, prefix='PF_MIPS_', parent=Pf)
_register_elf_h(PfPARISC, prefix='PF_PARISC_', parent=Pf)
# The mask constants are not registered as flag values.
_register_elf_h(Pf, skip='PF_MASKOS PF_MASKPROC'.split())
351 | ||
class Shf(_FlagConstant):
    """Section flags.  Type of Shdr.sh_flags values."""
    prefix = 'SHF_'
class ShfALPHA(Shf):
    """Supplemental SHF_* constants for EM_ALPHA."""
class ShfARM(Shf):
    """Supplemental SHF_* constants for EM_ARM."""
class ShfIA_64(Shf):
    """Supplemental SHF_* constants for EM_IA_64."""
class ShfMIPS(Shf):
    """Supplemental SHF_* constants for EM_MIPS."""
class ShfPARISC(Shf):
    """Supplemental SHF_* constants for EM_PARISC."""
# Machine-specific prefixes first; the generic SHF_ registration at the
# end only sees the names that no earlier call has claimed.
_register_elf_h(ShfALPHA, prefix='SHF_ALPHA_', parent=Shf)
_register_elf_h(ShfARM, prefix='SHF_ARM_', parent=Shf)
_register_elf_h(ShfIA_64, prefix='SHF_IA_64_', parent=Shf)
_register_elf_h(ShfMIPS, prefix='SHF_MIPS_', parent=Shf)
_register_elf_h(ShfPARISC, prefix='SHF_PARISC_', parent=Shf)
# The mask constants are not registered as flag values.
_register_elf_h(Shf, skip='SHF_MASKOS SHF_MASKPROC'.split())
371 | ||
class Stb(_TypedConstant):
    """ELF symbol binding type.  Type of the STB_* values."""
    prefix = 'STB_'
# ranges=True stores the STB_LOOS/STB_HIOS and STB_LOPROC/STB_HIPROC
# values as Stb.os_range and Stb.proc_range.
_register_elf_h(Stb, ranges=True)
30035d67 | 376 | |
class Stt(_TypedConstant):
    """ELF symbol type.  Type of the STT_* values."""
    prefix = 'STT_'
# The supplemental classes derive from Stt (not from the section type
# class Sht), so that their constants are instances of Stt and compare
# equal to Stt values with the same number.
class SttARM(Stt):
    """Supplemental STT_* constants for EM_ARM."""
class SttPARISC(Stt):
    """Supplemental STT_* constants for EM_PARISC."""
class SttSPARC(Stt):
    """Supplemental STT_* constants for EM_SPARC."""
    # Replaced by a SttSPARC instance if elf.h defines the constant.
    STT_SPARC_REGISTER = 13
class SttX86_64(Stt):
    """Supplemental STT_* constants for EM_X86_64."""
# Machine-specific prefixes first; the generic STT_ registration at the
# end only sees the names that no earlier call has claimed.  No
# constants are registered for SttX86_64.
_register_elf_h(SttARM, prefix='STT_ARM_', parent=Stt)
_register_elf_h(SttPARISC, prefix='STT_PARISC_', parent=Stt)
_register_elf_h(SttSPARC, prefix='STT_SPARC_', parent=Stt)
_register_elf_h(Stt, ranges=True)
393 | ||
30035d67 | 394 | |
class Pt(_TypedConstant):
    """ELF program header types.  Type of Phdr.p_type."""
    prefix = 'PT_'
class PtAARCH64(Pt):
    """Supplemental PT_* constants for EM_AARCH64."""
class PtARM(Pt):
    """Supplemental PT_* constants for EM_ARM."""
class PtHP(Pt):
    """Supplemental PT_* constants for HP-UX."""
class PtIA_64(Pt):
    """Supplemental PT_* constants for EM_IA_64."""
class PtMIPS(Pt):
    """Supplemental PT_* constants for EM_MIPS."""
class PtPARISC(Pt):
    """Supplemental PT_* constants for EM_PARISC."""
class PtRISCV(Pt):
    """Supplemental PT_* constants for EM_RISCV."""
# Machine-specific prefixes first; the generic PT_ registration at the
# end only sees the names that no earlier call has claimed.
_register_elf_h(PtAARCH64, prefix='PT_AARCH64_', parent=Pt)
_register_elf_h(PtARM, prefix='PT_ARM_', parent=Pt)
_register_elf_h(PtHP, prefix='PT_HP_', parent=Pt)
_register_elf_h(PtIA_64, prefix='PT_IA_64_', parent=Pt)
_register_elf_h(PtMIPS, prefix='PT_MIPS_', parent=Pt)
_register_elf_h(PtPARISC, prefix='PT_PARISC_', parent=Pt)
_register_elf_h(PtRISCV, prefix='PT_RISCV_', parent=Pt)
_register_elf_h(Pt, skip='PT_LOSUNW PT_HISUNW'.split(), ranges=True)
420 | ||
class Dt(_TypedConstant):
    """ELF dynamic segment tags.  Type of Dyn.d_tag."""
    prefix = 'DT_'
class DtAARCH64(Dt):
    """Supplemental DT_* constants for EM_AARCH64."""
class DtALPHA(Dt):
    """Supplemental DT_* constants for EM_ALPHA."""
class DtALTERA_NIOS2(Dt):
    """Supplemental DT_* constants for EM_ALTERA_NIOS2."""
class DtIA_64(Dt):
    """Supplemental DT_* constants for EM_IA_64."""
class DtMIPS(Dt):
    """Supplemental DT_* constants for EM_MIPS."""
class DtPPC(Dt):
    """Supplemental DT_* constants for EM_PPC."""
class DtPPC64(Dt):
    """Supplemental DT_* constants for EM_PPC64."""
class DtRISCV(Dt):
    """Supplemental DT_* constants for EM_RISCV."""
class DtSPARC(Dt):
    """Supplemental DT_* constants for EM_SPARC."""
# DT_* names that are never registered as constants.  The same list is
# passed to every registration call below.
_dt_skip = '''
DT_ENCODING DT_PROCNUM
DT_ADDRRNGLO DT_ADDRRNGHI DT_ADDRNUM
DT_VALRNGLO DT_VALRNGHI DT_VALNUM
DT_VERSIONTAGNUM DT_EXTRANUM
DT_AARCH64_NUM
DT_ALPHA_NUM
DT_IA_64_NUM
DT_MIPS_NUM
DT_PPC_NUM
DT_PPC64_NUM
DT_SPARC_NUM
'''.strip().split()
# Machine-specific prefixes first; the generic DT_ registration at the
# end only sees the names that no earlier call has claimed.
_register_elf_h(DtAARCH64, prefix='DT_AARCH64_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtALPHA, prefix='DT_ALPHA_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtALTERA_NIOS2, prefix='DT_NIOS2_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtIA_64, prefix='DT_IA_64_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtMIPS, prefix='DT_MIPS_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtPPC, prefix='DT_PPC_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtPPC64, prefix='DT_PPC64_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtRISCV, prefix='DT_RISCV_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtSPARC, prefix='DT_SPARC_', skip=_dt_skip, parent=Dt)
_register_elf_h(Dt, skip=_dt_skip, ranges=True)
del _dt_skip

# Constant extraction is complete.  The registration helper and the
# parsed elf.h dictionary are removed from the module namespace.
del _register_elf_h
del _elf_h
30035d67 FW |
470 | |
class StInfo:
    """ELF symbol binding and type.  Type of the Sym.st_info field.

    Instance attributes:

    bind: The symbol binding, converted with Stb.
    type: The symbol type, converted with Stt.

    """
    def __init__(self, arg0, arg1=None):
        """Create the object from a raw st_info value or from its parts.

        A single int argument is split into the binding (the bits above
        the low four) and the type (the low four bits).  Otherwise arg0
        is the binding and arg1 the type; both are passed to Stb and
        Stt, respectively.

        """
        if isinstance(arg0, int) and arg1 is None:
            self.bind = Stb(arg0 >> 4)
            self.type = Stt(arg0 & 15)
        else:
            self.bind = Stb(arg0)
            self.type = Stt(arg1)

    def value(self):
        """Returns the raw value for the bind/type combination."""
        # The value attribute of a constant is a plain integer, not a
        # method, so it must not be called.
        return (self.bind.value << 4) | self.type.value
484 | ||
# Type in an ELF file.  Used for deserialization.
# unpack: function that converts a blob into the named tuple.
# size: number of bytes consumed by unpack.
_Layout = collections.namedtuple('_Layout', 'unpack size')

def _define_layouts(baseclass: type, layout32: str, layout64: str,
                    types=None, fields32=None):
    """Assign variants dict to baseclass.

    The variants dict is stored as baseclass.layouts.  It is indexed
    by (ElfClass, ElfData) pairs, and its values are _Layout instances.

    baseclass: a named tuple class (its _fields attribute is used).
    layout32, layout64: struct format strings (without byte order
      prefix) for the 32-bit and 64-bit variants.
    types: optional dict mapping field names to converters that are
      applied to the unpacked values.
    fields32: optional field order of the 32-bit variant, if it differs
      from the order of baseclass._fields.

    Raises ValueError if the formats do not match the field count, if
    types mentions unknown fields, or if fields32 is not a permutation
    of the fields.

    """
    struct32 = struct.Struct(layout32)
    struct64 = struct.Struct(layout64)

    # Check that the struct formats yield the right number of components.
    for s in (struct32, struct64):
        example = s.unpack(b' ' * s.size)
        if len(example) != len(baseclass._fields):
            raise ValueError('{!r} yields wrong field count: {} != {}'.format(
                s.format, len(example), len(baseclass._fields)))

    # Check that field names in types are correct.
    if types is None:
        types = {}
    for n in types:
        if n not in baseclass._fields:
            raise ValueError('{} does not have field {!r}'.format(
                baseclass.__name__, n))

    if fields32 is not None \
       and set(fields32) != set(baseclass._fields):
        raise ValueError('{!r} is not a permutation of the fields {!r}'.format(
            fields32, baseclass._fields))

    # Names that are already taken inside the generated functions.
    used_names = {baseclass.__name__}
    used_names.update(baseclass._fields)
    used_names.update(cls.__name__ for cls in types.values())

    def unique_name(name):
        """Find a name that is not used for a class or field name."""
        candidate = name
        n = 0
        while candidate in used_names:
            n += 1
            candidate = '{}{}'.format(name, n)
        used_names.add(candidate)
        return candidate

    blob_name = unique_name('blob')
    struct_unpack_name = unique_name('struct_unpack')

    layouts = {}
    for (bits, elfclass, layout, fields) in (
            (32, ElfClass.ELFCLASS32, layout32, fields32),
            (64, ElfClass.ELFCLASS64, layout64, None),
    ):
        for (elfdata, structprefix, funcsuffix) in (
                (ElfData.ELFDATA2LSB, '<', 'LE'),
                (ElfData.ELFDATA2MSB, '>', 'BE'),
        ):
            env = {
                baseclass.__name__: baseclass,
                struct_unpack_name: struct.unpack,
            }

            # Add the type converters.
            for cls in types.values():
                env[cls.__name__] = cls

            funcname = ''.join(
                ('unpack_', baseclass.__name__, str(bits), funcsuffix))

            code = '''
def {funcname}({blob_name}):
'''.format(funcname=funcname, blob_name=blob_name)

            indent = ' ' * 4
            struct_format = structprefix + layout
            unpack_call = '{}({!r}, {})'.format(
                struct_unpack_name, struct_format, blob_name)
            field_names = ', '.join(baseclass._fields)
            if not types and fields is None:
                # Nothing to convert or reorder: the unpacked tuple is
                # passed on as the positional arguments directly.
                code += '{}return {}(*{})\n'.format(
                    indent, baseclass.__name__, unpack_call)
            else:
                # Destructuring tuple assignment.
                if fields is None:
                    code += '{}{} = {}\n'.format(
                        indent, field_names, unpack_call)
                else:
                    # Use custom field order.
                    code += '{}{} = {}\n'.format(
                        indent, ', '.join(fields), unpack_call)

                # Perform the type conversions.
                for n in baseclass._fields:
                    if n in types:
                        code += '{}{} = {}({})\n'.format(
                            indent, n, types[n].__name__, n)
                # Create the named tuple.
                code += '{}return {}({})\n'.format(
                    indent, baseclass.__name__, field_names)

            # The code is generated from the arguments of this function
            # only; it never contains data from an ELF file.
            exec(code, env)
            # Compute the size from the same format (including the byte
            # order prefix) that the generated function unpacks with, so
            # that native alignment rules cannot influence it.
            layouts[(elfclass, elfdata)] = _Layout(
                env[funcname], struct.calcsize(struct_format))
    baseclass.layouts = layouts
592 | ||
593 | ||
# Corresponds to EI_* indices into Elf*_Ehdr.e_ident.
class Ident(collections.namedtuple('Ident',
    'ei_mag ei_class ei_data ei_version ei_osabi ei_abiversion ei_pad')):
    """Decoded e_ident array of the ELF header."""

    def __new__(cls, *args):
        """Construct an object from a blob or its constituent fields."""
        if len(args) == 1:
            return cls.unpack(args[0])
        # super() (instead of cls.__base__) keeps this working for
        # subclasses; cls.__base__ would refer back to this class and
        # recurse without end.
        return super().__new__(cls, *args)

    @staticmethod
    def unpack(blob: memoryview) -> 'Ident':
        """Parse raw data into a tuple.

        The blob must be exactly 16 bytes long.  The ei_class and
        ei_data values are converted to ElfClass and ElfData.

        """
        ei_mag, ei_class, ei_data, ei_version, ei_osabi, ei_abiversion, \
            ei_pad = struct.unpack('4s5B7s', blob)
        return Ident(ei_mag, ElfClass(ei_class), ElfData(ei_data),
                     ei_version, ei_osabi, ei_abiversion, ei_pad)
    # Number of bytes consumed by unpack.
    size = 16
612 | ||
# Corresponds to Elf32_Ehdr and Elf64_Ehdr.
Ehdr = collections.namedtuple('Ehdr',
   'e_ident e_type e_machine e_version e_entry e_phoff e_shoff e_flags'
    + ' e_ehsize e_phentsize e_phnum e_shentsize e_shnum e_shstrndx')
# The 16-byte e_ident array is unpacked as one bytes object and then
# converted by Ident.
_define_layouts(Ehdr,
                layout32='16s2H5I6H',
                layout64='16s2HI3QI6H',
                types=dict(e_ident=Ident,
                           e_machine=Machine,
                           e_type=Et,
                           e_shstrndx=Shn))
624 | ||
# Corresponds to Elf32_Phdr and Elf64_Phdr.  Order follows the latter.
Phdr = collections.namedtuple('Phdr',
    'p_type p_flags p_offset p_vaddr p_paddr p_filesz p_memsz p_align')
# fields32 gives the order in which the fields are unpacked for the
# 32-bit layout, where p_flags comes after p_memsz.
_define_layouts(Phdr,
                layout32='8I',
                fields32=('p_type', 'p_offset', 'p_vaddr', 'p_paddr',
                          'p_filesz', 'p_memsz', 'p_flags', 'p_align'),
                layout64='2I6Q',
                types=dict(p_type=Pt, p_flags=Pf))
634 | ||
635 | ||
# Corresponds to Elf32_Shdr and Elf64_Shdr.
class Shdr(collections.namedtuple('Shdr',
    'sh_name sh_type sh_flags sh_addr sh_offset sh_size sh_link sh_info'
    + ' sh_addralign sh_entsize')):
    def resolve(self, strtab: 'StringTable') -> 'Shdr':
        """Resolve sh_name using a string table.

        Returns a new object of the same class in which the first field
        (sh_name) is replaced by the result of strtab.get; the other
        fields are copied.

        """
        return self.__class__(strtab.get(self[0]), *self[1:])
_define_layouts(Shdr,
                layout32='10I',
                layout64='2I4Q2I2Q',
                types=dict(sh_type=Sht,
                           sh_flags=Shf,
                           sh_link=Shn))
649 | ||
# Corresponds to Elf32_Dyn and Elf64_Dyn.  The nesting through the
# d_un union is skipped, and d_ptr is missing (its representation in
# Python would be identical to d_val).
Dyn = collections.namedtuple('Dyn', 'd_tag d_val')
# Both fields are unpacked as signed integers ('i'/'q').
_define_layouts(Dyn,
                layout32='2i',
                layout64='2q',
                types=dict(d_tag=Dt))
658 | ||
# Corresponds to Elf32_Sym and Elf64_Sym.  Order follows the latter.
class Sym(collections.namedtuple('Sym',
    'st_name st_info st_other st_shndx st_value st_size')):
    def resolve(self, strtab: 'StringTable') -> 'Sym':
        """Resolve st_name using a string table.

        Returns a new object of the same class in which the first field
        (st_name) is replaced by the result of strtab.get; the other
        fields are copied.

        """
        return self.__class__(strtab.get(self[0]), *self[1:])
# fields32 gives the order in which the fields are unpacked for the
# 32-bit layout, where st_value and st_size follow st_name.
_define_layouts(Sym,
                layout32='3I2BH',
                layout64='I2BH2Q',
                fields32=('st_name', 'st_value', 'st_size', 'st_info',
                          'st_other', 'st_shndx'),
                types=dict(st_shndx=Shn,
                           st_info=StInfo))
672 | ||
# Corresponds to Elf32_Rel and Elf64_Rel.  Both fields are unpacked as
# unsigned integers and are not converted any further.
Rel = collections.namedtuple('Rel', 'r_offset r_info')
_define_layouts(Rel,
                layout32='2I',
                layout64='2Q')
678 | ||
# Corresponds to Elf32_Rela and Elf64_Rela.  r_offset and r_info are
# unsigned; r_addend is a signed field (Elf32_Sword/Elf64_Sxword) and
# is therefore unpacked with a signed format, so that negative addends
# are not reported as large positive numbers.
Rela = collections.namedtuple('Rela', 'r_offset r_info r_addend')
_define_layouts(Rela,
                layout32='2Ii',
                layout64='2Qq')
684 | ||
class StringTable:
    """ELF string table."""

    def __init__(self, blob):
        """Wrap the string table data in blob.

        blob: a memoryview-like object; it is stored, not copied.

        """
        self.blob = blob

    def get(self, index) -> bytes:
        """Returns the null-terminated byte string at the index.

        The terminating null byte is not part of the result.  Running
        off the end of the blob without finding a null byte raises
        IndexError.

        """
        data = self.blob
        stop = index
        # Advance to the terminating null byte.
        while data[stop] != 0:
            stop += 1
        return bytes(data[index:stop])
703 | ||
704 | class Image: | |
705 | """ELF image parser.""" | |
706 | def __init__(self, image): | |
707 | """Create an ELF image from binary image data. | |
708 | ||
709 | image: a memoryview-like object that supports efficient range | |
710 | subscripting. | |
711 | ||
712 | """ | |
713 | self.image = image | |
714 | ident = self.read(Ident, 0) | |
715 | classdata = (ident.ei_class, ident.ei_data) | |
716 | # Set self.Ehdr etc. to the subtypes with the right parsers. | |
717 | for typ in (Ehdr, Phdr, Shdr, Dyn, Sym, Rel, Rela): | |
718 | setattr(self, typ.__name__, typ.layouts.get(classdata, None)) | |
719 | ||
720 | if self.Ehdr is not None: | |
721 | self.ehdr = self.read(self.Ehdr, 0) | |
722 | self._shdr_num = self._compute_shdr_num() | |
723 | else: | |
724 | self.ehdr = None | |
725 | self._shdr_num = 0 | |
726 | ||
727 | self._section = {} | |
728 | self._stringtab = {} | |
729 | ||
730 | if self._shdr_num > 0: | |
731 | self._shdr_strtab = self._find_shdr_strtab() | |
732 | else: | |
733 | self._shdr_strtab = None | |
734 | ||
735 | @staticmethod | |
736 | def readfile(path: str) -> 'Image': | |
737 | """Reads the ELF file at the specified path.""" | |
738 | with open(path, 'rb') as inp: | |
739 | return Image(memoryview(inp.read())) | |
740 | ||
741 | def _compute_shdr_num(self) -> int: | |
742 | """Computes the actual number of section headers.""" | |
743 | shnum = self.ehdr.e_shnum | |
744 | if shnum == 0: | |
745 | if self.ehdr.e_shoff == 0 or self.ehdr.e_shentsize == 0: | |
746 | # No section headers. | |
747 | return 0 | |
748 | # Otherwise the extension mechanism is used (which may be | |
749 | # needed because e_shnum is just 16 bits). | |
750 | return self.read(self.Shdr, self.ehdr.e_shoff).sh_size | |
751 | return shnum | |
752 | ||
753 | def _find_shdr_strtab(self) -> StringTable: | |
754 | """Finds the section header string table (maybe via extensions).""" | |
755 | shstrndx = self.ehdr.e_shstrndx | |
756 | if shstrndx == Shn.SHN_XINDEX: | |
757 | shstrndx = self.read(self.Shdr, self.ehdr.e_shoff).sh_link | |
758 | return self._find_stringtab(shstrndx) | |
759 | ||
760 | def read(self, typ: type, offset:int ): | |
761 | """Reads an object at a specific offset. | |
762 | ||
763 | The type must have been enhanced using _define_variants. | |
764 | ||
765 | """ | |
766 | return typ.unpack(self.image[offset: offset + typ.size]) | |
767 | ||
768 | def phdrs(self) -> Phdr: | |
769 | """Generator iterating over the program headers.""" | |
770 | if self.ehdr is None: | |
771 | return | |
772 | size = self.ehdr.e_phentsize | |
773 | if size != self.Phdr.size: | |
774 | raise ValueError('Unexpected Phdr size in ELF header: {} != {}' | |
775 | .format(size, self.Phdr.size)) | |
776 | ||
777 | offset = self.ehdr.e_phoff | |
778 | for _ in range(self.ehdr.e_phnum): | |
779 | yield self.read(self.Phdr, offset) | |
780 | offset += size | |
781 | ||
def shdrs(self, resolve: bool=True) -> Shdr:
    """Generator producing the section headers in table order.

    With resolve set, each header is passed through its resolve
    method together with the section header string table before it
    is yielded, which translates the section name.

    Nothing is produced if the image has no section headers.
    Raises ValueError if e_shentsize does not match the size of the
    Shdr structure.

    """
    count = self._shdr_num
    if count == 0:
        return

    entry_size = self.ehdr.e_shentsize
    if entry_size != self.Shdr.size:
        raise ValueError('Unexpected Shdr size in ELF header: {} != {}'
                         .format(entry_size, self.Shdr.size))

    base = self.ehdr.e_shoff
    for index in range(count):
        header = self.read(self.Shdr, base + index * entry_size)
        yield header.resolve(self._shdr_strtab) if resolve else header
804 | ||
def dynamic(self) -> Dyn:
    """Generator producing the entries of the dynamic segment.

    Only the first program header of type PT_DYNAMIC is considered.
    If its p_filesz is zero, nothing is produced.  Otherwise Dyn
    objects are read from p_offset until exactly p_memsz bytes have
    been covered.

    Raises ValueError if p_memsz is not a multiple of the Dyn size.

    """
    for segment in self.phdrs():
        if segment.p_type != Pt.PT_DYNAMIC:
            continue
        # Pick the first dynamic segment, like the loader.
        if segment.p_filesz == 0:
            # Probably separated debuginfo.
            return
        entry_size = self.Dyn.size
        cursor = segment.p_offset
        limit = cursor + segment.p_memsz
        while True:
            following = cursor + entry_size
            if following > limit:
                raise ValueError(
                    'Dynamic segment size {} is not a multiple of Dyn size {}'.format(
                        segment.p_memsz, entry_size))
            yield self.read(self.Dyn, cursor)
            if following == limit:
                return
            cursor = following
826 | ||
def syms(self, shdr: Shdr, resolve: bool=True) -> Sym:
    """Generator producing the entries of the symbol table in shdr.

    shdr must describe a section of type SHT_SYMTAB.  With resolve
    set, each symbol is passed through its resolve method together
    with the string table referenced by sh_link, which translates
    the symbol name.

    Raises ValueError if sh_entsize does not match the size of the
    Sym structure, or if the section size is not a multiple of the
    entry size (reported after the preceding entries were produced).

    """
    assert shdr.sh_type == Sht.SHT_SYMTAB
    entry_size = shdr.sh_entsize
    if entry_size != self.Sym.size:
        raise ValueError('Invalid symbol table entry size {}'.format(entry_size))
    cursor = shdr.sh_offset
    limit = shdr.sh_offset + shdr.sh_size
    names = self._find_stringtab(shdr.sh_link) if resolve else None
    while cursor < limit:
        symbol = self.read(self.Sym, cursor)
        yield symbol.resolve(names) if resolve else symbol
        cursor += entry_size
    if cursor != limit:
        raise ValueError('Symbol table is not a multiple of entry size')
850 | ||
def lookup_string(self, strtab_index: int, strtab_offset: int) -> bytes:
    """Fetches the string at strtab_offset from a string table.

    strtab_index is the section index (link index) of the string
    table.  A table already present in the cache is used directly;
    otherwise it is obtained through _find_stringtab.

    """
    if strtab_index in self._stringtab:
        table = self._stringtab[strtab_index]
    else:
        table = self._find_stringtab(strtab_index)
    return table.get(strtab_offset)
858 | ||
def find_section(self, shndx: Shn) -> Shdr:
    """Returns the section header for the indexed section.

    The section name is not resolved.  Headers are cached in
    self._section, so repeated lookups of the same index do not
    read the image again.

    Raises ValueError if shndx is one of the reserved indices
    defined by Shn, or if its value is outside [0, number of
    section headers).

    """
    try:
        return self._section[shndx]
    except KeyError:
        pass
    if shndx in Shn:
        raise ValueError('Reserved section index {}'.format(shndx))
    idx = shndx.value
    # Valid indices are 0 .. _shdr_num - 1.  The upper bound is
    # exclusive, as the error message and _find_stringtab say.
    if idx < 0 or idx >= self._shdr_num:
        raise ValueError('Section index {} out of range [0, {})'.format(
            idx, self._shdr_num))
    shdr = self.read(
        self.Shdr, self.ehdr.e_shoff + idx * self.Shdr.size)
    self._section[shndx] = shdr
    return shdr
878 | ||
def _find_stringtab(self, sh_link: int) -> StringTable:
    """Returns the string table stored in the section with index sh_link.

    Tables are cached in self._stringtab; a cached table is returned
    without reading the image again.

    Raises ValueError if sh_link is outside [0, number of section
    headers) or if the section is not of type SHT_STRTAB.

    """
    try:
        # Return the cached table itself, not the whole cache.
        return self._stringtab[sh_link]
    except KeyError:
        pass
    if sh_link < 0 or sh_link >= self._shdr_num:
        raise ValueError('Section index {} out of range [0, {})'.format(
            sh_link, self._shdr_num))
    shdr = self.read(
        self.Shdr, self.ehdr.e_shoff + sh_link * self.Shdr.size)
    if shdr.sh_type != Sht.SHT_STRTAB:
        raise ValueError(
            'Section {} is not a string table: {}'.format(
                sh_link, shdr.sh_type))
    strtab = StringTable(
        self.image[shdr.sh_offset:shdr.sh_offset + shdr.sh_size])
    # This could retain essentially arbitrary amounts of data,
    # but caching string tables seems important for performance.
    self._stringtab[sh_link] = strtab
    return strtab
897 | ||
bd13cb19 FW |
def elf_hash(s):
    """Returns the 32-bit ELF hash of s.

    s may be any iterable producing integers (such as bytes) or
    single characters (such as str); characters are converted with
    ord before they are mixed in.

    """
    value = 0
    for item in s:
        code = item if type(item) is int else ord(item)
        value = ((value << 4) + code) & 0xffffffff
        high = value & 0xf0000000
        # Fold the top four bits into bits 4-7, then clear them.
        value = (value & 0x0fffffff) ^ (high >> 24)
    return value
908 | ||
def gnu_hash(s):
    """Returns the 32-bit GNU hash of s.

    s may be any iterable producing integers (such as bytes) or
    single characters (such as str); characters are converted with
    ord before they are mixed in.

    """
    value = 5381
    for item in s:
        code = item if type(item) is int else ord(item)
        # value * 33 written as a shift plus an addition.
        value = ((value << 5) + value + code) & 0xffffffff
    return value
30035d67 FW |
917 | |
# Export every module-level name that starts with an upper-case
# letter.  dir() is evaluated here at module scope, so this statement
# has to remain at the end of the file to see all definitions.
# Lower-case names such as elf_hash and gnu_hash are not listed and
# are therefore not covered by "from ... import *".
__all__ = [name for name in dir() if name[0].isupper()]