]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/powerpc/dl-machine.c
Update.
[thirdparty/glibc.git] / sysdeps / powerpc / dl-machine.c
CommitLineData
052b6a6c 1/* Machine-dependent ELF dynamic relocation functions. PowerPC version.
f420344c 2 Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
052b6a6c
UD
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20#include <unistd.h>
21#include <string.h>
22#include <sys/param.h>
23#include <link.h>
24#include <dl-machine.h>
25#include <elf/ldsodefs.h>
26#include <elf/dynamic-link.h>
27
28/* Because ld.so is now versioned, these functions can be in their own file;
29 no relocations need to be done to call them.
30 Of course, if ld.so is not versioned... */
31#if !(DO_VERSIONING - 0)
32#error This will not work with versioning turned off, sorry.
33#endif
34
35
/* Layout of the PLT, measured in 32-bit words from its start.

   PLT_LONGBRANCH_ENTRY_WORDS   offset of the shared long-branch stub
   PLT_TRAMPOLINE_ENTRY_WORDS   offset of the lazy-resolution trampoline
   PLT_INITIAL_ENTRY_WORDS      offset of the first per-symbol entry
   PLT_DOUBLE_SIZE              number of leading entries that occupy two
                                words each; later entries occupy four.  */
#define PLT_INITIAL_ENTRY_WORDS     18
#define PLT_LONGBRANCH_ENTRY_WORDS  0
#define PLT_TRAMPOLINE_ENTRY_WORDS  6
#define PLT_DOUBLE_SIZE             (1 << 13)

/* Word offset at which entry number ENTRY_NUMBER begins.  Every entry
   contributes two words; each entry beyond PLT_DOUBLE_SIZE contributes
   two more.  */
#define PLT_ENTRY_START_WORDS(entry_number)                     \
  (((entry_number) > PLT_DOUBLE_SIZE                            \
    ? 2 * ((entry_number) - PLT_DOUBLE_SIZE)                    \
    : 0)                                                        \
   + 2 * (entry_number)                                         \
   + PLT_INITIAL_ENTRY_WORDS)

/* The data words follow immediately after the last of NUM_ENTRIES
   entries.  */
#define PLT_DATA_START_WORDS(num_entries) PLT_ENTRY_START_WORDS(num_entries)
47
/* Macros to build PowerPC opcode words.  Each expands to the 32-bit
   encoding of one instruction; register arguments are register
   numbers, and immediate/displacement arguments are masked down to
   the width of their instruction field.  */

/* addi rd,ra,simm and addis rd,ra,simm (16-bit immediate).  */
#define OPCODE_ADDI(rd,ra,simm) \
  (0x38000000 | (rd) << 21 | (ra) << 16 | ((simm) & 0xffff))
#define OPCODE_ADDIS(rd,ra,simm) \
  (0x3c000000 | (rd) << 21 | (ra) << 16 | ((simm) & 0xffff))

/* add rd,ra,rb.  */
#define OPCODE_ADD(rd,ra,rb) \
  (0x7c000214 | (rd) << 21 | (ra) << 16 | (rb) << 11)

/* Relative (b) and absolute (ba) branches; TARGET is a byte offset or
   byte address whose low 26 bits, word-aligned, go into the opcode.  */
#define OPCODE_B(target) (0x48000000 | ((target) & 0x03fffffc))
#define OPCODE_BA(target) (0x48000002 | ((target) & 0x03fffffc))

/* bctr.  */
#define OPCODE_BCTR() 0x4e800420

/* lwz rd,d(ra) and lwzu rd,d(ra) (16-bit displacement).  */
#define OPCODE_LWZ(rd,d,ra) \
  (0x80000000 | (rd) << 21 | (ra) << 16 | ((d) & 0xffff))
#define OPCODE_LWZU(rd,d,ra) \
  (0x84000000 | (rd) << 21 | (ra) << 16 | ((d) & 0xffff))

/* mtctr rd.  */
#define OPCODE_MTCTR(rd) (0x7C0903A6 | (rd) << 21)

/* rlwinm ra,rs,sh,mb,me.  */
#define OPCODE_RLWINM(ra,rs,sh,mb,me) \
  (0x54000000 | (rs) << 21 | (ra) << 16 | (sh) << 11 | (mb) << 6 | (me) << 1)

/* li rd,simm, built as addi from register 0.  */
#define OPCODE_LI(rd,simm) OPCODE_ADDI(rd,0,simm)

/* addis/lis with the high half of VALUE, adjusted by 0x8000 so that a
   following instruction which adds the sign-extended low half of VALUE
   reconstructs VALUE exactly.  */
#define OPCODE_ADDIS_HI(rd,ra,value) \
  OPCODE_ADDIS(rd,ra,((value) + 0x8000) >> 16)
#define OPCODE_LIS_HI(rd,value) OPCODE_ADDIS_HI(rd,0,value)

/* slwi ra,rs,sh, built as rlwinm ra,rs,sh,0,31-sh.  SH is parenthesised
   in the subtraction so that an expression argument (e.g. `a + b')
   produces the intended mask end rather than `31 - a + b'.  */
#define OPCODE_SLWI(ra,rs,sh) OPCODE_RLWINM(ra,rs,sh,0,31-(sh))
71
72
2d09b95d
UD
/* Single-instruction wrappers around the PowerPC cache-maintenance and
   synchronisation instructions used after patching code.  */
#define PPC_DCBST(where) \
  asm ("dcbst 0,%0" : : "r"(where) : "memory")
#define PPC_SYNC \
  asm ("sync" : : : "memory")
#define PPC_ISYNC \
  asm volatile ("sync; isync" : : : "memory")
#define PPC_ICBI(where) \
  asm ("icbi 0,%0" : : "r"(where) : "memory")

/* Execute a trap instruction.  */
#define PPC_DIE asm volatile ("tweq 0,0")

/* For code that has just been modified but cannot be sitting in the
   instruction fetch queue (or when it does not matter if it is):
   dcbst on the word, sync, then icbi on the same address.  */
#define MODIFIED_CODE_NOQUEUE(where)                                    \
  do                                                                    \
    {                                                                   \
      PPC_DCBST(where);                                                 \
      PPC_SYNC;                                                         \
      PPC_ICBI(where);                                                  \
    }                                                                   \
  while (0)

/* For code that might already be in the instruction queue: the same
   sequence as above, followed by PPC_ISYNC.  */
#define MODIFIED_CODE(where)                                            \
  do                                                                    \
    {                                                                   \
      MODIFIED_CODE_NOQUEUE(where);                                     \
      PPC_ISYNC;                                                        \
    }                                                                   \
  while (0)
86
87
88/* The idea here is that to conform to the ABI, we are supposed to try
89 to load dynamic objects between 0x10000 (we actually use 0x40000 as
90 the lower bound, to increase the chance of a memory reference from
91 a null pointer giving a segfault) and the program's load address;
92 this may allow us to use a branch instruction in the PLT rather
93 than a computed jump. The address is only used as a preference for
94 mmap, so if we get it wrong the worst that happens is that it gets
95 mapped somewhere else. */
96
97ElfW(Addr)
98__elf_preferred_address(struct link_map *loader, size_t maplength,
99 ElfW(Addr) mapstartpref)
100{
101 ElfW(Addr) low, high;
102 struct link_map *l;
103
104 /* If the object has a preference, load it there! */
105 if (mapstartpref != 0)
106 return mapstartpref;
107
108 /* Otherwise, quickly look for a suitable gap between 0x3FFFF and
109 0x70000000. 0x3FFFF is so that references off NULL pointers will
110 cause a segfault, 0x70000000 is just paranoia (it should always
111 be superceded by the program's load address). */
112 low = 0x0003FFFF;
113 high = 0x70000000;
114 for (l = _dl_loaded; l; l = l->l_next)
115 {
116 ElfW(Addr) mapstart, mapend;
117 mapstart = l->l_map_start & ~(_dl_pagesize - 1);
118 mapend = l->l_map_end | (_dl_pagesize - 1);
119 assert (mapend > mapstart);
120
121 if (mapend >= high && high >= mapstart)
122 high = mapstart;
123 else if (mapend >= low && low >= mapstart)
124 low = mapend;
125 else if (high >= mapend && mapstart >= low)
126 {
127 if (high - mapend >= mapstart - low)
128 low = mapend;
129 else
130 high = mapstart;
131 }
132 }
133
134 high -= 0x10000; /* Allow some room between objects. */
135 maplength = (maplength | (_dl_pagesize-1)) + 1;
136 if (high <= low || high - low < maplength )
137 return 0;
138 return high - maplength; /* Both high and maplength are page-aligned. */
139}
140
141/* Set up the loaded object described by L so its unrelocated PLT
142 entries will jump to the on-demand fixup code in dl-runtime.c.
143 Also install a small trampoline to be used by entries that have
144 been relocated to an address too far away for a single branch. */
145
7137f424
GK
146/* There are many kinds of PLT entries:
147
148 (1) A direct jump to the actual routine, either a relative or
149 absolute branch. These are set up in __elf_machine_fixup_plt.
150
151 (2) Short lazy entries. These cover the first 8192 slots in
152 the PLT, and look like (where 'index' goes from 0 to 8191):
153
154 li %r11, index*4
155 b &plt[PLT_TRAMPOLINE_ENTRY_WORDS+2]
156
157 (3) Short indirect jumps. These replace (2) when a direct jump
158 wouldn't reach. They look the same except that the branch
159 is 'b &plt[PLT_LONGBRANCH_ENTRY_WORDS]'.
160
161 (4) Long lazy entries. These cover the slots when a short entry
162 won't fit ('index*4' overflows its field), and look like:
163
164 lis %r11, %hi(index*4 + &plt[PLT_DATA_START_WORDS])
165 lwzu %r12, %r11, %lo(index*4 + &plt[PLT_DATA_START_WORDS])
166 b &plt[PLT_TRAMPOLINE_ENTRY_WORDS]
167 bctr
168
169 (5) Long indirect jumps. These replace (4) when a direct jump
170 wouldn't reach. They look like:
171
172 lis %r11, %hi(index*4 + &plt[PLT_DATA_START_WORDS])
173 lwz %r12, %r11, %lo(index*4 + &plt[PLT_DATA_START_WORDS])
174 mtctr %r12
175 bctr
176
177 (6) Long direct jumps. These are used when thread-safety is not
178 required. They look like:
179
180 lis %r12, %hi(finaladdr)
181 addi %r12, %r12, %lo(finaladdr)
182 mtctr %r12
183 bctr
184
185
186 The lazy entries, (2) and (4), are set up here in
187 __elf_machine_runtime_setup. (1), (3), and (5) are set up in
188 __elf_machine_fixup_plt. (1), (3), and (6) can also be constructed
189 in __process_machine_rela.
190
191 The reason for the somewhat strange construction of the long
192 entries, (4) and (5), is that we need to ensure thread-safety. For
193 (1) and (3), this is obvious because only one instruction is
194 changed and the PPC architecture guarantees that aligned stores are
195 atomic. For (5), this is more tricky. When changing (4) to (5),
196 the `b' instruction is first changed to `mtctr'; this is safe
197 and is why the `lwzu' instruction is not just a simple `addi'.
198 Once this is done, and is visible to all processors, the `lwzu' can
199 safely be changed to a `lwz'. */
052b6a6c
UD
200int
201__elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
202{
203 if (map->l_info[DT_JMPREL])
204 {
205 Elf32_Word i;
f1d34527 206 Elf32_Word *plt = (Elf32_Word *) map->l_info[DT_PLTGOT]->d_un.d_val;
052b6a6c
UD
207 Elf32_Word num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val
208 / sizeof (Elf32_Rela));
209 Elf32_Word rel_offset_words = PLT_DATA_START_WORDS (num_plt_entries);
7137f424 210 Elf32_Word data_words = (Elf32_Word) (plt + rel_offset_words);
052b6a6c 211 Elf32_Word size_modified;
7137f424 212
052b6a6c
UD
213 extern void _dl_runtime_resolve (void);
214 extern void _dl_prof_resolve (void);
052b6a6c 215
7137f424
GK
216 /* Convert the index in r11 into an actual address, and get the
217 word at that address. */
218 plt[PLT_LONGBRANCH_ENTRY_WORDS] = OPCODE_ADDIS_HI (11, 11, data_words);
219 plt[PLT_LONGBRANCH_ENTRY_WORDS + 1] = OPCODE_LWZ (11, data_words, 11);
052b6a6c 220
7137f424
GK
221 /* Call the procedure at that address. */
222 plt[PLT_LONGBRANCH_ENTRY_WORDS + 2] = OPCODE_MTCTR (11);
223 plt[PLT_LONGBRANCH_ENTRY_WORDS + 3] = OPCODE_BCTR ();
722c33bb 224
052b6a6c 225 if (lazy)
052b6a6c 226 {
7137f424
GK
227 Elf32_Word *tramp = plt + PLT_TRAMPOLINE_ENTRY_WORDS;
228 Elf32_Word dlrr = (Elf32_Word)(profile
229 ? _dl_prof_resolve
230 : _dl_runtime_resolve);
231 Elf32_Word offset;
232
233 if (profile && _dl_name_match_p (_dl_profile, map))
234 /* This is the object we are looking for. Say that we really
235 want profiling and the timers are started. */
236 _dl_profile_map = map;
237
238 /* For the long entries, subtract off data_words. */
239 tramp[0] = OPCODE_ADDIS_HI (11, 11, -data_words);
240 tramp[1] = OPCODE_ADDI (11, 11, -data_words);
241
242 /* Multiply index of entry by 3 (in r11). */
243 tramp[2] = OPCODE_SLWI (12, 11, 1);
244 tramp[3] = OPCODE_ADD (11, 12, 11);
245 if (dlrr <= 0x01fffffc || dlrr >= 0xfe000000)
052b6a6c 246 {
7137f424
GK
247 /* Load address of link map in r12. */
248 tramp[4] = OPCODE_LI (12, (Elf32_Word) map);
249 tramp[5] = OPCODE_ADDIS_HI (12, 12, (Elf32_Word) map);
250
251 /* Call _dl_runtime_resolve. */
252 tramp[6] = OPCODE_BA (dlrr);
052b6a6c
UD
253 }
254 else
7137f424
GK
255 {
256 /* Get address of _dl_runtime_resolve in CTR. */
257 tramp[4] = OPCODE_LI (12, dlrr);
258 tramp[5] = OPCODE_ADDIS_HI (12, 12, dlrr);
259 tramp[6] = OPCODE_MTCTR (12);
260
261 /* Load address of link map in r12. */
262 tramp[7] = OPCODE_LI (12, (Elf32_Word) map);
263 tramp[8] = OPCODE_ADDIS_HI (12, 12, (Elf32_Word) map);
264
265 /* Call _dl_runtime_resolve. */
266 tramp[9] = OPCODE_BCTR ();
267 }
268
269 /* Set up the lazy PLT entries. */
270 offset = PLT_INITIAL_ENTRY_WORDS;
271 i = 0;
272 while (i < num_plt_entries && i < PLT_DOUBLE_SIZE)
052b6a6c
UD
273 {
274 plt[offset ] = OPCODE_LI (11, i * 4);
7137f424
GK
275 plt[offset+1] = OPCODE_B ((PLT_TRAMPOLINE_ENTRY_WORDS + 2
276 - (offset+1))
277 * 4);
278 i++;
279 offset += 2;
280 }
281 while (i < num_plt_entries)
282 {
283 plt[offset ] = OPCODE_LIS_HI (11, i * 4 + data_words);
284 plt[offset+1] = OPCODE_LWZU (12, i * 4 + data_words, 11);
285 plt[offset+2] = OPCODE_B ((PLT_TRAMPOLINE_ENTRY_WORDS
286 - (offset+2))
287 * 4);
288 plt[offset+3] = OPCODE_BCTR ();
289 i++;
290 offset += 4;
052b6a6c
UD
291 }
292 }
293
7137f424
GK
294 /* Now, we've modified code. We need to write the changes from
295 the data cache to a second-level unified cache, then make
296 sure that stale data in the instruction cache is removed.
297 (In a multiprocessor system, the effect is more complex.)
298 Most of the PLT shouldn't be in the instruction cache, but
299 there may be a little overlap at the start and the end.
052b6a6c 300
7137f424
GK
301 Assumes that dcbst and icbi apply to lines of 16 bytes or
302 more. At present, all PowerPC processors have line sizes of
303 16 or 32 bytes. */
052b6a6c 304
7137f424
GK
305 size_modified = lazy ? rel_offset_words : 6;
306 for (i = 0; i < size_modified; i += 4)
052b6a6c 307 PPC_DCBST (plt + i);
f1d34527 308 PPC_DCBST (plt + size_modified - 1);
052b6a6c 309 PPC_SYNC;
2d09b95d 310 PPC_ICBI (plt);
7137f424 311 PPC_ICBI (plt + size_modified - 1);
052b6a6c
UD
312 PPC_ISYNC;
313 }
314
315 return lazy;
316}
317
318void
319__elf_machine_fixup_plt(struct link_map *map, const Elf32_Rela *reloc,
320 Elf32_Addr *reloc_addr, Elf32_Addr finaladdr)
321{
7137f424 322 Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr;
052b6a6c
UD
323 if (delta << 6 >> 6 == delta)
324 *reloc_addr = OPCODE_B (delta);
325 else if (finaladdr <= 0x01fffffc || finaladdr >= 0xfe000000)
326 *reloc_addr = OPCODE_BA (finaladdr);
327 else
328 {
7137f424
GK
329 Elf32_Word *plt, *data_words;
330 Elf32_Word index, offset, num_plt_entries;
331
332 num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val
333 / sizeof(Elf32_Rela));
f1d34527 334 plt = (Elf32_Word *) map->l_info[DT_PLTGOT]->d_un.d_val;
7137f424
GK
335 offset = reloc_addr - plt;
336 index = (offset - PLT_INITIAL_ENTRY_WORDS)/2;
337 data_words = plt + PLT_DATA_START_WORDS (num_plt_entries);
338
339 reloc_addr += 1;
340
341 if (index < PLT_DOUBLE_SIZE)
052b6a6c 342 {
7137f424
GK
343 data_words[index] = finaladdr;
344 PPC_SYNC;
345 *reloc_addr = OPCODE_B ((PLT_LONGBRANCH_ENTRY_WORDS - (offset+1))
346 * 4);
052b6a6c
UD
347 }
348 else
349 {
7137f424
GK
350 index -= (index - PLT_DOUBLE_SIZE)/2;
351
352 data_words[index] = finaladdr;
353 PPC_SYNC;
354
355 reloc_addr[1] = OPCODE_MTCTR (12);
356 MODIFIED_CODE_NOQUEUE (reloc_addr + 1);
357 PPC_SYNC;
358
359 reloc_addr[0] = OPCODE_LWZ (12,
360 (Elf32_Word) (data_words + index), 11);
052b6a6c
UD
361 }
362 }
363 MODIFIED_CODE (reloc_addr);
364}
365
366void
367__process_machine_rela (struct link_map *map,
368 const Elf32_Rela *reloc,
369 const Elf32_Sym *sym,
370 const Elf32_Sym *refsym,
371 Elf32_Addr *const reloc_addr,
372 Elf32_Addr const finaladdr,
373 int rinfo)
374{
375 switch (rinfo)
376 {
377 case R_PPC_NONE:
378 return;
379
380 case R_PPC_ADDR32:
381 case R_PPC_UADDR32:
382 case R_PPC_GLOB_DAT:
383 case R_PPC_RELATIVE:
384 *reloc_addr = finaladdr;
385 return;
386
387 case R_PPC_ADDR24:
388 if (finaladdr > 0x01fffffc && finaladdr < 0xfe000000)
f1d34527
UD
389 _dl_signal_error (0, map->l_name,
390 "R_PPC_ADDR24 relocation out of range");
118bad87 391 *reloc_addr = (*reloc_addr & 0xfc000003) | (finaladdr & 0x3fffffc);
052b6a6c
UD
392 break;
393
394 case R_PPC_ADDR16:
395 case R_PPC_UADDR16:
396 if (finaladdr > 0x7fff && finaladdr < 0x8000)
f1d34527
UD
397 _dl_signal_error (0, map->l_name,
398 "R_PPC_ADDR16 relocation out of range");
052b6a6c
UD
399 *(Elf32_Half*) reloc_addr = finaladdr;
400 break;
401
402 case R_PPC_ADDR16_LO:
403 *(Elf32_Half*) reloc_addr = finaladdr;
404 break;
405
406 case R_PPC_ADDR16_HI:
407 *(Elf32_Half*) reloc_addr = finaladdr >> 16;
408 break;
409
410 case R_PPC_ADDR16_HA:
411 *(Elf32_Half*) reloc_addr = (finaladdr + 0x8000) >> 16;
412 break;
413
414 case R_PPC_ADDR14:
415 case R_PPC_ADDR14_BRTAKEN:
416 case R_PPC_ADDR14_BRNTAKEN:
417 if (finaladdr > 0x7fff && finaladdr < 0x8000)
f1d34527
UD
418 _dl_signal_error (0, map->l_name,
419 "R_PPC_ADDR14 relocation out of range");
118bad87 420 *reloc_addr = (*reloc_addr & 0xffff0003) | (finaladdr & 0xfffc);
052b6a6c 421 if (rinfo != R_PPC_ADDR14)
118bad87
UD
422 *reloc_addr = ((*reloc_addr & 0xffdfffff)
423 | ((rinfo == R_PPC_ADDR14_BRTAKEN)
424 ^ (finaladdr >> 31)) << 21);
052b6a6c
UD
425 break;
426
427 case R_PPC_REL24:
428 {
7137f424 429 Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr;
052b6a6c 430 if (delta << 6 >> 6 != delta)
f1d34527
UD
431 _dl_signal_error (0, map->l_name,
432 "R_PPC_REL24 relocation out of range");
118bad87 433 *reloc_addr = (*reloc_addr & 0xfc000003) | (delta & 0x3fffffc);
052b6a6c
UD
434 }
435 break;
436
437 case R_PPC_COPY:
438 if (sym == NULL)
439 /* This can happen in trace mode when an object could not be
440 found. */
441 return;
442 if (sym->st_size > refsym->st_size
443 || (_dl_verbose && sym->st_size < refsym->st_size))
444 {
445 const char *strtab;
446
f420344c 447 strtab = (const void *) map->l_info[DT_STRTAB]->d_un.d_ptr;
052b6a6c
UD
448 _dl_sysdep_error (_dl_argv[0] ?: "<program name unknown>",
449 ": Symbol `", strtab + refsym->st_name,
450 "' has different size in shared object, "
451 "consider re-linking\n", NULL);
452 }
453 memcpy (reloc_addr, (char *) finaladdr, MIN (sym->st_size,
454 refsym->st_size));
455 return;
456
457 case R_PPC_REL32:
7137f424 458 *reloc_addr = finaladdr - (Elf32_Word) reloc_addr;
052b6a6c
UD
459 return;
460
461 case R_PPC_JMP_SLOT:
7137f424
GK
462 /* It used to be that elf_machine_fixup_plt was used here,
463 but that doesn't work when ld.so relocates itself
464 for the second time. On the bright side, there's
465 no need to worry about thread-safety here. */
466 {
467 Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr;
468 if (delta << 6 >> 6 == delta)
469 *reloc_addr = OPCODE_B (delta);
470 else if (finaladdr <= 0x01fffffc || finaladdr >= 0xfe000000)
471 *reloc_addr = OPCODE_BA (finaladdr);
472 else
473 {
474 Elf32_Word *plt, *data_words;
475 Elf32_Word index, offset, num_plt_entries;
476
477 plt = (Elf32_Word *) map->l_info[DT_PLTGOT]->d_un.d_val;
478 offset = reloc_addr - plt;
479
480 if (offset < PLT_DOUBLE_SIZE*2 + PLT_INITIAL_ENTRY_WORDS)
481 {
482 index = (offset - PLT_INITIAL_ENTRY_WORDS)/2;
483 num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val
484 / sizeof(Elf32_Rela));
485 data_words = plt + PLT_DATA_START_WORDS (num_plt_entries);
486 data_words[index] = finaladdr;
487 reloc_addr[0] = OPCODE_LI (11, index * 4);
488 reloc_addr[1] = OPCODE_B ((PLT_LONGBRANCH_ENTRY_WORDS
489 - (offset+1))
490 * 4);
491 MODIFIED_CODE_NOQUEUE (reloc_addr + 1);
492 }
493 else
494 {
495 reloc_addr[0] = OPCODE_LIS_HI (12, finaladdr);
496 reloc_addr[1] = OPCODE_ADDI (12, 12, finaladdr);
497 reloc_addr[2] = OPCODE_MTCTR (12);
498 reloc_addr[3] = OPCODE_BCTR ();
499 MODIFIED_CODE_NOQUEUE (reloc_addr + 3);
500 }
501 }
502 }
503 break;
052b6a6c
UD
504
505 default:
421c80d2 506 _dl_reloc_bad_type (map, rinfo, 0);
052b6a6c
UD
507 return;
508 }
509
510 MODIFIED_CODE_NOQUEUE (reloc_addr);
511}