]>
Commit | Line | Data |
---|---|---|
9dcafc55 | 1 | /* PLT trampolines. x86-64 version. |
f7a9f785 | 2 | Copyright (C) 2004-2016 Free Software Foundation, Inc. |
9dcafc55 UD |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 PE |
16 | License along with the GNU C Library; if not, see |
17 | <http://www.gnu.org/licenses/>. */ | |
9dcafc55 | 18 | |
b0ecde3a | 19 | #include <config.h> |
9dcafc55 | 20 | #include <sysdep.h> |
b0ecde3a | 21 | #include <link-defines.h> |
9dcafc55 | 22 | |
f3dcae82 L |
23 | #ifndef DL_STACK_ALIGNMENT |
24 | /* Due to GCC bug: | |
25 | ||
26 | https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 | |
27 | ||
28 | __tls_get_addr may be called with 8-byte stack alignment. Although | |
29 | this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume | |
30 | that the stack will always be aligned to 16 bytes. We use unaligned | |
31 | 16-byte moves to load and store SSE registers, which have no penalty | |
32 | on modern processors if the stack is 16-byte aligned. */ | |
33 | # define DL_STACK_ALIGNMENT 8 | |
34 | #endif | |
35 | ||
36 | #ifndef DL_RUNIME_UNALIGNED_VEC_SIZE | |
37 | /* The maximum size of unaligned vector load and store. */ | |
38 | # define DL_RUNIME_UNALIGNED_VEC_SIZE 16 | |
1cf463cd L |
39 | #endif |
40 | ||
f3dcae82 L |
41 | /* True if _dl_runtime_resolve should align the stack to VEC_SIZE bytes. Evaluated at each use with the VEC_SIZE then in effect. */ |
42 | #define DL_RUNIME_RESOLVE_REALIGN_STACK \ | |
43 | (VEC_SIZE > DL_STACK_ALIGNMENT \ | |
44 | && VEC_SIZE > DL_RUNIME_UNALIGNED_VEC_SIZE) | |
45 | ||
46 | /* Align vector register save area to 16 bytes. */ | |
47 | #define REGISTER_SAVE_VEC_OFF 0 | |
48 | ||
a4c75cfd IZ |
49 | /* Area on stack to save and restore registers used for parameter |
50 | passing when calling _dl_fixup. */ | |
51 | #ifdef __ILP32__ | |
f3dcae82 | 52 | # define REGISTER_SAVE_RAX (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8) |
b97eb2bd | 53 | # define PRESERVE_BND_REGS_PREFIX |
a4c75cfd | 54 | #else |
a4c75cfd | 55 | /* Align bound register save area to 16 bytes. */ |
f3dcae82 | 56 | # define REGISTER_SAVE_BND0 (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8) |
a4c75cfd IZ |
57 | # define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16) |
58 | # define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16) | |
59 | # define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16) | |
60 | # define REGISTER_SAVE_RAX (REGISTER_SAVE_BND3 + 16) | |
b97eb2bd L |
61 | # ifdef HAVE_MPX_SUPPORT |
62 | # define PRESERVE_BND_REGS_PREFIX bnd | |
63 | # else | |
64 | # define PRESERVE_BND_REGS_PREFIX .byte 0xf2 | |
65 | # endif | |
a4c75cfd IZ |
66 | #endif |
67 | #define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8) | |
68 | #define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8) | |
69 | #define REGISTER_SAVE_RSI (REGISTER_SAVE_RDX + 8) | |
70 | #define REGISTER_SAVE_RDI (REGISTER_SAVE_RSI + 8) | |
71 | #define REGISTER_SAVE_R8 (REGISTER_SAVE_RDI + 8) | |
72 | #define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8) | |
73 | ||
0a5768fe L |
74 | #define RESTORE_AVX |
75 | ||
76 | #ifdef HAVE_AVX512_ASM_SUPPORT | |
77 | # define VEC_SIZE 64 | |
78 | # define VMOVA vmovdqa64 | |
79 | # if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT | |
80 | # define VMOV vmovdqa64 | |
81 | # else | |
82 | # define VMOV vmovdqu64 | |
83 | # endif | |
84 | # define VEC(i) zmm##i | |
85 | # define _dl_runtime_resolve _dl_runtime_resolve_avx512 | |
86 | # define _dl_runtime_profile _dl_runtime_profile_avx512 | |
87 | # include "dl-trampoline.h" | |
88 | # undef _dl_runtime_resolve | |
89 | # undef _dl_runtime_profile | |
90 | # undef VEC | |
91 | # undef VMOV | |
92 | # undef VMOVA | |
93 | # undef VEC_SIZE | |
f3dcae82 | 94 | #else |
0a5768fe L |
95 | strong_alias (_dl_runtime_resolve_avx, _dl_runtime_resolve_avx512) |
96 | .hidden _dl_runtime_resolve_avx512 | |
97 | strong_alias (_dl_runtime_profile_avx, _dl_runtime_profile_avx512) | |
98 | .hidden _dl_runtime_profile_avx512 | |
a4c75cfd | 99 | #endif |
f3dcae82 L |
100 | |
101 | #define VEC_SIZE 32 | |
102 | #define VMOVA vmovdqa | |
103 | #if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT | |
104 | # define VMOV vmovdqa | |
105 | #else | |
106 | # define VMOV vmovdqu | |
9f0d7b6d | 107 | #endif |
f3dcae82 L |
108 | #define VEC(i) ymm##i |
109 | #define _dl_runtime_resolve _dl_runtime_resolve_avx | |
110 | #define _dl_runtime_profile _dl_runtime_profile_avx | |
111 | #include "dl-trampoline.h" | |
112 | #undef _dl_runtime_resolve | |
113 | #undef _dl_runtime_profile | |
114 | #undef VEC | |
115 | #undef VMOV | |
116 | #undef VMOVA | |
117 | #undef VEC_SIZE | |
118 | ||
119 | /* movaps/movups are 1 byte shorter than movdqa/movdqu. */ | |
120 | #define VEC_SIZE 16 | |
121 | #define VMOVA movaps | |
122 | #if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT | |
123 | # define VMOV movaps | |
124 | #else | |
125 | # define VMOV movups | |
b48a267b | 126 | #endif |
f3dcae82 L |
127 | #define VEC(i) xmm##i |
128 | #define _dl_runtime_resolve _dl_runtime_resolve_sse | |
129 | #define _dl_runtime_profile _dl_runtime_profile_sse | |
130 | #undef RESTORE_AVX | |
131 | #include "dl-trampoline.h" |