/* Partial PLT profile trampoline to save and restore x86-64 vector
   registers.
   Copyright (C) 2009, 2011 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#ifdef RESTORE_AVX
	/* This is to support AVX audit modules.  */
	vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp)
	vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
	vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)

	/* Save the xmm0-xmm7 registers to detect whether any of them are
	   changed by an audit module.  */
	vmovdqa %xmm0, (LR_SIZE)(%rsp)
	vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp)
	vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
	vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
	vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
	vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
	vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
	vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
#endif

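	/* Call the resolver: %rdi = link_map, %rsi = reloc_index,
	   %rdx = return address, %rcx = La_x86_64_regs pointer,
	   %r8 = &framesize (stored at 16(%rbx)).  The address of the
	   resolved function is returned in %rax.  */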
	movq %rsp, %rcx		# La_x86_64_regs pointer to %rcx.
	movq 48(%rbx), %rdx	# Load return address if needed.
	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
	leaq 16(%rbx), %r8
	call _dl_profile_fixup	# Call resolver.

	movq %rax, %r11		# Save return value.

	movq 8(%rbx), %rax	# Get back register content.
	movq LR_RDX_OFFSET(%rsp), %rdx
	movq LR_R8_OFFSET(%rsp), %r8
	movq LR_R9_OFFSET(%rsp), %r9

	movaps (LR_XMM_OFFSET)(%rsp), %xmm0
	movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
	movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
	movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
	movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
	movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
	movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
	movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7

#ifdef RESTORE_AVX
	/* Check whether any of the xmm0-xmm7 registers were changed by an
	   audit module.  */
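	/* If a register still matches the copy saved above, the audit
	   module left it untouched and the full YMM value saved on entry
	   is restored; otherwise only the (possibly modified) 128-bit
	   value supplied by the audit module is kept.  */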
	vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
	jmp 1f
2:	vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0
	vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
	jmp 1f
2:	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
	vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	jmp 1f
2:	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
	vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	jmp 1f
2:	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
	vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	jmp 1f
2:	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
	vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	jmp 1f
2:	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
	vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	jmp 1f
2:	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
	vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
	jmp 1f
2:	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
	vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

1:
#endif
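	/* The frame size requested through the &framesize argument of
	   _dl_profile_fixup was stored at 16(%rbx); a negative value means
	   no pltexit call is wanted, so the target can be reached with a
	   plain jump.  */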
	movq 16(%rbx), %r10	# Anything in framesize?
	testq %r10, %r10
	jns 3f

	/* There is nothing in the frame size, so there
	   will be no call to _dl_call_pltexit.  */

	/* Get back register content.  */
	movq LR_RCX_OFFSET(%rsp), %rcx
	movq LR_RSI_OFFSET(%rsp), %rsi
	movq LR_RDI_OFFSET(%rsp), %rdi

	movq %rbx, %rsp
	movq (%rsp), %rbx
	cfi_restore(rbx)
	cfi_def_cfa_register(%rsp)

	addq $48, %rsp		# Adjust the stack to the return value
				# (eats the reloc index and link_map).
	cfi_adjust_cfa_offset(-48)
	jmp *%r11		# Jump to function address.

3:
	cfi_adjust_cfa_offset(48)
	cfi_rel_offset(%rbx, 0)
	cfi_def_cfa_register(%rbx)

	/* At this point we need to prepare a new stack frame for the
	   function which is about to be called.  We copy the original
	   stack to a temporary buffer of the size specified by the
	   'framesize' returned from _dl_profile_fixup.  */

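	/* Round the copy size so the new stack stays 16-byte aligned; the
	   frame is then copied qword by qword.  After the rep movsq below,
	   %rdi ends up pointing back at the saved La_x86_64_regs area,
	   which is why %rcx/%rsi/%rdi are reloaded via %rdi offsets.  */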
	leaq LR_RSP_OFFSET(%rbx), %rsi	# stack
	addq $8, %r10
	andq $0xfffffffffffffff0, %r10
	movq %r10, %rcx
	subq %r10, %rsp
	movq %rsp, %rdi
	shrq $3, %rcx
	rep
	movsq

	movq 24(%rdi), %rcx	# Get back register content.
	movq 32(%rdi), %rsi
	movq 40(%rdi), %rdi

	call *%r11

	mov 24(%rbx), %rsp	# Drop the copied stack content.

	/* Now we have to prepare the La_x86_64_retval structure for
	   _dl_call_pltexit.  The La_x86_64_regs structure is pointed to by
	   %rsp now, so we just need to allocate sizeof(La_x86_64_retval)
	   bytes on the stack, since the alignment has already been taken
	   care of.  */
#ifdef RESTORE_AVX
	/* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
	   registers to detect whether xmm0/xmm1 are changed by an
	   audit module.  */
	subq $(LRV_SIZE + XMM_SIZE*2), %rsp
#else
	subq $LRV_SIZE, %rsp	# sizeof(La_x86_64_retval)
#endif
	movq %rsp, %rcx		# La_x86_64_retval argument to %rcx.

	/* Fill in the La_x86_64_retval structure.  */
	movq %rax, LRV_RAX_OFFSET(%rcx)
	movq %rdx, LRV_RDX_OFFSET(%rcx)

	movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
	movaps %xmm1, LRV_XMM1_OFFSET(%rcx)

#ifdef RESTORE_AVX
	/* This is to support AVX audit modules.  */
	vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
	vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)

	/* Save the xmm0/xmm1 registers to detect whether they are changed
	   by an audit module.  */
	vmovdqa %xmm0, (LRV_SIZE)(%rcx)
	vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
#endif

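	/* Store the x87 return values st(0) and st(1) (used for long
	   double results) into the La_x86_64_retval structure.  */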
	fstpt LRV_ST0_OFFSET(%rcx)
	fstpt LRV_ST1_OFFSET(%rcx)

	movq 24(%rbx), %rdx	# La_x86_64_regs argument to %rdx.
	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
	call _dl_call_pltexit

	/* Restore return registers.  */
	movq LRV_RAX_OFFSET(%rsp), %rax
	movq LRV_RDX_OFFSET(%rsp), %rdx

	movaps LRV_XMM0_OFFSET(%rsp), %xmm0
	movaps LRV_XMM1_OFFSET(%rsp), %xmm1

#ifdef RESTORE_AVX
	/* Check whether the xmm0/xmm1 registers were changed by an audit
	   module.  */
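	/* As above: if the saved copy still matches, the pltexit hook did
	   not touch the XMM return value, so the full YMM return value
	   saved earlier can be restored.  */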
	vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0

1:	vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1

1:
#endif

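	/* Reload the x87 return values; st(1) is loaded first so that
	   st(0) ends up on top of the x87 stack again.  */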
	fldt LRV_ST1_OFFSET(%rsp)
	fldt LRV_ST0_OFFSET(%rsp)

	movq %rbx, %rsp
	movq (%rsp), %rbx
	cfi_restore(rbx)
	cfi_def_cfa_register(%rsp)

	addq $48, %rsp		# Adjust the stack to the return value
				# (eats the reloc index and link_map).
	cfi_adjust_cfa_offset(-48)
	retq

#ifdef MORE_CODE
	cfi_adjust_cfa_offset(48)
	cfi_rel_offset(%rbx, 0)
	cfi_def_cfa_register(%rbx)
# undef MORE_CODE
#endif