]>
Commit | Line | Data |
---|---|---|
21933112 | 1 | /* Wrapper implementations of vector math functions. |
d614a753 | 2 | Copyright (C) 2014-2020 Free Software Foundation, Inc. |
21933112 AS |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library; if not, see | |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
21933112 AS |
18 | |
/* SSE2 ISA version as wrapper to scalar.

   Input:   %xmm0 holds two packed doubles.
   Output:  %xmm0 = { result for lane 0, result for lane 1 }.

   The callee is called once per lane with its argument in the low
   half of %xmm0; its result is read back from the low half of %xmm0.

   Stack frame (24 bytes; together with the return address this keeps
   %rsp 16-byte aligned for the movaps store and for both calls,
   assuming the usual x86-64 alignment on entry):
     0(%rsp)  input lane 0; reused for the lane-0 result once the
	      first call has returned
     8(%rsp)  input lane 1  */
.macro WRAPPER_IMPL_SSE2 callee
	subq	$24, %rsp
	cfi_adjust_cfa_offset (24)
	movaps	%xmm0, (%rsp)
	call	JUMPTARGET(\callee)
	/* Lane 0 has been consumed, so its slot can hold the first result.  */
	movsd	%xmm0, (%rsp)
	movsd	8(%rsp), %xmm0
	call	JUMPTARGET(\callee)
	/* Pack: %xmm1 = { first result, second result }.  */
	movsd	(%rsp), %xmm1
	unpcklpd %xmm0, %xmm1
	movaps	%xmm1, %xmm0
	addq	$24, %rsp
	cfi_adjust_cfa_offset (-24)
	ret
.endm
36 | ||
c10b9b13 AS |
/* 2 argument SSE2 ISA version as wrapper to scalar.

   Input:   %xmm0 and %xmm1 each hold two packed doubles.
   Output:  %xmm0 = { result for lane 0, result for lane 1 }.

   The callee is called once per lane with the matching lanes of the
   two inputs in the low halves of %xmm0 and %xmm1; its result is read
   back from the low half of %xmm0.

   Stack frame (40 bytes; together with the return address this keeps
   %rsp 16-byte aligned for the movaps stores and for both calls,
   assuming the usual x86-64 alignment on entry):
      0(%rsp)  first argument, lane 0; reused for the lane-0 result
	       once the first call has returned
      8(%rsp)  first argument, lane 1
     16(%rsp)  second argument, lane 0
     24(%rsp)  second argument, lane 1  */
.macro WRAPPER_IMPL_SSE2_ff callee
	subq	$40, %rsp
	cfi_adjust_cfa_offset (40)
	movaps	%xmm0, (%rsp)
	movaps	%xmm1, 16(%rsp)
	call	JUMPTARGET(\callee)
	/* Lane 0 of the first argument has been consumed, so its slot can
	   hold the first result.  */
	movsd	%xmm0, (%rsp)
	movsd	8(%rsp), %xmm0
	movsd	24(%rsp), %xmm1
	call	JUMPTARGET(\callee)
	/* Pack: %xmm1 = { first result, second result }.  */
	movsd	(%rsp), %xmm1
	unpcklpd %xmm0, %xmm1
	movaps	%xmm1, %xmm0
	addq	$40, %rsp
	cfi_adjust_cfa_offset (-40)
	ret
.endm
56 | ||
c9a8c526 AS |
/* 3 argument SSE2 ISA version as wrapper to scalar.

   Input:   %xmm0 holds two packed doubles; %rdi and %rsi are pointers
	    through which two doubles each are stored.
   Output:  for each lane i, the callee is called with the lane value
	    in %xmm0, %rdi pointing at a stack temporary A and %rsi
	    pointing at a stack temporary B; afterwards A is copied to
	    element i of the caller's %rdi buffer and B to element i of
	    the caller's %rsi buffer.

   %rbp and %rbx keep the caller's %rdi and %rsi across the calls.

   Stack frame below the two saved registers (40 bytes):
      0(%rsp)  copy of the input vector
     16(%rsp)  temporary B (passed in %rsi)
     24(%rsp)  temporary A (passed in %rdi)  */
.macro WRAPPER_IMPL_SSE2_fFF callee
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	pushq	%rbx
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbx, 0)
	movq	%rdi, %rbp
	movq	%rsi, %rbx
	subq	$40, %rsp
	cfi_adjust_cfa_offset (40)

	/* Lane 0: the input is still in the low half of %xmm0.  */
	movaps	%xmm0, (%rsp)
	leaq	24(%rsp), %rdi
	leaq	16(%rsp), %rsi
	call	JUMPTARGET(\callee)

	/* Copy the lane-0 results out before the temporaries are reused.  */
	movsd	24(%rsp), %xmm0
	movsd	%xmm0, (%rbp)
	movsd	16(%rsp), %xmm0
	movsd	%xmm0, (%rbx)

	/* Lane 1: broadcast the high input lane and call again.  */
	movapd	(%rsp), %xmm1
	unpckhpd %xmm1, %xmm1
	movapd	%xmm1, %xmm0
	leaq	16(%rsp), %rsi
	leaq	24(%rsp), %rdi
	call	JUMPTARGET(\callee)

	/* Copy the lane-1 results to the second element of each buffer.  */
	movsd	24(%rsp), %xmm0
	movsd	%xmm0, 8(%rbp)
	movsd	16(%rsp), %xmm0
	movsd	%xmm0, 8(%rbx)

	addq	$40, %rsp
	cfi_adjust_cfa_offset (-40)
	popq	%rbx
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbx)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
97 | ||
21933112 AS |
/* AVX/AVX2 ISA version as wrapper to SSE ISA version.

   Input:   %ymm0.
   Output:  %ymm0 = { callee (low 128 bits), callee (high 128 bits) }.

   The callee takes its argument in %xmm0 and returns in %xmm0.

   %rbp is used as frame pointer so that %rsp can be realigned to
   32 bytes.  Stack frame (32 bytes):
      0(%rsp)  high 128 bits of the input
     16(%rsp)  result for the low 128 bits  */
.macro WRAPPER_IMPL_AVX callee
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq	$-32, %rsp
	subq	$32, %rsp

	/* Park the high half; the low half stays in %xmm0 for the first
	   call.  */
	vextractf128 $1, %ymm0, (%rsp)
	vzeroupper
	call	HIDDEN_JUMPTARGET(\callee)

	/* Save the low-half result and process the high half.  */
	vmovapd	%xmm0, 16(%rsp)
	vmovaps	(%rsp), %xmm0
	call	HIDDEN_JUMPTARGET(\callee)

	/* Reassemble: low result in %xmm0, high result inserted above it.  */
	vmovapd	%xmm0, %xmm1
	vmovapd	16(%rsp), %xmm0
	vinsertf128 $1, %xmm1, %ymm0, %ymm0

	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
123 | ||
c10b9b13 AS |
/* 2 argument AVX/AVX2 ISA version as wrapper to SSE ISA version.

   Input:   %ymm0 and %ymm1.
   Output:  %ymm0 = { callee (low halves), callee (high halves) }.

   The callee takes its arguments in %xmm0 and %xmm1 and returns in
   %xmm0.

   %rbp is used as frame pointer so that %rsp can be realigned to
   32 bytes.  Stack frame (64 bytes):
      0(%rsp)  high 128 bits of the second argument
     16(%rsp)  high 128 bits of the first argument
     32(%rsp)  result for the low halves  */
.macro WRAPPER_IMPL_AVX_ff callee
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq	$-32, %rsp
	subq	$64, %rsp

	/* Park both high halves; the low halves stay in %xmm0/%xmm1.  */
	vextractf128 $1, %ymm1, (%rsp)
	vextractf128 $1, %ymm0, 16(%rsp)
	vzeroupper
	call	HIDDEN_JUMPTARGET(\callee)

	/* Save the low-half result and process the high halves.  */
	vmovaps	%xmm0, 32(%rsp)
	vmovaps	(%rsp), %xmm1
	vmovaps	16(%rsp), %xmm0
	call	HIDDEN_JUMPTARGET(\callee)

	/* Reassemble: low result in %xmm0, high result inserted above it.  */
	vmovaps	%xmm0, %xmm1
	vmovaps	32(%rsp), %xmm0
	vinsertf128 $1, %xmm1, %ymm0, %ymm0

	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
151 | ||
c9a8c526 AS |
/* 3 argument AVX/AVX2 ISA version as wrapper to SSE ISA version.

   Input:   %ymm0; %rdi and %rsi are pointers through which results
	    are stored.
   Output:  the callee is first called on the low 128 bits of %ymm0
	    with the caller's own %rdi/%rsi, then on the high 128 bits
	    with pointers to two stack temporaries, whose contents are
	    copied to byte offset 16 of the caller's %rdi and %rsi
	    buffers.

   %r13 and %r14 keep the caller's %rdi and %rsi across the calls.
   They are saved before %rsp is realigned, so their save slots sit
   at fixed offsets from %rbp.  %rbp is the CFA register for the body
   of the macro, so the unwind information can describe those slots
   exactly and the CFA offset must not be adjusted for these pushes.

   Stack frame below the realigned %rsp (64 bytes, 32-byte aligned):
      0(%rsp)  temporary passed in %rdi for the second call
     16(%rsp)  temporary passed in %rsi for the second call
     32(%rsp)  high 128 bits of the input
     48(%rsp)  padding that keeps %rsp 32-byte aligned  */
.macro WRAPPER_IMPL_AVX_fFF callee
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	pushq	%r13
	cfi_rel_offset (%r13, -8)
	pushq	%r14
	cfi_rel_offset (%r14, -16)
	andq	$-32, %rsp
	subq	$64, %rsp
	movq	%rsi, %r14
	movq	%rdi, %r13

	/* Low half: results go straight through the caller's pointers.  */
	vextractf128 $1, %ymm0, 32(%rsp)
	vzeroupper
	call	HIDDEN_JUMPTARGET(\callee)

	/* High half: results go to the stack temporaries.  */
	vmovaps	32(%rsp), %xmm0
	lea	(%rsp), %rdi
	lea	16(%rsp), %rsi
	call	HIDDEN_JUMPTARGET(\callee)

	/* Copy the temporaries to the upper part of the caller's buffers.
	   Nothing visible here guarantees those buffers are 16-byte
	   aligned, so use unaligned stores, as the AVX512 variant of
	   this wrapper does.  */
	vmovapd	(%rsp), %xmm0
	vmovapd	16(%rsp), %xmm1
	vmovupd	%xmm0, 16(%r13)
	vmovupd	%xmm1, 16(%r14)

	/* Reload the saved registers through %rbp; %rsp is realigned and
	   cannot be used to reach their slots with pops.  */
	movq	-16(%rbp), %r14
	cfi_restore (%r14)
	movq	-8(%rbp), %r13
	cfi_restore (%r13)
	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
194 | ||
21933112 AS |
/* AVX512 ISA version as wrapper to AVX2 ISA version.

   Input:   %zmm0.
   Output:  %zmm0 = { callee (low 256 bits), callee (high 256 bits) }.

   The callee takes its argument in %ymm0 and returns in %ymm0.

   %rbp is used as frame pointer so that %rsp can be realigned to
   64 bytes.  Stack frame (128 bytes):
      0(%rsp)  copy of the input (low half at 0, high half at 32)
     64(%rsp)  result vector (low half at 64, high half at 96)  */
.macro WRAPPER_IMPL_AVX512 callee
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq	$-64, %rsp
	subq	$128, %rsp

	/* Spill the whole input, then process its low half.  */
	vmovups	%zmm0, (%rsp)
	vmovupd	(%rsp), %ymm0
	call	HIDDEN_JUMPTARGET(\callee)

	/* Store the low result and process the high half.  */
	vmovupd	%ymm0, 64(%rsp)
	vmovupd	32(%rsp), %ymm0
	call	HIDDEN_JUMPTARGET(\callee)

	/* Store the high result and reload both halves as one vector.  */
	vmovupd	%ymm0, 96(%rsp)
	vmovups	64(%rsp), %zmm0

	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
c10b9b13 AS |
219 | |
/* 2 argument AVX512 ISA version as wrapper to AVX2 ISA version.

   Input:   %zmm0 and %zmm1.
   Output:  %zmm0 = { callee (low halves), callee (high halves) }.

   The callee takes its arguments in %ymm0 and %ymm1 and returns in
   %ymm0.

   %rbp is used as frame pointer so that %rsp can be realigned to
   64 bytes.  Stack frame (192 bytes):
       0(%rsp)  copy of the first argument (halves at 0 and 32)
      64(%rsp)  copy of the second argument (halves at 64 and 96)
     128(%rsp)  result vector (halves at 128 and 160)  */
.macro WRAPPER_IMPL_AVX512_ff callee
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq	$-64, %rsp
	subq	$192, %rsp

	/* Spill both inputs, then process their low halves.  */
	vmovups	%zmm0, (%rsp)
	vmovups	%zmm1, 64(%rsp)
	vmovupd	64(%rsp), %ymm1
	vmovupd	(%rsp), %ymm0
	call	HIDDEN_JUMPTARGET(\callee)

	/* Store the low result and process the high halves.  */
	vmovupd	%ymm0, 128(%rsp)
	vmovupd	96(%rsp), %ymm1
	vmovupd	32(%rsp), %ymm0
	call	HIDDEN_JUMPTARGET(\callee)

	/* Store the high result and reload both halves as one vector.  */
	vmovupd	%ymm0, 160(%rsp)
	vmovups	128(%rsp), %zmm0

	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
c9a8c526 AS |
247 | |
/* 3 argument AVX512 ISA version as wrapper to AVX2 ISA version.

   Input:   %zmm0; %rdi and %rsi are pointers through which results
	    are stored.
   Output:  the callee is first called on the low 256 bits of %zmm0
	    with the caller's own %rdi/%rsi, then on the high 256 bits
	    with pointers to two stack temporaries, whose contents are
	    copied to byte offset 32 of the caller's %rdi and %rsi
	    buffers.

   %r12 and %r13 keep the caller's %rdi and %rsi across the calls.
   They are saved before %rsp is realigned, so their save slots sit
   at fixed offsets from %rbp.  %rbp is the CFA register for the body
   of the macro, so the unwind information can describe those slots
   exactly and the CFA offset must not be adjusted for these pushes.

   Stack frame below the realigned %rsp (128 bytes, 64-byte aligned):
      0(%rsp)  copy of the input (low half at 0, high half at 32)
     64(%rsp)  temporary passed in %rdi for the second call
     96(%rsp)  temporary passed in %rsi for the second call  */
.macro WRAPPER_IMPL_AVX512_fFF callee
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	pushq	%r12
	cfi_rel_offset (%r12, -8)
	pushq	%r13
	cfi_rel_offset (%r13, -16)
	andq	$-64, %rsp
	subq	$128, %rsp
	movq	%rsi, %r13
	vmovups	%zmm0, (%rsp)
	movq	%rdi, %r12

	/* Low half: results go straight through the caller's pointers.  */
	vmovupd	(%rsp), %ymm0
	call	HIDDEN_JUMPTARGET(\callee)

	/* High half: results go to the stack temporaries.  */
	vmovupd	32(%rsp), %ymm0
	lea	64(%rsp), %rdi
	lea	96(%rsp), %rsi
	call	HIDDEN_JUMPTARGET(\callee)

	/* Copy the temporaries to the upper part of the caller's buffers.  */
	vmovupd	64(%rsp), %ymm0
	vmovupd	96(%rsp), %ymm1
	vmovupd	%ymm0, 32(%r12)
	vmovupd	%ymm1, 32(%r13)
	vzeroupper

	/* Reload the saved registers through %rbp; %rsp is realigned and
	   cannot be used to reach their slots with pops.  */
	movq	-16(%rbp), %r13
	cfi_restore (%r13)
	movq	-8(%rbp), %r12
	cfi_restore (%r12)
	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm