]>
Commit | Line | Data |
---|---|---|
c9a8c526 | 1 | /* Function sincos vectorized with SSE2. |
bfff8b1b | 2 | Copyright (C) 2014-2017 Free Software Foundation, Inc. |
c9a8c526 AS |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library; if not, see | |
17 | <http://www.gnu.org/licenses/>. */ | |
18 | ||
19 | #include <sysdep.h> | |
20 | #include "svml_d_wrapper_impl.h" | |
21 | ||
22 | .text | |
ee2196bb | 23 | ENTRY (_ZGVbN2vl8l8_sincos) /* 2-lane sincos entry point; the l8l8 suffix presumably marks the two pointer arguments as linear with stride 8 - TODO confirm against the vector ABI. */ |
c9a8c526 | 24 | WRAPPER_IMPL_SSE2_fFF sincos /* Whole body comes from this macro, instantiated with the scalar sincos; the macro is not defined in this file (presumably svml_d_wrapper_impl.h). */ |
ee2196bb AS |
25 | END (_ZGVbN2vl8l8_sincos) |
26 | libmvec_hidden_def (_ZGVbN2vl8l8_sincos) /* Unconditional here, unlike the vvv variant which guards it with USE_MULTIARCH; the macro itself is defined outside this file. */ | |
27 | ||
28 | /* SSE2 ISA version as wrapper to scalar (for vector | |
29 | function declared with #pragma omp declare simd notinbranch). */ | |
30 | .macro WRAPPER_IMPL_SSE2_fFF_vvv callee /* In: xmm0 = two doubles, xmm1 = two pointers for the first output, xmm2 = two pointers for the second output. Calls the scalar callee once per lane into stack slots, then stores each result through its pointer. */ | |
31 | #ifndef __ILP32__ /* 64-bit pointers: each of xmm1/xmm2 holds two 8-byte pointers. */ | |
32 | subq $88, %rsp /* Frame: [0,16) first outputs, [16,32) second outputs, [32,48) saved xmm1, [48,64) saved xmm2, [64,80) saved input. With entry rsp % 16 == 8 this leaves rsp 16-byte aligned for the aligned stores and the calls. */ | |
33 | cfi_adjust_cfa_offset(88) | |
34 | movaps %xmm0, 64(%rsp) /* Keep both input doubles: lane 0 at rsp+64, lane 1 at rsp+72. */ | |
35 | lea (%rsp), %rdi /* Lane 0 first-output slot = rsp+0. */ | |
36 | movdqa %xmm1, 32(%rdi) /* Save first-output pointers at rsp+32 and rsp+40. */ | |
37 | lea 16(%rsp), %rsi /* Lane 0 second-output slot = rsp+16. */ | |
38 | movdqa %xmm2, 32(%rsi) /* Save second-output pointers at rsp+48 and rsp+56. */ | |
39 | call JUMPTARGET(\callee) /* Lane 0: xmm0 is untouched so far, its low double is the lane 0 input. */ | |
40 | movsd 72(%rsp), %xmm0 /* Lane 1 input from the saved copy. */ | |
41 | lea 8(%rsp), %rdi /* Lane 1 first-output slot = rsp+8. */ | |
42 | lea 24(%rsp), %rsi /* Lane 1 second-output slot = rsp+24. */ | |
43 | call JUMPTARGET(\callee) /* Lane 1. */ | |
44 | movq 32(%rsp), %rdx /* rdx = first-output pointer, lane 0. */ | |
45 | movq 48(%rsp), %rsi /* rsi = second-output pointer, lane 0. */ | |
46 | movq 40(%rsp), %r8 /* r8 = first-output pointer, lane 1. */ | |
47 | movq 56(%rsp), %r10 /* r10 = second-output pointer, lane 1. */ | |
48 | movq (%rsp), %rax /* rax = first output, lane 0 (raw 64-bit pattern). */ | |
49 | movq 16(%rsp), %rcx /* rcx = second output, lane 0. */ | |
50 | movq 8(%rsp), %rdi /* rdi = first output, lane 1. */ | |
51 | movq 24(%rsp), %r9 /* r9 = second output, lane 1. */ | |
52 | movq %rax, (%rdx) /* Store each result through its caller-supplied pointer. */ | |
53 | movq %rcx, (%rsi) | |
54 | movq %rdi, (%r8) | |
55 | movq %r9, (%r10) | |
56 | addq $88, %rsp /* Release the frame. */ | |
57 | cfi_adjust_cfa_offset(-88) | |
58 | ret | |
59 | #else /* __ILP32__: pointers are 32-bit, taken from dwords 0 and 1 of xmm1/xmm2. */ | |
60 | pushq %rbp /* rbp and rbx are callee-saved; they carry the output-slot addresses across the first call. */ | |
61 | .cfi_def_cfa_offset 16 | |
62 | .cfi_offset 6, -16 | |
63 | pushq %rbx | |
64 | .cfi_def_cfa_offset 24 | |
65 | .cfi_offset 3, -24 | |
66 | subl $88, %esp /* Frame: [0,16) saved input, [16,32) saved xmm2, [32,48) saved xmm1, [48,64) first outputs, [64,80) second outputs. */ | |
67 | .cfi_def_cfa_offset 112 | |
68 | leal 64(%rsp), %esi /* Lane 0 second-output slot = esp+64. */ | |
69 | movaps %xmm1, 32(%esp) /* Save first-output pointers at esp+32 (lane 0) and esp+36 (lane 1). */ | |
70 | leal 48(%rsp), %edi /* Lane 0 first-output slot = esp+48. */ | |
71 | movaps %xmm2, 16(%esp) /* Save second-output pointers at esp+16 (lane 0) and esp+20 (lane 1). */ | |
72 | movq %rsi, %rbp /* Remember the second-output slot base across the call. */ | |
73 | movq %rdi, %rbx /* Remember the first-output slot base across the call. */ | |
74 | movaps %xmm0, (%esp) /* Keep both input doubles: lane 0 at esp+0, lane 1 at esp+8. */ | |
75 | call JUMPTARGET(\callee) /* Lane 0. */ | |
76 | movupd 8(%esp), %xmm0 /* Low double = lane 1 input; the high half picks up the start of the saved xmm2 and is presumably ignored by the scalar callee. */ | |
77 | leal 8(%rbp), %esi /* Lane 1 second-output slot = esp+72. */ | |
78 | leal 8(%rbx), %edi /* Lane 1 first-output slot = esp+56. */ | |
79 | call JUMPTARGET(\callee) /* Lane 1. */ | |
80 | movdqa 32(%esp), %xmm1 /* Reload the first-output pointers; xmm registers do not survive the calls. */ | |
81 | movsd 48(%esp), %xmm0 /* First output, lane 0. */ | |
82 | movq %xmm1, %rax /* Low dword of rax = first-output pointer, lane 0. */ | |
83 | movdqa 16(%esp), %xmm2 /* Reload the second-output pointers. */ | |
84 | movsd %xmm0, (%eax) | |
85 | movsd 56(%esp), %xmm0 /* First output, lane 1. */ | |
86 | movl 36(%esp), %eax /* First-output pointer, lane 1 (dword 1 of the saved xmm1). Plain load instead of pextrd, which is SSE4.1 and must not appear in the SSE2 wrapper. */ | |
87 | movsd %xmm0, (%eax) | |
88 | movsd 64(%esp), %xmm0 /* Second output, lane 0. */ | |
89 | movq %xmm2, %rax /* Low dword of rax = second-output pointer, lane 0. */ | |
90 | movsd %xmm0, (%eax) | |
91 | movsd 72(%esp), %xmm0 /* Second output, lane 1. */ | |
92 | movl 20(%esp), %eax /* Second-output pointer, lane 1 (dword 1 of the saved xmm2); SSE2-safe replacement for pextrd. */ | |
93 | movsd %xmm0, (%eax) | |
94 | addl $88, %esp /* Release the frame, then restore the callee-saved registers. */ | |
95 | .cfi_def_cfa_offset 24 | |
96 | popq %rbx | |
97 | .cfi_def_cfa_offset 16 | |
98 | popq %rbp | |
99 | .cfi_def_cfa_offset 8 | |
100 | ret | |
101 | #endif | |
102 | .endm | |
103 | ||
104 | ENTRY (_ZGVbN2vvv_sincos) /* 2-lane sincos entry point taking the input vector in xmm0 and per-lane destination pointers in xmm1 and xmm2, as consumed by the macro below. */ | |
105 | WRAPPER_IMPL_SSE2_fFF_vvv sincos /* Whole body is the wrapper macro defined earlier in this file, instantiated with the scalar sincos; the macro ends in ret. */ | |
c9a8c526 AS |
106 | END (_ZGVbN2vvv_sincos) |
107 | ||
108 | #ifndef USE_MULTIARCH /* The hidden definition is only emitted here when USE_MULTIARCH is not defined; presumably a multiarch build provides it elsewhere - TODO confirm. */ | |
109 | libmvec_hidden_def (_ZGVbN2vvv_sincos) | |
110 | #endif |