1 /* Function sinf vectorized with SSE4.
2 Copyright (C) 2014-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
21 #include "svml_s_trig_data.h"
23 .section .text.sse4, "ax", @progbits
/* _ZGVbN4v_sinf_sse4: packed single-precision sine; the file header
   describes it as "sinf vectorized with SSE4".

   NOTE(review): this listing appears to be missing many of the routine's
   instructions (the embedded line numbers skip), so the notes added below
   describe only what the visible lines do.

   Visible structure:
   - Constants are read from the __svml_s_trig_data table, whose address
     is loaded into %rax through the GOT.
   - cmpnleps against __sRangeReductionVal builds a per-lane mask in
     %xmm4; how that mask is consumed is not visible here.
   - Further down, %xmm5 is stored at 192(%rsp) and %xmm0 at 256(%rsp),
     %xmm8-%xmm15 are saved to (%rsp)..112(%rsp) and later reloaded, and
     scalar sinf is called on single elements read from
     192(%rsp,%r15,8) and 196(%rsp,%r15,8) (two 4-byte elements per %r15
     step), with each result written to the matching slot at
     256(%rsp,%r15,8) / 260(%rsp,%r15,8); %xmm0 is finally reloaded from
     256(%rsp).  Presumably this is the per-lane fallback for lanes
     flagged by the range check, with %xmm5 holding the inputs and %xmm0
     the results -- TODO confirm against the full source.  */
24 ENTRY(_ZGVbN4v_sinf_sse4)
26 ALGORITHM DESCRIPTION:
28 1) Range reduction to [-Pi/2; +Pi/2] interval
29 a) Grab sign from source argument and save it.
30 b) Remove sign using AND operation
31 c) Getting octant Y by 1/Pi multiplication
32 d) Add "Right Shifter" value
33 e) Treat obtained value as integer for destination sign setting.
34 Shift first bit of this value to the last (sign) position
35 f) Change destination sign if source sign is negative
37 g) Subtract "Right Shifter" value
38 h) Subtract Y*PI from X argument, where PI divided to 4 parts:
39 X = X - Y*PI1 - Y*PI2 - Y*PI3 - Y*PI4;
40 2) Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval)
41 a) Calculate X^2 = X * X
42 b) Calculate polynomial:
43 R = X + X * X^2 * (A3 + x^2 * (A5 + ......
44 3) Destination sign setting
45 a) Set shifted destination sign using XOR operation:
/* CFI annotations only; the prologue instructions they describe are not
   visible in this listing.  */
49 cfi_adjust_cfa_offset (8)
50 cfi_rel_offset (%rbp, 0)
52 cfi_def_cfa_register (%rbp)
/* %rax = address of the __svml_s_trig_data constant table (via the GOT);
   every constant below is addressed relative to it.  */
56 movq __svml_s_trig_data@GOTPCREL(%rip), %rax
/* %xmm2 = __sAbsMask, presumably the mask for step b).  */
57 movups __sAbsMask(%rax), %xmm2
59 /* b) Remove sign using AND operation */
63 f) Change destination sign if source sign is negative
/* %xmm1 = __sInvPI, the 1/Pi factor named in step c).  */
67 movups __sInvPI(%rax), %xmm1
70 /* c) Getting octant Y by 1/Pi multiplication
71 d) Add "Right Shifter" value */
74 /* h) Subtract Y*PI from X argument, where PI is split into 4 parts:
75 X = X - Y*PI1 - Y*PI2 - Y*PI3 - Y*PI4 */
78 /* Check for large and special values */
/* Each lane of %xmm4 becomes all-ones when its value is NOT <=
   __sRangeReductionVal; NaN lanes also satisfy this predicate.  */
79 cmpnleps __sRangeReductionVal(%rax), %xmm4
/* %xmm6 = right-shifter constant, %xmm7 = PI1.  */
80 movups __sRShifter(%rax), %xmm6
81 movups __sPI1(%rax), %xmm7
85 /* e) Treat obtained value as integer for destination sign setting.
86 Shift first bit of this value to the last (sign) position */
89 /* g) Subtract "Right Shifter" value */
/* %xmm6 and %xmm7 are reused to hold the PI2, PI3 and PI4 parts in
   turn.  */
93 movups __sPI2(%rax), %xmm6
96 movups __sPI3(%rax), %xmm7
99 movups __sPI4(%rax), %xmm6
104 /* 2) Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval)
105 a) Calculate X^2 = X * X
106 b) Calculate polynomial:
107 R = X + X * X^2 * (A3 + x^2 * (A5 + ...... */
/* Coefficients are applied from A9 down to A3 (Horner order); the
   multiplications between these additions are not visible in this
   listing.  */
111 movups __sA9(%rax), %xmm3
113 addps __sA7(%rax), %xmm3
115 addps __sA5(%rax), %xmm3
117 addps __sA3(%rax), %xmm3
122 /* 3) Destination sign setting
123 a) Set shifted destination sign using XOR operation:
132 cfi_def_cfa_register (%rsp)
134 cfi_adjust_cfa_offset (-8)
140 movups %xmm5, 192(%rsp)
141 movups %xmm0, 256(%rsp)
146 movups %xmm8, 112(%rsp)
147 movups %xmm9, 96(%rsp)
148 movups %xmm10, 80(%rsp)
149 movups %xmm11, 64(%rsp)
150 movups %xmm12, 48(%rsp)
151 movups %xmm13, 32(%rsp)
152 movups %xmm14, 16(%rsp)
153 movups %xmm15, (%rsp)
157 cfi_offset_rel_rsp (12, 168)
160 cfi_offset_rel_rsp (13, 160)
163 cfi_offset_rel_rsp (14, 152)
166 cfi_offset_rel_rsp (15, 144)
184 movups 112(%rsp), %xmm8
185 movups 96(%rsp), %xmm9
186 movups 80(%rsp), %xmm10
187 movups 64(%rsp), %xmm11
188 movups 48(%rsp), %xmm12
189 movups 32(%rsp), %xmm13
190 movups 16(%rsp), %xmm14
191 movups (%rsp), %xmm15
202 movups 256(%rsp), %xmm0
208 movss 196(%rsp,%r15,8), %xmm0
210 call JUMPTARGET(sinf)
212 movss %xmm0, 260(%rsp,%r15,8)
217 movss 192(%rsp,%r15,8), %xmm0
219 call JUMPTARGET(sinf)
221 movss %xmm0, 256(%rsp,%r15,8)
224 END(_ZGVbN4v_sinf_sse4)