]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S
67855395bba2edd9175da24f0f6be20b2caadf1f
[thirdparty/glibc.git] / sysdeps / i386 / i686 / fpu / multiarch / s_sincosf-sse2.S
1 /* Optimized with sse2 version of sincosf
2 Copyright (C) 2012-2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19 #include <sysdep.h>
20 #include <errno.h>
21
22 /* Short algorithm description:
23 *
24 * 1) if |x|==0: sin(x)=x,
25 * cos(x)=1.
26 * 2) if |x|<2^-27: sin(x)=x-x*DP_SMALL, raising underflow only when needed,
27 * cos(x)=1-|x|.
28 * 3) if |x|<2^-5 : sin(x)=x+x*x^2*DP_SIN2_0+x^5*DP_SIN2_1,
29 * cos(x)=1+1*x^2*DP_COS2_0+x^5*DP_COS2_1
30 * 4) if |x|< Pi/4: sin(x)=x+x*x^2*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))),
31 * cos(x)=1+1*x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
32 * 5) if |x| < 9*Pi/4:
33 * 5.1) Range reduction:
34 * k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, t=|x|-j*Pi/4.
35 * 5.2) Reconstruction:
36 * sign_sin = sign(x) * (-1.0)^(( n >>2)&1)
37 * sign_cos = (-1.0)^(((n+2)>>2)&1)
38 * poly_sin = ((((S4*t^2 + S3)*t^2 + S2)*t^2 + S1)*t^2 + S0)*t^2*t+t
39 * poly_cos = ((((C4*t^2 + C3)*t^2 + C2)*t^2 + C1)*t^2 + C0)*t^2*s+s
40 * if(n&2 != 0) {
41 * using cos(t) and sin(t) polynomials for |t|<Pi/4, results are
42 * cos(x) = poly_sin * sign_cos
43 * sin(x) = poly_cos * sign_sin
44 * } else {
45 * sin(x) = poly_sin * sign_sin
46 * cos(x) = poly_cos * sign_cos
47 * }
48 * 6) if |x| < 2^23, large args:
49 * 6.1) Range reduction:
50 * k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4
51 * 6.2) Reconstruction same as (5.2).
52 * 7) if |x| >= 2^23, very large args:
53 * 7.1) Range reduction:
54 * k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4.
55 * 7.2) Reconstruction same as (5.2).
56 * 8) if x is Inf, return x-x, and set errno=EDOM.
57 * 9) if x is NaN, return x-x.
58 *
59 * Special cases:
60 * sin/cos(+-0) = +-0/1 not raising inexact/underflow,
61 * sin/cos(subnormal) raises inexact/underflow,
62 * sin/cos(min_normalized) raises inexact/underflow,
63 * sin/cos(normalized) raises inexact,
64 * sin/cos(Inf) = NaN, raises invalid, sets errno to EDOM,
65 * sin/cos(NaN) = NaN.
66 */
67
68 #ifdef PIC
69 # define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
70 # define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
71 # define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
72 # define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
73 # define PUSH(REG) pushl REG; CFI_PUSH(REG)
74 # define POP(REG) popl REG; CFI_POP(REG)
75 # define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
76 # define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
77 # define ARG_X 8(%esp)
78 # define ARG_SIN_PTR 12(%esp)
79 # define ARG_COS_PTR 16(%esp)
80 #else
81 # define MO1(symbol) L(symbol)
82 # define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
83 # define ENTRANCE
84 # define RETURN ret
85 # define ARG_X 4(%esp)
86 # define ARG_SIN_PTR 8(%esp)
87 # define ARG_COS_PTR 12(%esp)
88 #endif
89
90 .text
91 ENTRY(__sincosf_sse2)
92 /* Input: single precision x on stack at address ARG_X */
93 /* pointer to sin result on stack at address ARG_SIN_PTR */
94 /* pointer to cos result on stack at address ARG_COS_PTR */
95
96 ENTRANCE
97 movl ARG_X, %eax /* Bits of x */
98 cvtss2sd ARG_X, %xmm0 /* DP x */
99 andl $0x7fffffff, %eax /* |x| */
100
101 cmpl $0x3f490fdb, %eax /* |x|<Pi/4 ? */
102 jb L(arg_less_pio4)
103
104 /* Here if |x|>=Pi/4 */
105 movd %eax, %xmm3 /* SP |x| */
106 andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */
107 movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */
108
109 cmpl $0x40e231d6, %eax /* |x|<9*Pi/4 ? */
110 jae L(large_args)
111
112 /* Here if Pi/4<=|x|<9*Pi/4 */
113 mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */
114 movl ARG_X, %ecx /* Load x */
115 cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */
116 shrl $29, %ecx /* (sign of x) << 2 */
117 addl $1, %eax /* k+1 */
118 movl $0x0e, %edx
119 andl %eax, %edx /* j = (k+1)&0x0e */
120 subsd MO2(PIO4J,%edx,8), %xmm0/* t = |x| - j * Pi/4 */
121
122 L(reconstruction):
123 /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
124
125 movaps %xmm0, %xmm4 /* t */
126 movhpd MO1(DP_ONES), %xmm4 /* 1|t */
127 mulsd %xmm0, %xmm0 /* y=t^2 */
128 movl $2, %edx
129 unpcklpd %xmm0, %xmm0 /* y|y */
130 addl %eax, %edx /* k+2 */
131 movaps %xmm0, %xmm1 /* y|y */
132 mulpd %xmm0, %xmm0 /* z=t^4|z=t^4 */
133
134 movaps MO1(DP_SC4), %xmm2 /* S4 */
135 mulpd %xmm0, %xmm2 /* z*S4 */
136 movaps MO1(DP_SC3), %xmm3 /* S3 */
137 mulpd %xmm0, %xmm3 /* z*S3 */
138 xorl %eax, %ecx /* (sign_x ^ (k>>2))<<2 */
139 addpd MO1(DP_SC2), %xmm2 /* S2+z*S4 */
140 mulpd %xmm0, %xmm2 /* z*(S2+z*S4) */
141 shrl $2, %edx /* (k+2)>>2 */
142 addpd MO1(DP_SC1), %xmm3 /* S1+z*S3 */
143 mulpd %xmm0, %xmm3 /* z*(S1+z*S3) */
144 shrl $2, %ecx /* sign_x ^ k>>2 */
145 addpd MO1(DP_SC0), %xmm2 /* S0+z*(S2+z*S4) */
146 andl $1, %edx /* sign_cos = ((k+2)>>2)&1 */
147 mulpd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */
148 andl $1, %ecx /* sign_sin = sign_x ^ ((k>>2)&1) */
149 addpd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
150 mulpd %xmm4, %xmm3 /*t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
151 testl $2, %eax /* n&2 != 0 ? */
152 addpd %xmm4, %xmm3 /*t+t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/
153 jnz L(sin_result_sin_poly)
154
155 /*L(sin_result_cos_poly):*/
156 /*
157 * Here if
158 * cos(x) = poly_sin * sign_cos
159 * sin(x) = poly_cos * sign_sin
160 */
161 movsd MO2(DP_ONES,%ecx,8), %xmm4/* 0|sign_sin */
162 movhpd MO2(DP_ONES,%edx,8), %xmm4/* sign_cos|sign_sin */
163 mulpd %xmm4, %xmm3 /* result_cos|result_sin */
164 movl ARG_SIN_PTR, %eax
165 cvtpd2ps %xmm3, %xmm0 /* SP results */
166 movl ARG_COS_PTR, %ecx
167 movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */
168 shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */
169 movss %xmm0, (%ecx) /* store cos(x) */
170 RETURN
171
172 .p2align 4
173 L(sin_result_sin_poly):
174 /*
175 * Here if
176 * sin(x) = poly_sin * sign_sin
177 * cos(x) = poly_cos * sign_cos
178 */
179 movsd MO2(DP_ONES,%edx,8), %xmm4/* 0|sign_cos */
180 movhpd MO2(DP_ONES,%ecx,8), %xmm4/* sign_sin|sign_cos */
181 mulpd %xmm4, %xmm3 /* result_sin|result_cos */
182 movl ARG_SIN_PTR, %eax
183 cvtpd2ps %xmm3, %xmm0 /* SP results */
184 movl ARG_COS_PTR, %ecx
185 movss %xmm0, (%ecx) /* store cos(x) from xmm0[0] */
186 shufps $1, %xmm0, %xmm0 /* move sin(x) to xmm0[0] */
187 movss %xmm0, (%eax) /* store sin(x) */
188 RETURN
189
190 .p2align 4
191 L(large_args):
192 /* Here if |x|>=9*Pi/4 */
193 cmpl $0x7f800000, %eax /* x is Inf or NaN ? */
194 jae L(arg_inf_or_nan)
195
196 /* Here if finite |x|>=9*Pi/4 */
197 cmpl $0x4b000000, %eax /* |x|<2^23 ? */
198 jae L(very_large_args)
199
200 /* Here if 9*Pi/4<=|x|<2^23 */
201 movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */
202 mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
203 cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
204 addl $1, %eax /* k+1 */
205 movl %eax, %edx
206 andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
207 cvtsi2sdl %edx, %xmm4 /* DP j */
208 movl ARG_X, %ecx /* Load x */
209 movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */
210 shrl $29, %ecx /* (sign of x) << 2 */
211 mulsd %xmm4, %xmm2 /* -j*PIO4HI */
212 movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */
213 addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
214 mulsd %xmm3, %xmm4 /* j*PIO4LO */
215 addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
216 jmp L(reconstruction)
217
218 .p2align 4
219 L(very_large_args):
220 /* Here if finite |x|>=2^23 */
221
222 /* bitpos = (ix>>23) - BIAS_32 + 59; */
223 shrl $23, %eax /* eb = biased exponent of x */
224 subl $68, %eax /* bitpos=eb-0x7f+59, where 0x7f */
225 /*is exponent bias */
226 movl $28, %ecx /* %cl=28 */
227 movl %eax, %edx /* bitpos copy */
228
229 /* j = bitpos/28; */
230 div %cl /* j in register %al=%ax/%cl */
231 movapd %xmm0, %xmm3 /* |x| */
232 andl $0xff, %eax /* clear unneeded remainder from %ah*/
233
234 imull $28, %eax, %ecx /* j*28 */
235 movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */
236 movapd %xmm0, %xmm5 /* |x| */
237 mulsd -2*8+MO2(_FPI,%eax,8), %xmm3/* tmp3 = FPI[j-2]*|x| */
238 movapd %xmm0, %xmm1 /* |x| */
239 mulsd -1*8+MO2(_FPI,%eax,8), %xmm5/* tmp2 = FPI[j-1]*|x| */
240 mulsd 0*8+MO2(_FPI,%eax,8), %xmm0/* tmp0 = FPI[j]*|x| */
241 addl $19, %ecx /* j*28+19 */
242 mulsd 1*8+MO2(_FPI,%eax,8), %xmm1/* tmp1 = FPI[j+1]*|x| */
243 cmpl %ecx, %edx /* bitpos>=j*28+19 ? */
244 jl L(very_large_skip1)
245
246 /* Here if bitpos>=j*28+19 */
247 andpd %xmm3, %xmm4 /* HI(tmp3) */
248 subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
249 L(very_large_skip1):
250
251 movsd MO1(DP_2POW52), %xmm6
252 movapd %xmm5, %xmm2 /* tmp2 copy */
253 addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
254 movl $1, %edx
255 addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
256 movsd 8+MO1(DP_2POW52), %xmm4
257 movd %xmm6, %eax /* k = I64_LO(tmp6); */
258 addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
259 movl ARG_X, %ecx /* Load x */
260 comisd %xmm5, %xmm4 /* tmp4 > tmp5 ? */
261 jbe L(very_large_skip2)
262
263 /* Here if tmp4 > tmp5 */
264 subl $1, %eax /* k-- */
265 addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */
266 L(very_large_skip2):
267
268 andl %eax, %edx /* k&1 */
269 subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
270 addsd MO2(DP_ZERONE,%edx,8), %xmm3/* t = DP_ZERONE[k&1] + tmp3 */
271 addsd %xmm2, %xmm3 /* t += tmp2 */
272 shrl $29, %ecx /* (sign of x) << 2 */
273 addsd %xmm3, %xmm0 /* t += tmp0 */
274 addl $1, %eax /* n=k+1 */
275 addsd %xmm1, %xmm0 /* t += tmp1 */
276 mulsd MO1(DP_PIO4), %xmm0 /* t *= PI04 */
277
278 jmp L(reconstruction) /* end of very_large_args peth */
279
280 .p2align 4
281 L(arg_less_pio4):
282 /* Here if |x|<Pi/4 */
283 cmpl $0x3d000000, %eax /* |x|<2^-5 ? */
284 jl L(arg_less_2pn5)
285
286 /* Here if 2^-5<=|x|<Pi/4 */
287 movaps %xmm0, %xmm3 /* DP x */
288 movhpd MO1(DP_ONES), %xmm3 /* DP 1|x */
289 mulsd %xmm0, %xmm0 /* DP y=x^2 */
290 unpcklpd %xmm0, %xmm0 /* DP y|y */
291 movaps %xmm0, %xmm1 /* y|y */
292 mulpd %xmm0, %xmm0 /* z=x^4|z=x^4 */
293
294 movapd MO1(DP_SC4), %xmm4 /* S4 */
295 mulpd %xmm0, %xmm4 /* z*S4 */
296 movapd MO1(DP_SC3), %xmm5 /* S3 */
297 mulpd %xmm0, %xmm5 /* z*S3 */
298 addpd MO1(DP_SC2), %xmm4 /* S2+z*S4 */
299 mulpd %xmm0, %xmm4 /* z*(S2+z*S4) */
300 addpd MO1(DP_SC1), %xmm5 /* S1+z*S3 */
301 mulpd %xmm0, %xmm5 /* z*(S1+z*S3) */
302 addpd MO1(DP_SC0), %xmm4 /* S0+z*(S2+z*S4) */
303 mulpd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */
304 mulpd %xmm3, %xmm5 /* x*z*(S1+z*S3) */
305 mulpd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */
306 addpd %xmm5, %xmm4 /*x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
307 movl ARG_SIN_PTR, %eax
308 addpd %xmm4, %xmm3 /*x+x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/
309 movl ARG_COS_PTR, %ecx
310 cvtpd2ps %xmm3, %xmm0 /* SP results */
311 movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */
312 shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */
313 movss %xmm0, (%ecx) /* store cos(x) */
314 RETURN
315
316 .p2align 4
317 L(arg_less_2pn5):
318 /* Here if |x|<2^-5 */
319 cmpl $0x32000000, %eax /* |x|<2^-27 ? */
320 jl L(arg_less_2pn27)
321
322 /* Here if 2^-27<=|x|<2^-5 */
323 movaps %xmm0, %xmm1 /* DP x */
324 movhpd MO1(DP_ONES), %xmm1 /* DP 1|x */
325 mulsd %xmm0, %xmm0 /* DP x^2 */
326 unpcklpd %xmm0, %xmm0 /* DP x^2|x^2 */
327
328 movaps MO1(DP_SINCOS2_1), %xmm3/* DP DP_SIN2_1 */
329 mulpd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */
330 addpd MO1(DP_SINCOS2_0), %xmm3/* DP DP_SIN2_0+x^2*DP_SIN2_1 */
331 mulpd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
332 mulpd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
333 addpd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
334 movl ARG_SIN_PTR, %eax
335 cvtpd2ps %xmm3, %xmm0 /* SP results */
336 movl ARG_COS_PTR, %ecx
337 movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */
338 shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */
339 movss %xmm0, (%ecx) /* store cos(x) */
340 RETURN
341
342 .p2align 4
343 L(arg_less_2pn27):
344 movss ARG_X, %xmm7 /* SP x */
345 cmpl $0, %eax /* x=0 ? */
346 je L(arg_zero) /* in case x=0 return sin(+-0)==+-0 */
347 /* Here if |x|<2^-27 */
348 /*
349 * Special cases here:
350 * sin(subnormal) raises inexact/underflow
351 * sin(min_normalized) raises inexact/underflow
352 * sin(normalized) raises inexact
353 * cos(here)=1-|x| (raising inexact)
354 */
355 movaps %xmm0, %xmm3 /* DP x */
356 mulsd MO1(DP_SMALL), %xmm0 /* DP x*DP_SMALL */
357 subsd %xmm0, %xmm3 /* DP sin result is x-x*DP_SMALL */
358 andps MO1(SP_ABS_MASK), %xmm7 /* SP |x| */
359 cvtsd2ss %xmm3, %xmm0 /* sin(x) */
360 movl ARG_SIN_PTR, %eax
361 movss MO1(SP_ONE), %xmm1 /* SP 1.0 */
362 movss %xmm0, (%eax) /* sin(x) store */
363 movl ARG_COS_PTR, %ecx
364 subss %xmm7, %xmm1 /* cos(x) */
365 movss %xmm1, (%ecx) /* cos(x) store */
366 RETURN
367
368 .p2align 4
369 L(arg_zero):
370 movss MO1(SP_ONE), %xmm0 /* 1.0 */
371 movl ARG_SIN_PTR, %eax
372 movl ARG_COS_PTR, %ecx
373 movss %xmm7, (%eax) /* sin(+-0)==x */
374 movss %xmm0, (%ecx) /* cos(+-0)==1 */
375 RETURN
376
377 .p2align 4
378 L(arg_inf_or_nan):
379 movss ARG_X, %xmm7 /* SP x */
380 /* Here if |x| is Inf or NAN */
381 jne L(skip_errno_setting) /* in case of x is NaN */
382
383 /* Here if x is Inf. Set errno to EDOM. */
384 call JUMPTARGET(__errno_location)
385 movl $EDOM, (%eax)
386
387 .p2align 4
388 L(skip_errno_setting):
389 /* Here if |x| is Inf or NAN. Continued. */
390 subss %xmm7, %xmm7 /* x-x, result is NaN */
391 movl ARG_SIN_PTR, %eax
392 movl ARG_COS_PTR, %ecx
393 movss %xmm7, (%eax)
394 movss %xmm7, (%ecx)
395 RETURN
396 END(__sincosf_sse2)
397
398 .section .rodata, "a"
399 .p2align 3
400 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
401 .long 0x00000000,0x00000000
402 .long 0x54442d18,0x3fe921fb
403 .long 0x54442d18,0x3ff921fb
404 .long 0x7f3321d2,0x4002d97c
405 .long 0x54442d18,0x400921fb
406 .long 0x2955385e,0x400f6a7a
407 .long 0x7f3321d2,0x4012d97c
408 .long 0xe9bba775,0x4015fdbb
409 .long 0x54442d18,0x401921fb
410 .long 0xbeccb2bb,0x401c463a
411 .long 0x2955385e,0x401f6a7a
412 .type L(PIO4J), @object
413 ASM_SIZE_DIRECTIVE(L(PIO4J))
414
415 .p2align 3
416 L(_FPI): /* 4/Pi broken into sum of positive DP values */
417 .long 0x00000000,0x00000000
418 .long 0x6c000000,0x3ff45f30
419 .long 0x2a000000,0x3e3c9c88
420 .long 0xa8000000,0x3c54fe13
421 .long 0xd0000000,0x3aaf47d4
422 .long 0x6c000000,0x38fbb81b
423 .long 0xe0000000,0x3714acc9
424 .long 0x7c000000,0x3560e410
425 .long 0x56000000,0x33bca2c7
426 .long 0xac000000,0x31fbd778
427 .long 0xe0000000,0x300b7246
428 .long 0xe8000000,0x2e5d2126
429 .long 0x48000000,0x2c970032
430 .long 0xe8000000,0x2ad77504
431 .long 0xe0000000,0x290921cf
432 .long 0xb0000000,0x274deb1c
433 .long 0xe0000000,0x25829a73
434 .long 0xbe000000,0x23fd1046
435 .long 0x10000000,0x2224baed
436 .long 0x8e000000,0x20709d33
437 .long 0x80000000,0x1e535a2f
438 .long 0x64000000,0x1cef904e
439 .long 0x30000000,0x1b0d6398
440 .long 0x24000000,0x1964ce7d
441 .long 0x16000000,0x17b908bf
442 .type L(_FPI), @object
443 ASM_SIZE_DIRECTIVE(L(_FPI))
444
445 /* Coefficients of polynomials for */
446 /* sin(x)~=x+x*x^2*(DP_SIN2_0+x^2*DP_SIN2_1) in low DP part, */
447 /* cos(x)~=1+1*x^2*(DP_COS2_0+x^2*DP_COS2_1) in high DP part, */
448 /* for |x|<2^-5. */
449 .p2align 4
450 L(DP_SINCOS2_0):
451 .long 0x5543d49d,0xbfc55555
452 .long 0xff5cc6fd,0xbfdfffff
453 .type L(DP_SINCOS2_0), @object
454 ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_0))
455
456 .p2align 4
457 L(DP_SINCOS2_1):
458 .long 0x75cec8c5,0x3f8110f4
459 .long 0xb178dac5,0x3fa55514
460 .type L(DP_SINCOS2_1), @object
461 ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_1))
462
463 .p2align 3
464 L(DP_ZERONE):
465 .long 0x00000000,0x00000000 /* 0.0 */
466 .long 0x00000000,0xbff00000 /* 1.0 */
467 .type L(DP_ZERONE), @object
468 ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
469
470 .p2align 3
471 L(DP_ONES):
472 .long 0x00000000,0x3ff00000 /* +1.0 */
473 .long 0x00000000,0xbff00000 /* -1.0 */
474 .type L(DP_ONES), @object
475 ASM_SIZE_DIRECTIVE(L(DP_ONES))
476
477 /* Coefficients of polynomials for */
478 /* sin(t)~=t+t*t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))) in low DP part, */
479 /* cos(t)~=1+1*t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))) in high DP part, */
480 /* for |t|<Pi/4. */
481 .p2align 4
482 L(DP_SC4):
483 .long 0x1674b58a,0xbe5a947e
484 .long 0xdd8844d7,0xbe923c97
485 .type L(DP_SC4), @object
486 ASM_SIZE_DIRECTIVE(L(DP_SC4))
487
488 .p2align 4
489 L(DP_SC3):
490 .long 0x64e6b5b4,0x3ec71d72
491 .long 0x9ac43cc0,0x3efa00eb
492 .type L(DP_SC3), @object
493 ASM_SIZE_DIRECTIVE(L(DP_SC3))
494
495 .p2align 4
496 L(DP_SC2):
497 .long 0x8b4bd1f9,0xbf2a019f
498 .long 0x348b6874,0xbf56c16b
499 .type L(DP_SC2), @object
500 ASM_SIZE_DIRECTIVE(L(DP_SC2))
501
502 .p2align 4
503 L(DP_SC1):
504 .long 0x10c2688b,0x3f811111
505 .long 0x545c50c7,0x3fa55555
506 .type L(DP_SC1), @object
507 ASM_SIZE_DIRECTIVE(L(DP_SC1))
508
509 .p2align 4
510 L(DP_SC0):
511 .long 0x55551cd9,0xbfc55555
512 .long 0xfffe98ae,0xbfdfffff
513 .type L(DP_SC0), @object
514 ASM_SIZE_DIRECTIVE(L(DP_SC0))
515
516 .p2align 3
517 L(DP_SMALL):
518 .long 0x00000000,0x3cd00000 /* 2^(-50) */
519 .type L(DP_SMALL), @object
520 ASM_SIZE_DIRECTIVE(L(DP_SMALL))
521
522 .p2align 3
523 L(DP_PIO4):
524 .long 0x54442d18,0x3fe921fb /* Pi/4 */
525 .type L(DP_PIO4), @object
526 ASM_SIZE_DIRECTIVE(L(DP_PIO4))
527
528 .p2align 3
529 L(DP_2POW52):
530 .long 0x00000000,0x43300000 /* +2^52 */
531 .long 0x00000000,0xc3300000 /* -2^52 */
532 .type L(DP_2POW52), @object
533 ASM_SIZE_DIRECTIVE(L(DP_2POW52))
534
535 .p2align 3
536 L(DP_INVPIO4):
537 .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
538 .type L(DP_INVPIO4), @object
539 ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
540
541 .p2align 3
542 L(DP_PIO4HI):
543 .long 0x54000000,0xbfe921fb /* High part of Pi/4 */
544 .type L(DP_PIO4HI), @object
545 ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
546
547 .p2align 3
548 L(DP_PIO4LO):
549 .long 0x11A62633,0xbe010b46 /* Low part of Pi/4 */
550 .type L(DP_PIO4LO), @object
551 ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
552
553 .p2align 2
554 L(SP_INVPIO4):
555 .long 0x3fa2f983 /* 4/Pi */
556 .type L(SP_INVPIO4), @object
557 ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
558
559 .p2align 4
560 L(DP_ABS_MASK): /* Mask for getting DP absolute value */
561 .long 0xffffffff,0x7fffffff
562 .long 0xffffffff,0x7fffffff
563 .type L(DP_ABS_MASK), @object
564 ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
565
566 .p2align 3
567 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
568 .long 0x00000000,0xffffffff
569 .type L(DP_HI_MASK), @object
570 ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
571
572 .p2align 4
573 L(SP_ABS_MASK): /* Mask for getting SP absolute value */
574 .long 0x7fffffff,0x7fffffff
575 .long 0x7fffffff,0x7fffffff
576 .type L(SP_ABS_MASK), @object
577 ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
578
579 .p2align 2
580 L(SP_ONE):
581 .long 0x3f800000 /* 1.0 */
582 .type L(SP_ONE), @object
583 ASM_SIZE_DIRECTIVE(L(SP_ONE))
584
585 weak_alias(__sincosf, sincosf)