]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / i386 / i686 / fpu / multiarch / s_cosf-sse2.S
CommitLineData
4ffffbd2 1/* Optimized with sse2 version of cosf
b168057a 2 Copyright (C) 2012-2015 Free Software Foundation, Inc.
4ffffbd2
LD
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19#include <sysdep.h>
20#define __need_Emath
21#include <bits/errno.h>
22
23/* Short algorithm description:
24 *
25 * 1) if |x| == 0: return 1.0-|x|.
26 * 2) if |x| < 2^-27: return 1.0-|x|.
27 * 3) if |x| < 2^-5 : return 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1.
28 * 4) if |x| < Pi/4: return 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
29 * 5) if |x| < 9*Pi/4:
30 * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+3,
31 * t=|x|-j*Pi/4.
32 * 5.2) Reconstruction:
33 * s = (-1.0)^((n>>2)&1)
34 * if(n&2 != 0) {
35 * using cos(t) polynomial for |t|<Pi/4, result is
36 * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
37 * } else {
38 * using sin(t) polynomial for |t|<Pi/4, result is
39 * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
40 * }
41 * 6) if |x| < 2^23, large args:
42 * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
43 * t=|x|-j*Pi/4.
44 * 6.2) Reconstruction same as (5.2).
45 * 7) if |x| >= 2^23, very large args:
46 * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
47 * t=|x|-j*Pi/4.
48 * 7.2) Reconstruction same as (5.2).
49 * 8) if x is Inf, return x-x, and set errno=EDOM.
50 * 9) if x is NaN, return x-x.
51 *
52 * Special cases:
80ccd52c
LD
53 * cos(+-0) = 1 not raising inexact,
54 * cos(subnormal) raises inexact,
55 * cos(min_normalized) raises inexact,
56 * cos(normalized) raises inexact,
57 * cos(Inf) = NaN, raises invalid, sets errno to EDOM,
58 * cos(NaN) = NaN.
4ffffbd2
LD
59 */
60
/* Helper macros that hide the difference between PIC and non-PIC builds:
   MO1(symbol)             memory operand for a file-local data symbol,
   MO2(symbol,reg2,_scale) same, indexed by reg2*_scale,
   ENTRANCE / RETURN       function prologue / epilogue,
   ARG_X                   stack location of the float argument x.  */
61#ifdef PIC
/* PIC: local data is addressed as symbol@GOTOFF relative to %ebx.  */
62# define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
63# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
80ccd52c
LD
/* Unwind (CFI) bookkeeping for a 4-byte push/pop of REG.  */
64# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
65# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
66# define PUSH(REG) pushl REG; CFI_PUSH(REG)
67# define POP(REG) popl REG; CFI_POP(REG)
/* Save %ebx, then set it up via LOAD_PIC_REG (defined elsewhere;
   presumably loads the GOT base that @GOTOFF expects -- confirm).  */
68# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
/* Restore %ebx and return.  The CFI_PUSH after ret appears to re-establish
   the "%ebx is pushed" unwind state for the code that follows this exit,
   since the function has several RETURN sites -- NOTE(review): confirm.  */
69# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
4ffffbd2
LD
/* x is 4 bytes further from %esp than in the non-PIC case because
   ENTRANCE pushed %ebx.  */
70# define ARG_X 8(%esp)
71#else
/* Non-PIC: local data is addressed directly by symbol.  */
72# define MO1(symbol) L(symbol)
73# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
80ccd52c
LD
/* No prologue is needed; the epilogue is a plain ret.  */
74# define ENTRANCE
75# define RETURN ret
4ffffbd2
LD
76# define ARG_X 4(%esp)
77#endif
78
79 .text
/* __cosf_sse2: single-precision cosine.
   Input:  x, a float read from the stack at ARG_X.
   Output: the result is loaded onto the x87 stack (flds/fldl) right
           before each RETURN.
   Uses %eax, %ecx, %edx as integer scratch and %xmm0-%xmm6 for the
   floating-point work; most of the arithmetic is done in double
   precision.  For infinite x, errno is set to EDOM through
   __errno_location.  */
80ENTRY(__cosf_sse2)
81 /* Input: single precision x on stack at address ARG_X */
82
80ccd52c 83 ENTRANCE
4ffffbd2
LD
84 movl ARG_X, %eax /* Bits of x */
85 cvtss2sd ARG_X, %xmm0 /* DP x */
86 andl $0x7fffffff, %eax /* Bits of |x| (sign bit cleared) */
87
88 cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */
89 jb L(arg_less_pio4)
90
91 /* Here if |x|>=Pi/4 */
92 movd %eax, %xmm3 /* SP |x| */
93 andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */
94 movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */
95
96 cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */
97 jae L(large_args)
98
99 /* Here if Pi/4<=|x|<9*Pi/4 */
100 mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */
101 cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */
102 addl $1, %eax /* k+1 */
103 movl $0x0e, %edx
104 andl %eax, %edx /* j = (k+1)&0x0e */
105 addl $2, %eax /* n = k+3 */
106 subsd MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */
107
108L(reconstruction):
109 /* Input: %eax=n, %xmm0=t */
110 testl $2, %eax /* n&2 != 0? */
111 jz L(sin_poly)
112
113/*L(cos_poly):*/
114 /* Here if cos(x) calculated using cos(t) polynomial for |t|<Pi/4:
115 * y = t*t; z = y*y;
116 * s = (-1.0)^((n>>2)&1)
117 * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
118 */
119 shrl $2, %eax /* n>>2 */
120 mulsd %xmm0, %xmm0 /* y=t^2 */
121 andl $1, %eax /* (n>>2)&1 */
122 movaps %xmm0, %xmm1 /* y */
123 mulsd %xmm0, %xmm0 /* z=t^4 */
124
125 movsd MO1(DP_C4), %xmm4 /* C4 */
126 mulsd %xmm0, %xmm4 /* z*C4 */
127 movsd MO1(DP_C3), %xmm3 /* C3 */
128 mulsd %xmm0, %xmm3 /* z*C3 */
129 addsd MO1(DP_C2), %xmm4 /* C2+z*C4 */
130 mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */
131 lea -8(%esp), %esp /* Borrow 8 bytes of stack frame */
132 addsd MO1(DP_C1), %xmm3 /* C1+z*C3 */
133 mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */
134 addsd MO1(DP_C0), %xmm4 /* C0+z*(C2+z*C4) */
135 mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */
136
137 addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
138 /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
139 addsd MO1(DP_ONES), %xmm3
140
141 mulsd MO2(DP_ONES,%eax,8), %xmm3 /* DP result, times s = DP_ONES[(n>>2)&1] */
142 movsd %xmm3, 0(%esp) /* Move result from sse... */
143 fldl 0(%esp) /* ...to FPU. */
144 /* Return back 8 bytes of stack frame */
145 lea 8(%esp), %esp
80ccd52c 146 RETURN
4ffffbd2
LD
147
148 .p2align 4
149L(sin_poly):
150 /* Here if cos(x) calculated using sin(t) polynomial for |t|<Pi/4:
151 * y = t*t; z = y*y;
152 * s = (-1.0)^((n>>2)&1)
153 * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
154 */
155
156 movaps %xmm0, %xmm4 /* t */
157 shrl $2, %eax /* n>>2 */
158 mulsd %xmm0, %xmm0 /* y=t^2 */
159 andl $1, %eax /* (n>>2)&1 */
160 movaps %xmm0, %xmm1 /* y */
161 mulsd %xmm0, %xmm0 /* z=t^4 */
162
163 movsd MO1(DP_S4), %xmm2 /* S4 */
164 mulsd %xmm0, %xmm2 /* z*S4 */
165 movsd MO1(DP_S3), %xmm3 /* S3 */
166 mulsd %xmm0, %xmm3 /* z*S3 */
167 lea -8(%esp), %esp /* Borrow 8 bytes of stack frame */
168 addsd MO1(DP_S2), %xmm2 /* S2+z*S4 */
169 mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */
170 addsd MO1(DP_S1), %xmm3 /* S1+z*S3 */
171 mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */
172 addsd MO1(DP_S0), %xmm2 /* S0+z*(S2+z*S4) */
173 mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */
174 /* t*s, where s = (-1.0)^((n>>2)&1) */
175 mulsd MO2(DP_ONES,%eax,8), %xmm4
176 addsd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
177 /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
178 mulsd %xmm4, %xmm3
179 /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))) */
180 addsd %xmm4, %xmm3
181 movsd %xmm3, 0(%esp) /* Move result from sse... */
182 fldl 0(%esp) /* ...to FPU. */
183 /* Return back 8 bytes of stack frame */
184 lea 8(%esp), %esp
80ccd52c 185 RETURN
4ffffbd2
LD
186
187 .p2align 4
188L(large_args):
189 /* Here if |x|>=9*Pi/4 */
190 cmpl $0x7f800000, %eax /* x is Inf or NaN? */
191 jae L(arg_inf_or_nan) /* flags are reused there to tell Inf from NaN */
192
193 /* Here if finite |x|>=9*Pi/4 */
194 cmpl $0x4b000000, %eax /* |x|<2^23? */
195 jae L(very_large_args)
196
197 /* Here if 9*Pi/4<=|x|<2^23 */
198 movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */
199 mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
200 cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
201 addl $1, %eax /* k+1 */
202 movl %eax, %edx
203 andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
204 cvtsi2sdl %edx, %xmm4 /* DP j */
205 movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */
206 mulsd %xmm4, %xmm2 /* -j*PIO4HI */
207 movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */
208 addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
209 addl $2, %eax /* n = k+3 */
210 mulsd %xmm3, %xmm4 /* -j*PIO4LO */
211 addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
212 jmp L(reconstruction)
213
214 .p2align 4
215L(very_large_args):
216 /* Here if finite |x|>=2^23 */
217
218 /* bitpos = (ix>>23) - BIAS_32 + 59; */
219 shrl $23, %eax /* eb = biased exponent of x */
220 /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
221 subl $68, %eax /* 68 = 0x7f - 59 */
222 movl $28, %ecx /* %cl=28 */
223 movl %eax, %edx /* bitpos copy */
224
225 /* j = bitpos/28; */
226 div %cl /* j in register %al=%ax/%cl */
227 movapd %xmm0, %xmm3 /* |x| */
228 /* clear unneeded remainder from %ah */
229 andl $0xff, %eax
230
231 imull $28, %eax, %ecx /* j*28 */
232 movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */
233 movapd %xmm0, %xmm5 /* |x| */
234 mulsd -2*8+MO2(_FPI,%eax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */
235 movapd %xmm0, %xmm1 /* |x| */
236 mulsd -1*8+MO2(_FPI,%eax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */
237 mulsd 0*8+MO2(_FPI,%eax,8), %xmm0 /* tmp0 = FPI[j]*|x| */
238 addl $19, %ecx /* j*28+19 */
239 mulsd 1*8+MO2(_FPI,%eax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */
240 cmpl %ecx, %edx /* bitpos>=j*28+19? */
241 jl L(very_large_skip1)
242
243 /* Here if bitpos>=j*28+19 */
244 andpd %xmm3, %xmm4 /* HI(tmp3) */
245 subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
246L(very_large_skip1):
247
248 movsd MO1(DP_2POW52), %xmm6 /* +2^52 */
249 movapd %xmm5, %xmm2 /* tmp2 copy */
250 addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
251 movl $1, %edx
252 addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
253 movsd 8+MO1(DP_2POW52), %xmm4 /* -2^52 (second table entry) */
254 movd %xmm6, %eax /* k = I64_LO(tmp6); */
255 addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
256 comisd %xmm5, %xmm4 /* tmp4 > tmp5? */
257 jbe L(very_large_skip2)
258
259 /* Here if tmp4 > tmp5 */
260 subl $1, %eax /* k-- */
261 addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 (adds the -1.0 entry) */
262L(very_large_skip2):
263
264 andl %eax, %edx /* k&1 */
265 subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
266 addsd MO2(DP_ZERONE,%edx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */
267 addsd %xmm2, %xmm3 /* t += tmp2 */
268 addsd %xmm3, %xmm0 /* t += tmp0 */
269 addl $3, %eax /* n=k+3 */
270 addsd %xmm1, %xmm0 /* t += tmp1 */
271 mulsd MO1(DP_PIO4), %xmm0 /* t *= PIO4 */
272
273 jmp L(reconstruction) /* end of very_large_args path */
274
4ffffbd2
LD
275 .p2align 4
276L(arg_less_pio4):
277 /* Here if |x|<Pi/4 */
278 cmpl $0x3d000000, %eax /* |x|<2^-5? */
279 jl L(arg_less_2pn5) /* signed jump is safe: sign bit of %eax was cleared */
280
281 /* Here if 2^-5<=|x|<Pi/4 */
282 mulsd %xmm0, %xmm0 /* y=x^2 */
283 movaps %xmm0, %xmm1 /* y */
284 mulsd %xmm0, %xmm0 /* z=x^4 */
285 movsd MO1(DP_C4), %xmm3 /* C4 */
286 mulsd %xmm0, %xmm3 /* z*C4 */
287 movsd MO1(DP_C3), %xmm5 /* C3 */
288 mulsd %xmm0, %xmm5 /* z*C3 */
289 addsd MO1(DP_C2), %xmm3 /* C2+z*C4 */
290 mulsd %xmm0, %xmm3 /* z*(C2+z*C4) */
291 addsd MO1(DP_C1), %xmm5 /* C1+z*C3 */
292 mulsd %xmm0, %xmm5 /* z*(C1+z*C3) */
293 addsd MO1(DP_C0), %xmm3 /* C0+z*(C2+z*C4) */
294 mulsd %xmm1, %xmm3 /* y*(C0+z*(C2+z*C4)) */
295 addsd %xmm5, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
296 /* 1.0 + y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
297 addsd MO1(DP_ONES), %xmm3
298 cvtsd2ss %xmm3, %xmm3 /* SP result */
299
300L(epilogue):
 /* Input: %xmm3 = single-precision result */
301 lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */
302 movss %xmm3, 0(%esp) /* Move result from sse... */
303 flds 0(%esp) /* ...to FPU. */
304 /* Return back 4 bytes of stack frame */
305 lea 4(%esp), %esp
80ccd52c 306 RETURN
4ffffbd2
LD
307
308 .p2align 4
309L(arg_less_2pn5):
310 /* Here if |x|<2^-5 */
311 cmpl $0x32000000, %eax /* |x|<2^-27? */
312 jl L(arg_less_2pn27) /* signed jump is safe: sign bit of %eax was cleared */
313
314 /* Here if 2^-27<=|x|<2^-5 */
315 mulsd %xmm0, %xmm0 /* DP x^2 */
316 movsd MO1(DP_COS2_1), %xmm3 /* DP DP_COS2_1 */
317 mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_1 */
318 addsd MO1(DP_COS2_0), %xmm3 /* DP DP_COS2_0+x^2*DP_COS2_1 */
319 mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_0+x^4*DP_COS2_1 */
320 /* DP 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1 */
321 addsd MO1(DP_ONES), %xmm3
322 cvtsd2ss %xmm3, %xmm3 /* SP result */
323 jmp L(epilogue)
324
325 .p2align 4
326L(arg_less_2pn27):
327 /* Here if |x|<2^-27 */
328 movss ARG_X, %xmm0 /* x */
329 andps MO1(SP_ABS_MASK),%xmm0 /* |x| */
330 movss MO1(SP_ONE), %xmm3 /* 1.0 */
331 subss %xmm0, %xmm3 /* result is 1.0-|x| */
332 jmp L(epilogue)
333
334 .p2align 4
335L(arg_inf_or_nan):
336 /* Here if |x| is Inf or NAN */
 /* Flags are still those of cmpl $0x7f800000, %eax in L(large_args):
    equal means |x| is Inf, not equal means NaN. */
337 jne L(skip_errno_setting) /* in case of x is NaN */
338
339 /* Here if x is Inf. Set errno to EDOM. */
340 call JUMPTARGET(__errno_location)
341 movl $EDOM, (%eax) /* %eax = address returned by __errno_location */
342
343 .p2align 4
344L(skip_errno_setting):
345 /* Here if |x| is Inf or NAN. Continued. */
346 movss ARG_X, %xmm3 /* load x */
347 subss %xmm3, %xmm3 /* Result is NaN */
348 jmp L(epilogue)
349END(__cosf_sse2)
350
4ffffbd2
LD
/* Read-only constants used by __cosf_sse2.  Each double is stored as two
   .long words, low word first.  */
351 .section .rodata, "a"
352 .p2align 3
/* The Pi/4<=|x|<9*Pi/4 path indexes this table with j=(k+1)&0x0e,
   i.e. with an even j.  */
353L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
354 .long 0x00000000,0x00000000 /* 0 */
355 .long 0x54442d18,0x3fe921fb /* Pi/4 */
356 .long 0x54442d18,0x3ff921fb /* Pi/2 */
357 .long 0x7f3321d2,0x4002d97c /* 3*Pi/4 */
358 .long 0x54442d18,0x400921fb /* Pi */
359 .long 0x2955385e,0x400f6a7a /* 5*Pi/4 */
360 .long 0x7f3321d2,0x4012d97c /* 3*Pi/2 */
361 .long 0xe9bba775,0x4015fdbb /* 7*Pi/4 */
362 .long 0x54442d18,0x401921fb /* 2*Pi */
363 .long 0xbeccb2bb,0x401c463a /* 9*Pi/4 */
364 .long 0x2955385e,0x401f6a7a /* 5*Pi/2 */
365 .type L(PIO4J), @object
366 ASM_SIZE_DIRECTIVE(L(PIO4J))
367
/* Used only by the |x|>=2^23 path, which multiplies |x| by the four
   consecutive entries FPI[j-2]..FPI[j+1], where j = bitpos/28.
   Entry 0 is 0.0; the biased exponents of successive non-zero entries
   step down by about 28.  */
368 .p2align 3
369L(_FPI): /* 4/Pi broken into sum of positive DP values */
370 .long 0x00000000,0x00000000
371 .long 0x6c000000,0x3ff45f30
372 .long 0x2a000000,0x3e3c9c88
373 .long 0xa8000000,0x3c54fe13
374 .long 0xd0000000,0x3aaf47d4
375 .long 0x6c000000,0x38fbb81b
376 .long 0xe0000000,0x3714acc9
377 .long 0x7c000000,0x3560e410
378 .long 0x56000000,0x33bca2c7
379 .long 0xac000000,0x31fbd778
380 .long 0xe0000000,0x300b7246
381 .long 0xe8000000,0x2e5d2126
382 .long 0x48000000,0x2c970032
383 .long 0xe8000000,0x2ad77504
384 .long 0xe0000000,0x290921cf
385 .long 0xb0000000,0x274deb1c
386 .long 0xe0000000,0x25829a73
387 .long 0xbe000000,0x23fd1046
388 .long 0x10000000,0x2224baed
389 .long 0x8e000000,0x20709d33
390 .long 0x80000000,0x1e535a2f
391 .long 0x64000000,0x1cef904e
392 .long 0x30000000,0x1b0d6398
393 .long 0x24000000,0x1964ce7d
394 .long 0x16000000,0x17b908bf
395 .type L(_FPI), @object
396 ASM_SIZE_DIRECTIVE(L(_FPI))
397
398/* Coefficients of polynomial
399 for cos(x)~=1.0+x^2*DP_COS2_0+x^4*DP_COS2_1, |x|<2^-5.
 Used by the 2^-27<=|x|<2^-5 path of __cosf_sse2. */
400 .p2align 3
401L(DP_COS2_0):
402 .long 0xff5cc6fd,0xbfdfffff /* x^2 coefficient, just below -0.5 in magnitude */
403 .type L(DP_COS2_0), @object
404 ASM_SIZE_DIRECTIVE(L(DP_COS2_0))
405
406 .p2align 3
407L(DP_COS2_1):
408 .long 0xb178dac5,0x3fa55514 /* x^4 coefficient, about 0.0417 */
409 .type L(DP_COS2_1), @object
410 ASM_SIZE_DIRECTIVE(L(DP_COS2_1))
411
/* Two-entry table indexed by k&1 in the |x|>=2^23 path:
   entry 0 adds 0.0 to t, entry 1 adds -1.0.  */
412 .p2align 3
413L(DP_ZERONE):
414 .long 0x00000000,0x00000000 /* 0.0 */
415 .long 0x00000000,0xbff00000 /* -1.0 */
416 .type L(DP_ZERONE),@object
417 ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
418
/* Two-entry sign table: indexed by (n>>2)&1 to apply the result sign;
   entry 0 alone is also used as the constant 1.0 and entry 1 as -1.0.  */
419 .p2align 3
420L(DP_ONES):
421 .long 0x00000000,0x3ff00000 /* +1.0 */
422 .long 0x00000000,0xbff00000 /* -1.0 */
423 .type L(DP_ONES), @object
424 ASM_SIZE_DIRECTIVE(L(DP_ONES))
425
426/* Coefficients of polynomial
427 for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.
 Used by L(sin_poly).  The objects are laid out in the order
 S3, S1, S4, S2, S0, not in index order. */
428 .p2align 3
429L(DP_S3):
430 .long 0x64e6b5b4,0x3ec71d72 /* S3 (positive) */
431 .type L(DP_S3), @object
432 ASM_SIZE_DIRECTIVE(L(DP_S3))
433
434 .p2align 3
435L(DP_S1):
436 .long 0x10c2688b,0x3f811111 /* S1 (positive) */
437 .type L(DP_S1), @object
438 ASM_SIZE_DIRECTIVE(L(DP_S1))
439
440 .p2align 3
441L(DP_S4):
442 .long 0x1674b58a,0xbe5a947e /* S4 (negative) */
443 .type L(DP_S4), @object
444 ASM_SIZE_DIRECTIVE(L(DP_S4))
445
446 .p2align 3
447L(DP_S2):
448 .long 0x8b4bd1f9,0xbf2a019f /* S2 (negative) */
449 .type L(DP_S2), @object
450 ASM_SIZE_DIRECTIVE(L(DP_S2))
451
452 .p2align 3
453L(DP_S0):
454 .long 0x55551cd9,0xbfc55555 /* S0 (negative) */
455 .type L(DP_S0), @object
456 ASM_SIZE_DIRECTIVE(L(DP_S0))
457
458/* Coefficients of polynomial
459 for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.
 Used by the cos(t) branch of L(reconstruction) and by the
 2^-5<=|x|<Pi/4 path.  The objects are laid out in the order
 C3, C1, C4, C2, C0, not in index order. */
460 .p2align 3
461L(DP_C3):
462 .long 0x9ac43cc0,0x3efa00eb /* C3 (positive) */
463 .type L(DP_C3), @object
464 ASM_SIZE_DIRECTIVE(L(DP_C3))
465
466 .p2align 3
467L(DP_C1):
468 .long 0x545c50c7,0x3fa55555 /* C1 (positive) */
469 .type L(DP_C1), @object
470 ASM_SIZE_DIRECTIVE(L(DP_C1))
471
472 .p2align 3
473L(DP_C4):
474 .long 0xdd8844d7,0xbe923c97 /* C4 (negative) */
475 .type L(DP_C4), @object
476 ASM_SIZE_DIRECTIVE(L(DP_C4))
477
478 .p2align 3
479L(DP_C2):
480 .long 0x348b6874,0xbf56c16b /* C2 (negative) */
481 .type L(DP_C2), @object
482 ASM_SIZE_DIRECTIVE(L(DP_C2))
483
484 .p2align 3
485L(DP_C0):
486 .long 0xfffe98ae,0xbfdfffff /* C0 (negative) */
487 .type L(DP_C0), @object
488 ASM_SIZE_DIRECTIVE(L(DP_C0))
489
/* Double-precision constants used by the range-reduction paths.  */
490 .p2align 3
491L(DP_PIO4):
492 .long 0x54442d18,0x3fe921fb /* Pi/4 */
493 .type L(DP_PIO4), @object
494 ASM_SIZE_DIRECTIVE(L(DP_PIO4))
495
/* Two entries: the code reads +2^52 at offset 0 and -2^52 at offset 8.  */
496 .p2align 3
497L(DP_2POW52):
498 .long 0x00000000,0x43300000 /* +2^52 */
499 .long 0x00000000,0xc3300000 /* -2^52 */
500 .type L(DP_2POW52), @object
501 ASM_SIZE_DIRECTIVE(L(DP_2POW52))
502
503 .p2align 3
504L(DP_INVPIO4):
505 .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
506 .type L(DP_INVPIO4), @object
507 ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
508
/* The two parts of Pi/4 below are stored negated (sign bit set), so the
   9*Pi/4<=|x|<2^23 path can add j times each part to |x|.  */
509 .p2align 3
510L(DP_PIO4HI):
511 .long 0x54000000,0xbfe921fb /* High part of -Pi/4 */
512 .type L(DP_PIO4HI), @object
513 ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
514
515 .p2align 3
516L(DP_PIO4LO):
517 .long 0x11A62633,0xbe010b46 /* Low part of -Pi/4 */
518 .type L(DP_PIO4LO), @object
519 ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
520
/* Single-precision constants and bit masks.  */
521 .p2align 2
522L(SP_INVPIO4):
523 .long 0x3fa2f983 /* 4/Pi */
524 .type L(SP_INVPIO4), @object
525 ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
526
/* Used as the memory operand of andpd, which reads 16 bytes: hence two
   copies of the mask and 16-byte alignment.  */
527 .p2align 4
528L(DP_ABS_MASK): /* Mask for getting DP absolute value */
529 .long 0xffffffff,0x7fffffff
530 .long 0xffffffff,0x7fffffff
531 .type L(DP_ABS_MASK), @object
532 ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
533
/* Keeps the upper 32 bits of a double: sign, exponent and the top 20
   stored fraction bits (21 significant bits counting the implicit one).
   Loaded with movsd, so a single 8-byte copy is enough.  */
534 .p2align 3
535L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
536 .long 0x00000000,0xffffffff
80ccd52c
LD
537 .type L(DP_HI_MASK), @object
538 ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
4ffffbd2
LD
539
/* Used as the memory operand of andps, which reads 16 bytes: hence four
   copies of the mask and 16-byte alignment.  */
540 .p2align 4
541L(SP_ABS_MASK): /* Mask for getting SP absolute value */
542 .long 0x7fffffff,0x7fffffff
543 .long 0x7fffffff,0x7fffffff
544 .type L(SP_ABS_MASK), @object
545 ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
546
547 .p2align 2
548L(SP_ONE):
549 .long 0x3f800000 /* 1.0 */
550 .type L(SP_ONE), @object
551 ASM_SIZE_DIRECTIVE(L(SP_ONE))
552
553weak_alias (__cosf, cosf)