]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
Update copyright notices with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / i386 / i686 / fpu / multiarch / s_sinf-sse2.S
1 /* Optimized with sse2 version of sinf
2 Copyright (C) 2012-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19 #include <sysdep.h>
20 #define __need_Emath
21 #include <bits/errno.h>
22
23 /* Short algorithm description:
24 *
25 * 1) if |x| == 0: return x.
26 * 2) if |x| < 2^-27: return x-x*DP_SMALL, raise underflow only when needed.
27 * 3) if |x| < 2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1.
28 * 4) if |x| < Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).
29 * 5) if |x| < 9*Pi/4:
30 * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1,
31 * t=|x|-j*Pi/4.
32 * 5.2) Reconstruction:
33 * s = sign(x) * (-1.0)^((n>>2)&1)
34 * if(n&2 != 0) {
35 * using cos(t) polynomial for |t|<Pi/4, result is
36 * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
37 * } else {
38 * using sin(t) polynomial for |t|<Pi/4, result is
39 * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
40 * }
41 * 6) if |x| < 2^23, large args:
42 * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
43 * t=|x|-j*Pi/4.
44 * 6.2) Reconstruction same as (5.2).
45 * 7) if |x| >= 2^23, very large args:
46 * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
47 * t=|x|-j*Pi/4.
48 * 7.2) Reconstruction same as (5.2).
49 * 8) if x is Inf, return x-x, and set errno=EDOM.
50 * 9) if x is NaN, return x-x.
51 *
52 * Special cases:
53 * sin(+-0) = +-0 not raising inexact/underflow,
54 * sin(subnormal) raises inexact/underflow,
55 * sin(min_normalized) raises inexact/underflow,
56 * sin(normalized) raises inexact,
57 * sin(Inf) = NaN, raises invalid, sets errno to EDOM,
58 * sin(NaN) = NaN.
59 */
60
/* Addressing and prologue/epilogue helpers; two variants selected by PIC.
   MO1(symbol)             - memory operand naming local label L(symbol).
   MO2(symbol,reg2,_scale) - same operand, indexed by reg2*_scale.
   ENTRANCE / RETURN       - function prologue / epilogue.
   ARG_X                   - stack slot of the float argument; it is
                             %esp-relative, so it is only valid after
                             ENTRANCE and while %esp is not adjusted further. */
61 #ifdef PIC
/* PIC build: data is addressed as @GOTOFF displacements from %ebx. */
62 # define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
63 # define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
/* Unwind annotations matching a 4-byte push / pop of REG. */
64 # define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
65 # define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
66 # define PUSH(REG) pushl REG; CFI_PUSH(REG)
67 # define POP(REG) popl REG; CFI_POP(REG)
/* %ebx is saved before it is used as the PIC base.  LOAD_PIC_REG is not
   defined in this file; presumably it comes from <sysdep.h> and loads the
   GOT address into %ebx - TODO confirm. */
68 # define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
/* RETURN is expanded in the middle of the function, so after the ret the
   CFI state is put back to the pushed-%ebx state for the code that follows. */
69 # define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
/* Return address (4 bytes) and saved %ebx (4 bytes) precede the argument. */
70 # define ARG_X 8(%esp)
71 #else
/* Non-PIC build: absolute addressing, no register to save. */
72 # define MO1(symbol) L(symbol)
73 # define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
74 # define ENTRANCE
75 # define RETURN ret
/* Only the return address precedes the argument. */
76 # define ARG_X 4(%esp)
77 #endif
78
79 .text
/* __sinf_sse2 - sinf computed with SSE2 scalar double-precision arithmetic.
   In:   single-precision x on the stack at ARG_X.
   Out:  result left on the x87 stack; every exit path ends in flds or fldl
         followed by RETURN.
   Uses: %eax, %ecx, %edx, %xmm0-%xmm6 (plus %ebx in PIC builds, saved and
         restored by ENTRANCE/RETURN).
   This first section classifies |x| and performs the range reduction for
   Pi/4 <= |x| < 9*Pi/4, then falls through into L(reconstruction). */
80 ENTRY(__sinf_sse2)
81 /* Input: single precision x on stack at address ARG_X */
82
83 ENTRANCE
84 movl ARG_X, %eax /* %eax = raw IEEE bits of x */
85 cvtss2sd ARG_X, %xmm0 /* DP x (still signed) */
86 andl $0x7fffffff, %eax /* %eax = bits of |x| (sign bit cleared) */
87
/* With the sign bit cleared, the bit patterns of floats order the same way
   as their magnitudes, so |x| is classified with integer compares. */
88 cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */
89 jb L(arg_less_pio4)
90
91 /* Here if |x|>=Pi/4 */
92 movd %eax, %xmm3 /* SP |x| */
93 andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */
94 movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */
95
96 cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */
97 jae L(large_args) /* also taken for Inf and NaN bit patterns */
98
99 /* Here if Pi/4<=|x|<9*Pi/4 */
100 mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */
101 movl ARG_X, %ecx /* Load x */
102 cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */
103 shrl $31, %ecx /* %ecx = sign of x (0 or 1) */
104 addl $1, %eax /* n = k+1, kept in %eax for reconstruction */
105 movl $0x0e, %edx /* mask selecting an even j */
106 andl %eax, %edx /* j = (k+1)&0x0e */
107 subsd MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */
108
/* Common tail for all reduced arguments: pick the sin or cos polynomial
   from bit 1 of n and apply the final sign.  The code below the jz is the
   cos path; the polynomial is evaluated as two Horner chains in z = y^2
   (one in %xmm3, one in %xmm4) that are summed at the end. */
109 L(reconstruction):
110 /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
111 testl $2, %eax /* n&2 != 0? */
112 jz L(sin_poly)
113
114 /*L(cos_poly):*/
115 /* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4:
116 * y = t*t; z = y*y;
117 * s = sign(x) * (-1.0)^((n>>2)&1)
118 * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
119 */
120 shrl $2, %eax /* n>>2 */
121 mulsd %xmm0, %xmm0 /* y=t^2 */
122 andl $1, %eax /* (n>>2)&1 */
123 movaps %xmm0, %xmm1 /* y */
124 mulsd %xmm0, %xmm0 /* z=t^4 */
125
126 movsd MO1(DP_C4), %xmm4 /* C4 */
127 mulsd %xmm0, %xmm4 /* z*C4 */
128 xorl %eax, %ecx /* %ecx = index into DP_ONES: ((n>>2)&1) XOR sign(x) */
129 movsd MO1(DP_C3), %xmm3 /* C3 */
130 mulsd %xmm0, %xmm3 /* z*C3 */
131 addsd MO1(DP_C2), %xmm4 /* C2+z*C4 */
132 mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */
/* NOTE(review): %esp is moved here without a cfi_adjust_cfa_offset, so the
   unwind info does not describe these 8 bytes until they are released. */
133 lea -8(%esp), %esp /* Borrow 8 bytes of stack: room for one double */
134 addsd MO1(DP_C1), %xmm3 /* C1+z*C3 */
135 mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */
136 addsd MO1(DP_C0), %xmm4 /* C0+z*(C2+z*C4) */
137 mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */
138
139 addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
140 /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
141 addsd MO1(DP_ONES), %xmm3
142
143 mulsd MO2(DP_ONES,%ecx,8), %xmm3 /* DP result, multiplied by +1.0 or -1.0 */
144 movsd %xmm3, 0(%esp) /* Move result from sse... */
145 fldl 0(%esp) /* ...to FPU (loaded as a double). */
146 /* Release the 8 borrowed bytes of stack */
147 lea 8(%esp), %esp
148 RETURN
149
150 .p2align 4
/* Sin path of the reconstruction.  Entered only from L(reconstruction)
   with %eax=n, %xmm0=t, %ecx=sign(x).  The polynomial is evaluated as two
   Horner chains in z = y^2 (%xmm2 and %xmm3) that are summed at the end. */
151 L(sin_poly):
152 /* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4:
153 * y = t*t; z = y*y;
154 * s = sign(x) * (-1.0)^((n>>2)&1)
155 * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
156 */
157
158 movaps %xmm0, %xmm4 /* t */
159 shrl $2, %eax /* n>>2 */
160 mulsd %xmm0, %xmm0 /* y=t^2 */
161 andl $1, %eax /* (n>>2)&1 */
162 movaps %xmm0, %xmm1 /* y */
163 xorl %eax, %ecx /* %ecx = index into DP_ONES: ((n>>2)&1) XOR sign(x) */
164 mulsd %xmm0, %xmm0 /* z=t^4 */
165
166 movsd MO1(DP_S4), %xmm2 /* S4 */
167 mulsd %xmm0, %xmm2 /* z*S4 */
168 movsd MO1(DP_S3), %xmm3 /* S3 */
169 mulsd %xmm0, %xmm3 /* z*S3 */
/* NOTE(review): %esp is moved here without a cfi_adjust_cfa_offset, so the
   unwind info does not describe these 8 bytes until they are released. */
170 lea -8(%esp), %esp /* Borrow 8 bytes of stack: room for one double */
171 addsd MO1(DP_S2), %xmm2 /* S2+z*S4 */
172 mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */
173 addsd MO1(DP_S1), %xmm3 /* S1+z*S3 */
174 mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */
175 addsd MO1(DP_S0), %xmm2 /* S0+z*(S2+z*S4) */
176 mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */
177 /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
178 mulsd MO2(DP_ONES,%ecx,8), %xmm4
179 addsd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
180 /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
181 mulsd %xmm4, %xmm3
182 /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
183 addsd %xmm4, %xmm3
184 movsd %xmm3, 0(%esp) /* Move result from sse... */
185 fldl 0(%esp) /* ...to FPU (loaded as a double). */
186 /* Release the 8 borrowed bytes of stack */
187 lea 8(%esp), %esp
188 RETURN
189
190 .p2align 4
/* Range reduction for 9*Pi/4 <= |x| < 2^23, plus dispatch of Inf/NaN and
   of |x| >= 2^23.  Entered from the entry code with %eax = bits of |x| and
   %xmm0 = DP |x|.  Leaves %eax=n, %xmm0=t, %ecx=sign(x) for
   L(reconstruction). */
191 L(large_args):
192 /* Here if |x|>=9*Pi/4 */
/* The flags of this compare are still live at L(arg_inf_or_nan), where
   equal means Inf and above means NaN. */
193 cmpl $0x7f800000, %eax /* x is Inf or NaN? */
194 jae L(arg_inf_or_nan)
195
196 /* Here if finite |x|>=9*Pi/4 */
197 cmpl $0x4b000000, %eax /* |x|<2^23? */
198 jae L(very_large_args)
199
200 /* Here if 9*Pi/4<=|x|<2^23 */
201 movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */
202 mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
203 cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
204 addl $1, %eax /* n = k+1, kept in %eax for reconstruction */
205 movl %eax, %edx
206 andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
207 cvtsi2sdl %edx, %xmm4 /* DP j */
208 movl ARG_X, %ecx /* Load x */
/* DP_PIO4HI and DP_PIO4LO are stored negated, so the two additions below
   subtract j*Pi/4 in a high and a low step. */
209 movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */
210 shrl $31, %ecx /* sign bit of x */
211 mulsd %xmm4, %xmm2 /* -j*PIO4HI */
212 movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */
213 addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
214 mulsd %xmm3, %xmm4 /* -j*PIO4LO */
215 addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
216 jmp L(reconstruction)
217
218 .p2align 4
/* Range reduction for finite |x| >= 2^23.  |x| is multiplied by four
   consecutive pieces of 4/Pi taken from the _FPI table, chosen from the
   exponent of x; the integer part of the sum supplies k and the rest,
   scaled by Pi/4, is t.
   Entered with %eax = bits of |x| and %xmm0 = DP |x|; leaves %eax=n,
   %xmm0=t, %ecx=sign(x) for L(reconstruction).
   Since 0x4b000000 <= %eax < 0x7f800000 on entry, eb is 150..254, bitpos
   is 82..186 and j is 2..6, so FPI[j-2]..FPI[j+1] stay inside the
   25-entry table. */
219 L(very_large_args):
220 /* Here if finite |x|>=2^23 */
221
222 /* bitpos = (ix>>23) - BIAS_32 + 59; */
223 shrl $23, %eax /* eb = biased exponent of x */
224 /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
225 subl $68, %eax /* 68 = 0x7f - 59 */
226 movl $28, %ecx /* %cl=28 */
227 movl %eax, %edx /* bitpos copy */
228
229 /* j = bitpos/28; */
230 div %cl /* j in register %al=%ax/%cl */
231 movapd %xmm0, %xmm3 /* |x| */
232 /* clear unneeded remainder from %ah */
233 andl $0xff, %eax
234
235 imull $28, %eax, %ecx /* j*28 */
236 movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */
237 movapd %xmm0, %xmm5 /* |x| */
238 mulsd -2*8+MO2(_FPI,%eax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */
239 movapd %xmm0, %xmm1 /* |x| */
240 mulsd -1*8+MO2(_FPI,%eax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */
241 mulsd 0*8+MO2(_FPI,%eax,8), %xmm0 /* tmp0 = FPI[j]*|x| */
242 addl $19, %ecx /* j*28+19 */
243 mulsd 1*8+MO2(_FPI,%eax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */
244 cmpl %ecx, %edx /* bitpos>=j*28+19? */
245 jl L(very_large_skip1)
246
247 /* Here if bitpos>=j*28+19 */
248 andpd %xmm3, %xmm4 /* HI(tmp3) */
249 subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
250 L(very_large_skip1):
251
/* Adding and then subtracting 2^52 rounds tmp5 to an integer (tmp4); the
   low 32 bits of the intermediate sum tmp6 are taken as k. */
252 movsd MO1(DP_2POW52), %xmm6
253 movapd %xmm5, %xmm2 /* tmp2 copy */
254 addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
255 movl $1, %edx /* mask for k&1 below */
256 addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
257 movsd 8+MO1(DP_2POW52), %xmm4 /* second table entry: -2^52 */
258 movd %xmm6, %eax /* k = I64_LO(tmp6); */
259 addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
260 movl ARG_X, %ecx /* Load x */
261 comisd %xmm5, %xmm4 /* tmp4 > tmp5? */
262 jbe L(very_large_skip2)
263
264 /* Here if tmp4 > tmp5: the rounding went up, so step back by one */
265 subl $1, %eax /* k-- */
266 addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 (adds the -1.0 entry) */
267 L(very_large_skip2):
268
269 andl %eax, %edx /* k&1 */
270 subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
271 addsd MO2(DP_ZERONE,%edx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */
272 addsd %xmm2, %xmm3 /* t += tmp2 */
273 shrl $31, %ecx /* sign of x */
274 addsd %xmm3, %xmm0 /* t += tmp0 */
275 addl $1, %eax /* n=k+1 */
276 addsd %xmm1, %xmm0 /* t += tmp1 */
277 mulsd MO1(DP_PIO4), %xmm0 /* t *= Pi/4 */
278
279 jmp L(reconstruction) /* end of very_large_args path */
280
281 .p2align 4
/* Handles |x| < Pi/4 without range reduction.  Entered from the entry code
   with %eax = bits of |x| and %xmm0 = DP x (sign preserved).  For
   2^-5 <= |x| < Pi/4 the sin polynomial is evaluated directly on x and
   control falls through into L(epilogue) with the SP result in %xmm3. */
282 L(arg_less_pio4):
283 /* Here if |x|<Pi/4 */
/* jl is a signed compare; %eax has its sign bit cleared, so both operands
   are non-negative and the signed result equals the unsigned one. */
284 cmpl $0x3d000000, %eax /* |x|<2^-5? */
285 jl L(arg_less_2pn5)
286
287 /* Here if 2^-5<=|x|<Pi/4 */
288 movaps %xmm0, %xmm3 /* x */
289 mulsd %xmm0, %xmm0 /* y=x^2 */
290 movaps %xmm0, %xmm1 /* y */
291 mulsd %xmm0, %xmm0 /* z=x^4 */
292 movsd MO1(DP_S4), %xmm4 /* S4 */
293 mulsd %xmm0, %xmm4 /* z*S4 */
294 movsd MO1(DP_S3), %xmm5 /* S3 */
295 mulsd %xmm0, %xmm5 /* z*S3 */
296 addsd MO1(DP_S2), %xmm4 /* S2+z*S4 */
297 mulsd %xmm0, %xmm4 /* z*(S2+z*S4) */
298 addsd MO1(DP_S1), %xmm5 /* S1+z*S3 */
299 mulsd %xmm0, %xmm5 /* z*(S1+z*S3) */
300 addsd MO1(DP_S0), %xmm4 /* S0+z*(S2+z*S4) */
301 mulsd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */
302 mulsd %xmm3, %xmm5 /* x*z*(S1+z*S3) */
303 mulsd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */
304 /* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
305 addsd %xmm5, %xmm4
306 /* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
307 addsd %xmm4, %xmm3
308 cvtsd2ss %xmm3, %xmm3 /* SP result */
309
309
/* Shared exit for the paths that produce a single-precision result.
   Input: %xmm3 = SP result.  The value is passed through a 4-byte stack
   slot because it has to be loaded onto the x87 stack from memory. */
310 L(epilogue):
311 lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */
312 movss %xmm3, 0(%esp) /* Move result from sse... */
313 flds 0(%esp) /* ...to FPU. */
314 /* Return back 4 bytes of stack frame */
315 lea 4(%esp), %esp
316 RETURN
317
318 .p2align 4
/* Handles |x| < 2^-5.  Entered from L(arg_less_pio4) with %eax = bits of
   |x| and %xmm0 = DP x.  For 2^-27 <= |x| < 2^-5 the short polynomial
   x + x^3*DP_SIN2_0 + x^5*DP_SIN2_1 is used. */
319 L(arg_less_2pn5):
320 /* Here if |x|<2^-5 */
/* Signed jl is safe here: %eax has its sign bit cleared. */
321 cmpl $0x32000000, %eax /* |x|<2^-27? */
322 jl L(arg_less_2pn27)
323
324 /* Here if 2^-27<=|x|<2^-5 */
325 movaps %xmm0, %xmm1 /* DP x */
326 mulsd %xmm0, %xmm0 /* DP x^2 */
327 movsd MO1(DP_SIN2_1), %xmm3 /* DP DP_SIN2_1 */
328 mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */
329 addsd MO1(DP_SIN2_0), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */
330 mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
331 mulsd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
332 addsd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
333 cvtsd2ss %xmm3, %xmm3 /* SP result */
334 jmp L(epilogue)
335
336 .p2align 4
/* Handles |x| < 2^-27.  Entered from L(arg_less_2pn5) with %eax = bits of
   |x| and %xmm0 = DP x.  Zero is returned unchanged; any other value is
   returned as x - x*2^-50, rounded to single precision. */
337 L(arg_less_2pn27):
338 movss ARG_X, %xmm3 /* SP x, returned as is when x is +-0 */
339 cmpl $0, %eax /* x=0? */
340 je L(epilogue) /* in case x=0 return sin(+-0)==+-0 */
341 /* Here if 0<|x|<2^-27 */
342 /*
343 * Special cases here:
344 * sin(subnormal) raises inexact/underflow
345 * sin(min_normalized) raises inexact/underflow
346 * sin(normalized) raises inexact
347 */
348 movaps %xmm0, %xmm3 /* Copy of DP x */
349 mulsd MO1(DP_SMALL), %xmm0 /* x*DP_SMALL */
350 subsd %xmm0, %xmm3 /* Result is x-x*DP_SMALL */
351 cvtsd2ss %xmm3, %xmm3 /* Result converted to SP */
352 jmp L(epilogue)
353
354 .p2align 4
/* Handles x = +-Inf and x = NaN.  Reached only through the jae in
   L(large_args), so the flags still come from its
   cmpl $0x7f800000, %eax: equal means Inf, above means NaN.
   Returns x-x through L(epilogue); for Inf, errno is set to EDOM first. */
355 L(arg_inf_or_nan):
356 /* Here if |x| is Inf or NAN */
357 jne L(skip_errno_setting) /* in case of x is NaN */
358
359 /* Here if x is Inf. Set errno to EDOM. */
/* The returned pointer in %eax is the errno location.  x is reloaded from
   the stack afterwards, so no register has to survive the call. */
360 call JUMPTARGET(__errno_location)
361 movl $EDOM, (%eax)
362
363 .p2align 4
364 L(skip_errno_setting):
365 /* Here if |x| is Inf or NAN. Continued. */
366 movss ARG_X, %xmm3 /* load x */
367 subss %xmm3, %xmm3 /* x-x: NaN for both Inf and NaN input */
368 jmp L(epilogue)
369 END(__sinf_sse2)
370
/* Read-only constants.  Every double is written as two .long values, low
   word first and high word second. */
371 .section .rodata, "a"
372 .p2align 3
/* Indexed with j = (k+1)&0x0e by the Pi/4 <= |x| < 9*Pi/4 reduction. */
373 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
374 .long 0x00000000,0x00000000 /* 0 */
375 .long 0x54442d18,0x3fe921fb /* 1*Pi/4 */
376 .long 0x54442d18,0x3ff921fb /* 2*Pi/4 */
377 .long 0x7f3321d2,0x4002d97c /* 3*Pi/4 */
378 .long 0x54442d18,0x400921fb /* 4*Pi/4 */
379 .long 0x2955385e,0x400f6a7a /* 5*Pi/4 */
380 .long 0x7f3321d2,0x4012d97c /* 6*Pi/4 */
381 .long 0xe9bba775,0x4015fdbb /* 7*Pi/4 */
382 .long 0x54442d18,0x401921fb /* 8*Pi/4 */
383 .long 0xbeccb2bb,0x401c463a /* 9*Pi/4 */
384 .long 0x2955385e,0x401f6a7a /* 10*Pi/4 */
385 .type L(PIO4J), @object
386 ASM_SIZE_DIRECTIVE(L(PIO4J))
387
388 .p2align 3
/* 25 doubles (low word first, then high word), starting with a zero entry.
   Used only by L(very_large_args), which reads the four consecutive
   entries FPI[j-2] .. FPI[j+1]. */
389 L(_FPI): /* 4/Pi broken into sum of positive DP values */
390 .long 0x00000000,0x00000000
391 .long 0x6c000000,0x3ff45f30
392 .long 0x2a000000,0x3e3c9c88
393 .long 0xa8000000,0x3c54fe13
394 .long 0xd0000000,0x3aaf47d4
395 .long 0x6c000000,0x38fbb81b
396 .long 0xe0000000,0x3714acc9
397 .long 0x7c000000,0x3560e410
398 .long 0x56000000,0x33bca2c7
399 .long 0xac000000,0x31fbd778
400 .long 0xe0000000,0x300b7246
401 .long 0xe8000000,0x2e5d2126
402 .long 0x48000000,0x2c970032
403 .long 0xe8000000,0x2ad77504
404 .long 0xe0000000,0x290921cf
405 .long 0xb0000000,0x274deb1c
406 .long 0xe0000000,0x25829a73
407 .long 0xbe000000,0x23fd1046
408 .long 0x10000000,0x2224baed
409 .long 0x8e000000,0x20709d33
410 .long 0x80000000,0x1e535a2f
411 .long 0x64000000,0x1cef904e
412 .long 0x30000000,0x1b0d6398
413 .long 0x24000000,0x1964ce7d
414 .long 0x16000000,0x17b908bf
415 .type L(_FPI), @object
416 ASM_SIZE_DIRECTIVE(L(_FPI))
417
418 /* Coefficients of polynomial
419 for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5. */
/* Used only by L(arg_less_2pn5).  Each double is stored low word first. */
420 .p2align 3
421 L(DP_SIN2_0):
422 .long 0x5543d49d,0xbfc55555 /* x^3 coefficient, close to -1/6 */
423 .type L(DP_SIN2_0), @object
424 ASM_SIZE_DIRECTIVE(L(DP_SIN2_0))
425
426 .p2align 3
427 L(DP_SIN2_1):
428 .long 0x75cec8c5,0x3f8110f4 /* x^5 coefficient, close to 1/120 */
429 .type L(DP_SIN2_1), @object
430 ASM_SIZE_DIRECTIVE(L(DP_SIN2_1))
431
432 .p2align 3
/* Indexed by k&1 in L(very_large_args): 0.0 is added when k is even and
   -1.0 when k is odd. */
433 L(DP_ZERONE):
434 .long 0x00000000,0x00000000 /* 0.0 */
435 .long 0x00000000,0xbff00000 /* -1.0 */
436 .type L(DP_ZERONE), @object
437 ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
438
439 .p2align 3
/* Entry 0 is the 1.0 term of the cos polynomial; entries 0/1 are the final
   sign factor, indexed by the sign value in %ecx; entry 1 alone is added to
   subtract 1.0 in L(very_large_args). */
440 L(DP_ONES):
441 .long 0x00000000,0x3ff00000 /* +1.0 */
442 .long 0x00000000,0xbff00000 /* -1.0 */
443 .type L(DP_ONES), @object
444 ASM_SIZE_DIRECTIVE(L(DP_ONES))
445
446 /* Coefficients of polynomial
447 for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4. */
/* Used by L(sin_poly) and L(arg_less_pio4).  The labels are not laid out in
   index order (S3, S1, S4, S2, S0).  Each double is stored low word first. */
448 .p2align 3
449 L(DP_S3):
450 .long 0x64e6b5b4,0x3ec71d72 /* S3, t^9 term */
451 .type L(DP_S3), @object
452 ASM_SIZE_DIRECTIVE(L(DP_S3))
453
454 .p2align 3
455 L(DP_S1):
456 .long 0x10c2688b,0x3f811111 /* S1, t^5 term, close to 1/120 */
457 .type L(DP_S1), @object
458 ASM_SIZE_DIRECTIVE(L(DP_S1))
459
460 .p2align 3
461 L(DP_S4):
462 .long 0x1674b58a,0xbe5a947e /* S4, t^11 term */
463 .type L(DP_S4), @object
464 ASM_SIZE_DIRECTIVE(L(DP_S4))
465
466 .p2align 3
467 L(DP_S2):
468 .long 0x8b4bd1f9,0xbf2a019f /* S2, t^7 term */
469 .type L(DP_S2), @object
470 ASM_SIZE_DIRECTIVE(L(DP_S2))
471
472 .p2align 3
473 L(DP_S0):
474 .long 0x55551cd9,0xbfc55555 /* S0, t^3 term, close to -1/6 */
475 .type L(DP_S0), @object
476 ASM_SIZE_DIRECTIVE(L(DP_S0))
477
478 .p2align 3
/* Scale factor used by L(arg_less_2pn27), which returns x - x*DP_SMALL. */
479 L(DP_SMALL):
480 .long 0x00000000,0x3cd00000 /* 2^(-50) */
481 .type L(DP_SMALL), @object
482 ASM_SIZE_DIRECTIVE(L(DP_SMALL))
483
484 /* Coefficients of polynomial
485 for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4. */
/* Used by the cos path of L(reconstruction).  The labels are not laid out
   in index order (C3, C1, C4, C2, C0).  Each double is stored low word
   first. */
486 .p2align 3
487 L(DP_C3):
488 .long 0x9ac43cc0,0x3efa00eb /* C3, t^8 term */
489 .type L(DP_C3), @object
490 ASM_SIZE_DIRECTIVE(L(DP_C3))
491
492 .p2align 3
493 L(DP_C1):
494 .long 0x545c50c7,0x3fa55555 /* C1, t^4 term, close to 1/24 */
495 .type L(DP_C1), @object
496 ASM_SIZE_DIRECTIVE(L(DP_C1))
497
498 .p2align 3
499 L(DP_C4):
500 .long 0xdd8844d7,0xbe923c97 /* C4, t^10 term */
501 .type L(DP_C4), @object
502 ASM_SIZE_DIRECTIVE(L(DP_C4))
503
504 .p2align 3
505 L(DP_C2):
506 .long 0x348b6874,0xbf56c16b /* C2, t^6 term */
507 .type L(DP_C2), @object
508 ASM_SIZE_DIRECTIVE(L(DP_C2))
509
510 .p2align 3
511 L(DP_C0):
512 .long 0xfffe98ae,0xbfdfffff /* C0, t^2 term, close to -1/2 */
513 .type L(DP_C0), @object
514 ASM_SIZE_DIRECTIVE(L(DP_C0))
515
/* Range-reduction constants.  Each double is stored low word first. */
516 .p2align 3
517 L(DP_PIO4):
518 .long 0x54442d18,0x3fe921fb /* Pi/4 */
519 .type L(DP_PIO4), @object
520 ASM_SIZE_DIRECTIVE(L(DP_PIO4))
521
522 .p2align 3
/* Both entries are used by L(very_large_args): offset 0 is added and
   offset 8 undoes it, which rounds the operand to an integer. */
523 L(DP_2POW52):
524 .long 0x00000000,0x43300000 /* +2^52 */
525 .long 0x00000000,0xc3300000 /* -2^52 */
526 .type L(DP_2POW52), @object
527 ASM_SIZE_DIRECTIVE(L(DP_2POW52))
528
529 .p2align 3
530 L(DP_INVPIO4):
531 .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
532 .type L(DP_INVPIO4), @object
533 ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
534
535 .p2align 3
/* DP_PIO4HI and DP_PIO4LO are stored negated (sign bit set), so that
   L(large_args) can subtract j*Pi/4 with multiply-and-add. */
536 L(DP_PIO4HI):
537 .long 0x54000000,0xbfe921fb /* High part of -Pi/4 */
538 .type L(DP_PIO4HI), @object
539 ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
540
541 .p2align 3
542 L(DP_PIO4LO):
543 .long 0x11A62633,0xbe010b46 /* Low part of -Pi/4 */
544 .type L(DP_PIO4LO), @object
545 ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
546
547 .p2align 2
/* Single-precision 4/Pi, used by the Pi/4 <= |x| < 9*Pi/4 reduction. */
548 L(SP_INVPIO4):
549 .long 0x3fa2f983 /* 4/Pi */
550 .type L(SP_INVPIO4), @object
551 ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
552
553 .p2align 4
/* Used directly as the memory operand of andpd, which reads 16 bytes and
   needs them 16-byte aligned; hence two quadwords and .p2align 4. */
554 L(DP_ABS_MASK): /* Mask for getting DP absolute value */
555 .long 0xffffffff,0x7fffffff
556 .long 0xffffffff,0x7fffffff
557 .type L(DP_ABS_MASK), @object
558 ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
559
560 .p2align 3
/* The mask keeps the upper 32 bits of a double (sign, exponent and the top
   20 fraction bits) and clears the lower 32.  Presumably the 21 bits in the
   label comment count those 20 fraction bits plus the implicit leading
   one - confirm. */
561 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
562 .long 0x00000000,0xffffffff
563 .type L(DP_HI_MASK), @object
564 ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
565
/* NOTE(review): this aliases sinf to __sinf, but the only entry point
   defined in this file is __sinf_sse2; __sinf is presumably provided by
   the multiarch dispatcher elsewhere - confirm this alias is intended. */
566 weak_alias (__sinf, sinf)