]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
730ebff438618eb6ae8a8c04b1e3eb4d66d7d72b
[thirdparty/glibc.git] / sysdeps / i386 / i686 / fpu / multiarch / s_sinf-sse2.S
1 /* Optimized with sse2 version of sinf
2 Copyright (C) 2012-2019 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19 #include <sysdep.h>
20 #include <errno.h>
21
22 /* Short algorithm description:
23 *
24 * 1) if |x| == 0: return x.
25 * 2) if |x| < 2^-27: return x-x*DP_SMALL, raise underflow only when needed.
26 * 3) if |x| < 2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1.
27 * 4) if |x| < Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).
28 * 5) if |x| < 9*Pi/4:
29 * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1,
30 * t=|x|-j*Pi/4.
31 * 5.2) Reconstruction:
32 * s = sign(x) * (-1.0)^((n>>2)&1)
33 * if(n&2 != 0) {
34 * using cos(t) polynomial for |t|<Pi/4, result is
35 * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
36 * } else {
37 * using sin(t) polynomial for |t|<Pi/4, result is
38 * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
39 * }
40 * 6) if |x| < 2^23, large args:
41 * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
42 * t=|x|-j*Pi/4.
43 * 6.2) Reconstruction same as (5.2).
44 * 7) if |x| >= 2^23, very large args:
45 * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
46 * t=|x|-j*Pi/4.
47 * 7.2) Reconstruction same as (5.2).
48 * 8) if x is Inf, return x-x, and set errno=EDOM.
49 * 9) if x is NaN, return x-x.
50 *
51 * Special cases:
52 * sin(+-0) = +-0 not raising inexact/underflow,
53 * sin(subnormal) raises inexact/underflow,
54 * sin(min_normalized) raises inexact/underflow,
55 * sin(normalized) raises inexact,
56 * sin(Inf) = NaN, raises invalid, sets errno to EDOM,
57 * sin(NaN) = NaN.
58 */
59
/* Addressing and prologue/epilogue helpers.
   PIC build: local labels are addressed as @GOTOFF offsets from %ebx,
   so %ebx is pushed on entry and popped before every ret.
   Non-PIC build: local labels are addressed absolutely and nothing is
   saved.  */
60 #ifdef PIC
/* MO1: memory operand for the local label L(symbol).  */
61 # define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
/* MO2: same, additionally indexed by reg2*_scale (table lookups).  */
62 # define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
/* Unwind bookkeeping for a 4-byte push/pop of REG.  */
63 # define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
64 # define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
/* pushl/popl together with the matching unwind bookkeeping.  */
65 # define PUSH(REG) pushl REG; CFI_PUSH(REG)
66 # define POP(REG) popl REG; CFI_POP(REG)
/* Save %ebx, then LOAD_PIC_REG(bx) (defined outside this file;
   presumably loads the GOT address into %ebx -- confirm in sysdep.h).  */
67 # define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
/* Restore %ebx and return.  RETURN is used in the middle of the
   function, so the trailing CFI_PUSH re-applies the "%ebx is pushed"
   unwind bookkeeping for the code that follows the ret.  */
68 # define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
/* x sits above the return address and the pushed %ebx.  */
69 # define ARG_X 8(%esp)
70 #else
71 # define MO1(symbol) L(symbol)
72 # define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
/* Nothing to save in the non-PIC build.  */
73 # define ENTRANCE
74 # define RETURN ret
/* x sits directly above the return address.  */
75 # define ARG_X 4(%esp)
76 #endif
77
78 .text
/* __sinf_sse2: sinf(x) for single precision x.
 *
 * In:   x on the stack at ARG_X.
 * Out:  result loaded onto the x87 stack (flds/fldl) before RETURN.
 * Uses: %eax, %ecx, %edx, %xmm0-%xmm6.  In the PIC build %ebx is the
 *       base for MO1/MO2 and is saved/restored by ENTRANCE/RETURN.
 * Calls __errno_location when x is +-Inf.
 *
 * This first part classifies |x| and handles Pi/4<=|x|<9*Pi/4, then
 * falls through into L(reconstruction).
 */
79 ENTRY(__sinf_sse2)
80 /* Input: single precision x on stack at address ARG_X */
81
82 ENTRANCE
83 movl ARG_X, %eax /* Bits of x */
84 cvtss2sd ARG_X, %xmm0 /* DP x */
85 andl $0x7fffffff, %eax /* |x| (sign bit cleared) */
86
/* All range checks below compare the bit pattern of |x| held in %eax
   against the bit pattern of the threshold.  */
87 cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */
88 jb L(arg_less_pio4)
89
90 /* Here if |x|>=Pi/4 */
91 movd %eax, %xmm3 /* SP |x| */
92 andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */
93 movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */
94
95 cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */
96 jae L(large_args)
97
98 /* Here if Pi/4<=|x|<9*Pi/4 */
99 mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */
100 movl ARG_X, %ecx /* Load x */
101 cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */
102 shrl $31, %ecx /* sign of x: 0 or 1 */
103 addl $1, %eax /* n = k+1 */
104 movl $0x0e, %edx
105 andl %eax, %edx /* j = (k+1)&0x0e */
106 subsd MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */
/* Falls through with %eax=n, %xmm0=t, %ecx=sign(x).  */
107
/* Common tail of all range-reduction paths: pick the sin or cos
   polynomial from bit 1 of n and apply the sign.  The cos branch
   follows inline; it returns the DP result on the x87 stack.  */
108 L(reconstruction):
109 /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
110 testl $2, %eax /* n&2 != 0? */
111 jz L(sin_poly)
112
113 /*L(cos_poly):*/
114 /* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4:
115 * y = t*t; z = y*y;
116 * s = sign(x) * (-1.0)^((n>>2)&1)
117 * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
118 */
119 shrl $2, %eax /* n>>2 */
120 mulsd %xmm0, %xmm0 /* y=t^2 */
121 andl $1, %eax /* (n>>2)&1 */
122 movaps %xmm0, %xmm1 /* y */
123 mulsd %xmm0, %xmm0 /* z=t^4 */
124
/* Even-index (C0,C2,C4) and odd-index (C1,C3) terms are accumulated
   in %xmm4 and %xmm3 respectively and summed at the end.  */
125 movsd MO1(DP_C4), %xmm4 /* C4 */
126 mulsd %xmm0, %xmm4 /* z*C4 */
127 xorl %eax, %ecx /* index of s in DP_ONES: ((n>>2)&1) XOR sign(x) */
128 movsd MO1(DP_C3), %xmm3 /* C3 */
129 mulsd %xmm0, %xmm3 /* z*C3 */
130 addsd MO1(DP_C2), %xmm4 /* C2+z*C4 */
131 mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */
132 lea -8(%esp), %esp /* Borrow 8 bytes of stack for the DP result */
133 addsd MO1(DP_C1), %xmm3 /* C1+z*C3 */
134 mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */
135 addsd MO1(DP_C0), %xmm4 /* C0+z*(C2+z*C4) */
136 mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */
137
138 addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
139 /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
140 addsd MO1(DP_ONES), %xmm3
141
142 mulsd MO2(DP_ONES,%ecx,8), %xmm3 /* DP result = s * polynomial */
143 movsd %xmm3, 0(%esp) /* Move result from sse... */
144 fldl 0(%esp) /* ...to FPU. */
145 /* Give the 8 borrowed bytes back */
146 lea 8(%esp), %esp
147 RETURN
148
149 .p2align 4
/* Reached from L(reconstruction) when n&2 == 0.
   Input: %eax=n, %xmm0=t, %ecx=sign(x).  Returns the DP result on the
   x87 stack.  */
150 L(sin_poly):
151 /* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4:
152 * y = t*t; z = y*y;
153 * s = sign(x) * (-1.0)^((n>>2)&1)
154 * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
155 */
156
157 movaps %xmm0, %xmm4 /* t */
158 shrl $2, %eax /* n>>2 */
159 mulsd %xmm0, %xmm0 /* y=t^2 */
160 andl $1, %eax /* (n>>2)&1 */
161 movaps %xmm0, %xmm1 /* y */
162 xorl %eax, %ecx /* index of s in DP_ONES: ((n>>2)&1) XOR sign(x) */
163 mulsd %xmm0, %xmm0 /* z=t^4 */
164
/* Even-index (S0,S2,S4) and odd-index (S1,S3) terms are accumulated
   in %xmm2 and %xmm3 respectively and summed at the end.  */
165 movsd MO1(DP_S4), %xmm2 /* S4 */
166 mulsd %xmm0, %xmm2 /* z*S4 */
167 movsd MO1(DP_S3), %xmm3 /* S3 */
168 mulsd %xmm0, %xmm3 /* z*S3 */
169 lea -8(%esp), %esp /* Borrow 8 bytes of stack for the DP result */
170 addsd MO1(DP_S2), %xmm2 /* S2+z*S4 */
171 mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */
172 addsd MO1(DP_S1), %xmm3 /* S1+z*S3 */
173 mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */
174 addsd MO1(DP_S0), %xmm2 /* S0+z*(S2+z*S4) */
175 mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */
176 /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
177 mulsd MO2(DP_ONES,%ecx,8), %xmm4
178 addsd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
179 /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
180 mulsd %xmm4, %xmm3
181 /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
182 addsd %xmm4, %xmm3
183 movsd %xmm3, 0(%esp) /* Move result from sse... */
184 fldl 0(%esp) /* ...to FPU. */
185 /* Give the 8 borrowed bytes back */
186 lea 8(%esp), %esp
187 RETURN
188
189 .p2align 4
/* Input: %eax = bits of |x|, %xmm0 = DP |x|.
   Dispatches Inf/NaN and |x|>=2^23, and reduces 9*Pi/4<=|x|<2^23 in
   double precision with Pi/4 split into a high and a low part.  */
190 L(large_args):
191 /* Here if |x|>=9*Pi/4 */
/* The flags of this compare are still live at L(arg_inf_or_nan),
   which tests them with jne to tell Inf from NaN.  */
192 cmpl $0x7f800000, %eax /* x is Inf or NaN? */
193 jae L(arg_inf_or_nan)
194
195 /* Here if finite |x|>=9*Pi/4 */
196 cmpl $0x4b000000, %eax /* |x|<2^23? */
197 jae L(very_large_args)
198
199 /* Here if 9*Pi/4<=|x|<2^23 */
200 movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */
201 mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
202 cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
203 addl $1, %eax /* n = k+1 */
204 movl %eax, %edx
205 andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
206 cvtsi2sdl %edx, %xmm4 /* DP j */
207 movl ARG_X, %ecx /* Load x */
208 movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */
209 shrl $31, %ecx /* sign bit of x */
210 mulsd %xmm4, %xmm2 /* -j*PIO4HI */
211 movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */
212 addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
213 mulsd %xmm3, %xmm4 /* -j*PIO4LO */
214 addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
215 jmp L(reconstruction) /* with %eax=n, %xmm0=t, %ecx=sign(x) */
216
217 .p2align 4
/* Range reduction for finite |x|>=2^23.
   Input: %eax = bits of |x|, %xmm0 = DP |x|.
   |x| is multiplied by four consecutive entries of the _FPI table
   (pieces of 4/Pi), selected by the exponent of x.  k and the reduced
   argument are then extracted from the partial products, and the
   result is scaled by Pi/4.
   Exits to L(reconstruction) with %eax=n, %xmm0=t, %ecx=sign(x).  */
218 L(very_large_args):
219 /* Here if finite |x|>=2^23 */
220
221 /* bitpos = (ix>>23) - BIAS_32 + 59; */
222 shrl $23, %eax /* eb = biased exponent of x */
223 /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
224 subl $68, %eax
225 movl $28, %ecx /* %cl=28 */
226 movl %eax, %edx /* bitpos copy */
227
228 /* j = bitpos/28; */
/* The range checks in L(large_args) give 0x4b000000 <= bits <
   0x7f800000, so eb is 150..254 and bitpos is 82..186.  The dividend
   therefore fits in %al (%ah is zero) and the quotient j is 2..6, so
   the 8-bit divide cannot overflow and FPI[j-2] is a valid index.  */
229 div %cl /* j in register %al=%ax/%cl */
230 movapd %xmm0, %xmm3 /* |x| */
231 /* clear unneeded remainder from %ah */
232 andl $0xff, %eax
233
234 imull $28, %eax, %ecx /* j*28 */
235 movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */
236 movapd %xmm0, %xmm5 /* |x| */
237 mulsd -2*8+MO2(_FPI,%eax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */
238 movapd %xmm0, %xmm1 /* |x| */
239 mulsd -1*8+MO2(_FPI,%eax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */
240 mulsd 0*8+MO2(_FPI,%eax,8), %xmm0 /* tmp0 = FPI[j]*|x| */
241 addl $19, %ecx /* j*28+19 */
242 mulsd 1*8+MO2(_FPI,%eax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */
243 cmpl %ecx, %edx /* bitpos>=j*28+19? */
244 jl L(very_large_skip1)
245
246 /* Here if bitpos>=j*28+19 */
247 andpd %xmm3, %xmm4 /* HI(tmp3) */
248 subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
249 L(very_large_skip1):
250
/* Adding 2^52 leaves the integer value of tmp5 in the low mantissa
   bits of tmp6; subtracting 2^52 again gives that integer as a DP
   value.  If it came out above tmp5, k and tmp4 are stepped down by
   one below.  */
251 movsd MO1(DP_2POW52), %xmm6
252 movapd %xmm5, %xmm2 /* tmp2 copy */
253 addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
254 movl $1, %edx
255 addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
256 movsd 8+MO1(DP_2POW52), %xmm4 /* -2^52 */
257 movd %xmm6, %eax /* k = I64_LO(tmp6); */
258 addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
259 movl ARG_X, %ecx /* Load x */
260 comisd %xmm5, %xmm4 /* tmp4 > tmp5? */
261 jbe L(very_large_skip2)
262
263 /* Here if tmp4 > tmp5 */
264 subl $1, %eax /* k-- */
265 addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 (adds the -1.0 entry) */
266 L(very_large_skip2):
267
268 andl %eax, %edx /* k&1 */
269 subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
270 addsd MO2(DP_ZERONE,%edx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 (adds 0.0 or -1.0) */
271 addsd %xmm2, %xmm3 /* t += tmp2 */
272 shrl $31, %ecx /* sign of x */
273 addsd %xmm3, %xmm0 /* t += tmp0 */
274 addl $1, %eax /* n=k+1 */
275 addsd %xmm1, %xmm0 /* t += tmp1 */
276 mulsd MO1(DP_PIO4), %xmm0 /* t *= PIO4 */
277
278 jmp L(reconstruction) /* end of very_large_args path */
279
280 .p2align 4
/* Input: %eax = bits of |x| (sign bit already cleared), %xmm0 = DP x
   (sign preserved).
   For 2^-5<=|x|<Pi/4 evaluates
   x + x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) in double precision
   and rounds to single precision in %xmm3.  Falls through into
   L(epilogue), which returns %xmm3 on the x87 stack.  */
281 L(arg_less_pio4):
282 /* Here if |x|<Pi/4 */
/* %eax is a bit pattern with the sign bit cleared, so the unsigned
   branch is used, matching the entry-point range check.  */
283 cmpl $0x3d000000, %eax /* |x|<2^-5? */
284 jb L(arg_less_2pn5)
285
286 /* Here if 2^-5<=|x|<Pi/4 */
287 movaps %xmm0, %xmm3 /* x */
288 mulsd %xmm0, %xmm0 /* y=x^2 */
289 movaps %xmm0, %xmm1 /* y */
290 mulsd %xmm0, %xmm0 /* z=x^4 */
/* Even-index (S0,S2,S4) terms go to %xmm4, odd-index (S1,S3) terms
   to %xmm5; both are multiplied by x before being summed.  */
291 movsd MO1(DP_S4), %xmm4 /* S4 */
292 mulsd %xmm0, %xmm4 /* z*S4 */
293 movsd MO1(DP_S3), %xmm5 /* S3 */
294 mulsd %xmm0, %xmm5 /* z*S3 */
295 addsd MO1(DP_S2), %xmm4 /* S2+z*S4 */
296 mulsd %xmm0, %xmm4 /* z*(S2+z*S4) */
297 addsd MO1(DP_S1), %xmm5 /* S1+z*S3 */
298 mulsd %xmm0, %xmm5 /* z*(S1+z*S3) */
299 addsd MO1(DP_S0), %xmm4 /* S0+z*(S2+z*S4) */
300 mulsd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */
301 mulsd %xmm3, %xmm5 /* x*z*(S1+z*S3) */
302 mulsd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */
303 /* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
304 addsd %xmm5, %xmm4
305 /* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
306 addsd %xmm4, %xmm3
307 cvtsd2ss %xmm3, %xmm3 /* SP result */
308
/* Common exit for the paths that produce a single precision result.
   Input: %xmm3 = SP result.  The value is moved to the x87 stack
   through a temporary stack slot, because it is returned via flds.  */
309 L(epilogue):
310 lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */
311 movss %xmm3, 0(%esp) /* Move result from sse... */
312 flds 0(%esp) /* ...to FPU. */
313 /* Return back 4 bytes of stack frame */
314 lea 4(%esp), %esp
315 RETURN
316
317 .p2align 4
/* Input: %eax = bits of |x| (sign bit already cleared), %xmm0 = DP x.
   For 2^-27<=|x|<2^-5 evaluates x + x^3*DP_SIN2_0 + x^5*DP_SIN2_1 in
   double precision, rounds to single precision in %xmm3 and exits
   through L(epilogue).  */
318 L(arg_less_2pn5):
319 /* Here if |x|<2^-5 */
/* %eax is a bit pattern with the sign bit cleared, so the unsigned
   branch is used, matching the entry-point range check.  */
320 cmpl $0x32000000, %eax /* |x|<2^-27? */
321 jb L(arg_less_2pn27)
322
323 /* Here if 2^-27<=|x|<2^-5 */
324 movaps %xmm0, %xmm1 /* DP x */
325 mulsd %xmm0, %xmm0 /* DP x^2 */
326 movsd MO1(DP_SIN2_1), %xmm3 /* DP DP_SIN2_1 */
327 mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */
328 addsd MO1(DP_SIN2_0), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */
329 mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
330 mulsd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
331 addsd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
332 cvtsd2ss %xmm3, %xmm3 /* SP result */
333 jmp L(epilogue)
334
335 .p2align 4
/* Input: %eax = bits of |x| (sign bit already cleared), %xmm0 = DP x.
   x == +-0 is returned unchanged, loaded straight from the argument
   slot.  Any other |x|<2^-27 returns x - x*DP_SMALL rounded to single
   precision.  Both cases exit through L(epilogue) with the result in
   %xmm3.  */
336 L(arg_less_2pn27):
337 movss ARG_X, %xmm3 /* SP x */
338 testl %eax, %eax /* x=0? */
339 je L(epilogue) /* in case x=0 return sin(+-0)==+-0 */
340 /* Here if |x|<2^-27 */
341 /*
342 * Special cases here:
343 * sin(subnormal) raises inexact/underflow
344 * sin(min_normalized) raises inexact/underflow
345 * sin(normalized) raises inexact
346 */
347 movaps %xmm0, %xmm3 /* Copy of DP x */
348 mulsd MO1(DP_SMALL), %xmm0 /* x*DP_SMALL */
349 subsd %xmm0, %xmm3 /* Result is x-x*DP_SMALL */
350 cvtsd2ss %xmm3, %xmm3 /* Result converted to SP */
351 jmp L(epilogue)
352
353 .p2align 4
/* Reached only from L(large_args) via jae after
   `cmpl $0x7f800000, %eax'.  The flags of that compare are still
   live here: equal means |x| is Inf, not equal means NaN.  */
354 L(arg_inf_or_nan):
355 /* Here if |x| is Inf or NAN */
356 jne L(skip_errno_setting) /* in case of x is NaN */
357
358 /* Here if x is Inf. Set errno to EDOM. */
/* The returned %eax is used as the address that receives EDOM.  x is
   reloaded from ARG_X afterwards, so nothing has to survive the
   call.  */
359 call JUMPTARGET(__errno_location)
360 movl $EDOM, (%eax)
361
362 .p2align 4
363 L(skip_errno_setting):
364 /* Here if |x| is Inf or NAN. Continued. */
365 movss ARG_X, %xmm3 /* load x */
366 subss %xmm3, %xmm3 /* x-x: Result is NaN */
367 jmp L(epilogue) /* return %xmm3 on the x87 stack */
368 END(__sinf_sse2)
369
/* Read-only constants.  Each double is written as two .long words,
   low word first.  */
370 .section .rodata, "a"
371 .p2align 3
/* Indexed as PIO4J[j] with j=(k+1)&0x0e by the Pi/4<=|x|<9*Pi/4 path.  */
372 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
373 .long 0x00000000,0x00000000
374 .long 0x54442d18,0x3fe921fb
375 .long 0x54442d18,0x3ff921fb
376 .long 0x7f3321d2,0x4002d97c
377 .long 0x54442d18,0x400921fb
378 .long 0x2955385e,0x400f6a7a
379 .long 0x7f3321d2,0x4012d97c
380 .long 0xe9bba775,0x4015fdbb
381 .long 0x54442d18,0x401921fb
382 .long 0xbeccb2bb,0x401c463a
383 .long 0x2955385e,0x401f6a7a
384 .type L(PIO4J), @object
385 ASM_SIZE_DIRECTIVE(L(PIO4J))
386
387 .p2align 3
/* Read by L(very_large_args) as FPI[j-2], FPI[j-1], FPI[j], FPI[j+1].
   The first entry is 0.0.  */
388 L(_FPI): /* 4/Pi broken into sum of positive DP values */
389 .long 0x00000000,0x00000000
390 .long 0x6c000000,0x3ff45f30
391 .long 0x2a000000,0x3e3c9c88
392 .long 0xa8000000,0x3c54fe13
393 .long 0xd0000000,0x3aaf47d4
394 .long 0x6c000000,0x38fbb81b
395 .long 0xe0000000,0x3714acc9
396 .long 0x7c000000,0x3560e410
397 .long 0x56000000,0x33bca2c7
398 .long 0xac000000,0x31fbd778
399 .long 0xe0000000,0x300b7246
400 .long 0xe8000000,0x2e5d2126
401 .long 0x48000000,0x2c970032
402 .long 0xe8000000,0x2ad77504
403 .long 0xe0000000,0x290921cf
404 .long 0xb0000000,0x274deb1c
405 .long 0xe0000000,0x25829a73
406 .long 0xbe000000,0x23fd1046
407 .long 0x10000000,0x2224baed
408 .long 0x8e000000,0x20709d33
409 .long 0x80000000,0x1e535a2f
410 .long 0x64000000,0x1cef904e
411 .long 0x30000000,0x1b0d6398
412 .long 0x24000000,0x1964ce7d
413 .long 0x16000000,0x17b908bf
414 .type L(_FPI), @object
415 ASM_SIZE_DIRECTIVE(L(_FPI))
416
417 /* Coefficients of polynomial
418 for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5.
Used by L(arg_less_2pn5). */
419 .p2align 3
420 L(DP_SIN2_0): /* coefficient of x^3 */
421 .long 0x5543d49d,0xbfc55555
422 .type L(DP_SIN2_0), @object
423 ASM_SIZE_DIRECTIVE(L(DP_SIN2_0))
424
425 .p2align 3
426 L(DP_SIN2_1): /* coefficient of x^5 */
427 .long 0x75cec8c5,0x3f8110f4
428 .type L(DP_SIN2_1), @object
429 ASM_SIZE_DIRECTIVE(L(DP_SIN2_1))
430
431 .p2align 3
/* Indexed by k&1 in L(very_large_args): adds 0.0 or -1.0 to t.  */
432 L(DP_ZERONE):
433 .long 0x00000000,0x00000000 /* 0.0 */
434 .long 0x00000000,0xbff00000 /* -1.0 */
435 .type L(DP_ZERONE), @object
436 ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
437
438 .p2align 3
/* Indexed by the 0/1 sign selector in the reconstruction code.  Entry 0
   also supplies the constant 1.0, and entry 1 (offset 8) the constant
   -1.0.  */
439 L(DP_ONES):
440 .long 0x00000000,0x3ff00000 /* +1.0 */
441 .long 0x00000000,0xbff00000 /* -1.0 */
442 .type L(DP_ONES), @object
443 ASM_SIZE_DIRECTIVE(L(DP_ONES))
444
445 /* Coefficients of polynomial
446 for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.
Used by L(sin_poly) and L(arg_less_pio4). */
447 .p2align 3
448 L(DP_S3):
449 .long 0x64e6b5b4,0x3ec71d72
450 .type L(DP_S3), @object
451 ASM_SIZE_DIRECTIVE(L(DP_S3))
452
453 .p2align 3
454 L(DP_S1):
455 .long 0x10c2688b,0x3f811111
456 .type L(DP_S1), @object
457 ASM_SIZE_DIRECTIVE(L(DP_S1))
458
459 .p2align 3
460 L(DP_S4):
461 .long 0x1674b58a,0xbe5a947e
462 .type L(DP_S4), @object
463 ASM_SIZE_DIRECTIVE(L(DP_S4))
464
465 .p2align 3
466 L(DP_S2):
467 .long 0x8b4bd1f9,0xbf2a019f
468 .type L(DP_S2), @object
469 ASM_SIZE_DIRECTIVE(L(DP_S2))
470
471 .p2align 3
472 L(DP_S0):
473 .long 0x55551cd9,0xbfc55555
474 .type L(DP_S0), @object
475 ASM_SIZE_DIRECTIVE(L(DP_S0))
476
477 .p2align 3
/* Multiplier used by L(arg_less_2pn27), which returns x-x*DP_SMALL.  */
478 L(DP_SMALL):
479 .long 0x00000000,0x3cd00000 /* 2^(-50) */
480 .type L(DP_SMALL), @object
481 ASM_SIZE_DIRECTIVE(L(DP_SMALL))
482
483 /* Coefficients of polynomial
484 for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.
Used by the cos branch of L(reconstruction). */
485 .p2align 3
486 L(DP_C3):
487 .long 0x9ac43cc0,0x3efa00eb
488 .type L(DP_C3), @object
489 ASM_SIZE_DIRECTIVE(L(DP_C3))
490
491 .p2align 3
492 L(DP_C1):
493 .long 0x545c50c7,0x3fa55555
494 .type L(DP_C1), @object
495 ASM_SIZE_DIRECTIVE(L(DP_C1))
496
497 .p2align 3
498 L(DP_C4):
499 .long 0xdd8844d7,0xbe923c97
500 .type L(DP_C4), @object
501 ASM_SIZE_DIRECTIVE(L(DP_C4))
502
503 .p2align 3
504 L(DP_C2):
505 .long 0x348b6874,0xbf56c16b
506 .type L(DP_C2), @object
507 ASM_SIZE_DIRECTIVE(L(DP_C2))
508
509 .p2align 3
510 L(DP_C0):
511 .long 0xfffe98ae,0xbfdfffff
512 .type L(DP_C0), @object
513 ASM_SIZE_DIRECTIVE(L(DP_C0))
514
515 .p2align 3
/* Final scale factor applied to t in L(very_large_args).  */
516 L(DP_PIO4):
517 .long 0x54442d18,0x3fe921fb /* Pi/4 */
518 .type L(DP_PIO4), @object
519 ASM_SIZE_DIRECTIVE(L(DP_PIO4))
520
521 .p2align 3
/* L(very_large_args) adds entry 0 and then entry 1 (offset 8) to
   extract the integer part of a DP value.  */
522 L(DP_2POW52):
523 .long 0x00000000,0x43300000 /* +2^52 */
524 .long 0x00000000,0xc3300000 /* -2^52 */
525 .type L(DP_2POW52), @object
526 ASM_SIZE_DIRECTIVE(L(DP_2POW52))
527
528 .p2align 3
529 L(DP_INVPIO4):
530 .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
531 .type L(DP_INVPIO4), @object
532 ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
533
534 .p2align 3
/* Stored negated (sign bit set) so that L(large_args) can subtract
   j*Pi/4 with mulsd/addsd.  */
535 L(DP_PIO4HI):
536 .long 0x54000000,0xbfe921fb /* -(High part of Pi/4) */
537 .type L(DP_PIO4HI), @object
538 ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
539
540 .p2align 3
541 L(DP_PIO4LO):
542 .long 0x11A62633,0xbe010b46 /* -(Low part of Pi/4) */
543 .type L(DP_PIO4LO), @object
544 ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
545
546 .p2align 2
547 L(SP_INVPIO4):
548 .long 0x3fa2f983 /* 4/Pi */
549 .type L(SP_INVPIO4), @object
550 ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
551
/* 16 bytes, 16-byte aligned: used directly as the memory operand of
   andpd at function entry.  */
552 .p2align 4
553 L(DP_ABS_MASK): /* Mask for getting DP absolute value */
554 .long 0xffffffff,0x7fffffff
555 .long 0xffffffff,0x7fffffff
556 .type L(DP_ABS_MASK), @object
557 ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
558
559 .p2align 3
/* Keeps the upper 32 bits of a DP value (sign, exponent and the top 20
   stored fraction bits) and clears the lower 32.  Counting the implicit
   leading one, that is the high 21 significand bits.  */
560 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
561 .long 0x00000000,0xffffffff
562 .type L(DP_HI_MASK), @object
563 ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
564
/* NOTE(review): the entry point defined in this file is __sinf_sse2;
   no __sinf symbol is defined here.  Presumably __sinf is provided
   elsewhere (e.g. by the multiarch selector) -- confirm that this
   alias is intended in this file.  */
565 weak_alias (__sinf, sinf)