]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
b54590a6df83784a230f706a7bbc24ed353b374d
[thirdparty/glibc.git] / sysdeps / i386 / i686 / fpu / multiarch / s_cosf-sse2.S
1 /* Optimized with sse2 version of cosf
2 Copyright (C) 2012-2021 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #include <sysdep.h>
20 #include <errno.h>
21
22 /* Short algorithm description:
23 *
24 * 1) if |x| == 0: return 1.0-|x|.
25 * 2) if |x| < 2^-27: return 1.0-|x|.
26 * 3) if |x| < 2^-5 : return 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1.
27 * 4) if |x| < Pi/4: return 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
28 * 5) if |x| < 9*Pi/4:
29 * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+3,
30 * t=|x|-j*Pi/4.
31 * 5.2) Reconstruction:
32 * s = (-1.0)^((n>>2)&1)
33 * if(n&2 != 0) {
34 * using cos(t) polynomial for |t|<Pi/4, result is
35 * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
36 * } else {
37 * using sin(t) polynomial for |t|<Pi/4, result is
38 * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
39 * }
40 * 6) if |x| < 2^23, large args:
41 * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
42 * t=|x|-j*Pi/4.
43 * 6.2) Reconstruction same as (5.2).
44 * 7) if |x| >= 2^23, very large args:
45 * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
46 * t=|x|-j*Pi/4.
47 * 7.2) Reconstruction same as (5.2).
48 * 8) if x is Inf, return x-x, and set errno=EDOM.
49 * 9) if x is NaN, return x-x.
50 *
51 * Special cases:
52 * cos(+-0) = 1 not raising inexact,
53 * cos(subnormal) raises inexact,
54 * cos(min_normalized) raises inexact,
55 * cos(normalized) raises inexact,
56 * cos(Inf) = NaN, raises invalid, sets errno to EDOM,
57 * cos(NaN) = NaN.
58 */
59
60 #ifdef PIC
/* PIC build: every constant is addressed as an offset from the GOT base
   held in %ebx, so %ebx is saved on entry and restored at each exit.
   MO1 addresses a scalar constant, MO2 a table entry selected by
   reg2 * _scale.  */
61 # define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
62 # define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
/* Unwind annotations that accompany a 4-byte push or pop of REG.  */
63 # define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
64 # define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
65 # define PUSH(REG) pushl REG; CFI_PUSH(REG)
66 # define POP(REG) popl REG; CFI_POP(REG)
/* LOAD_PIC_REG is not defined in this file; presumably it loads the GOT
   address into %ebx -- confirm against sysdep.h.  */
67 # define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
/* The function has several exits.  The CFI_PUSH placed after ret consists
   of unwind directives only; it re-declares %ebx as saved for the code
   that follows each return site.  */
68 # define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
/* The saved %ebx sits between %esp and the return address, hence 8.  */
69 # define ARG_X 8(%esp)
70 #else
/* Non-PIC build: constants are addressed directly, nothing is pushed, so
   the argument is right above the return address.  */
71 # define MO1(symbol) L(symbol)
72 # define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
73 # define ENTRANCE
74 # define RETURN ret
75 # define ARG_X 4(%esp)
76 #endif
77
78 .text
/* SSE2 implementation of cosf.
   In:  single precision x on the stack at address ARG_X.
   Out: result on the x87 stack (every exit path ends with flds or fldl
	followed by RETURN).
   Register roles in this dispatch part:
     %eax  = bit pattern of |x|, later the octant counter n
     %xmm0 = x, then |x|, then the reduced argument t, in double precision
   The unsigned branches below are valid because the sign bit of %eax is
   cleared first, so comparing bit patterns orders the values like |x|.  */
79 ENTRY(__cosf_sse2)
80 /* Input: single precision x on stack at address ARG_X */
81
82 ENTRANCE
83 movl ARG_X, %eax /* Bits of x */
84 cvtss2sd ARG_X, %xmm0 /* DP x */
85 andl $0x7fffffff, %eax /* Bits of |x| (sign bit cleared) */
86
87 cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */
88 jb L(arg_less_pio4)
89
90 /* Here if |x|>=Pi/4 */
91 movd %eax, %xmm3 /* SP |x| */
92 andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */
93 movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */
94
95 cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */
96 jae L(large_args)
97
98 /* Here if Pi/4<=|x|<9*Pi/4 */
99 mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */
100 cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */
101 addl $1, %eax /* k+1 */
102 movl $0x0e, %edx
103 andl %eax, %edx /* j = (k+1)&0x0e, i.e. k+1 rounded down to even */
104 addl $2, %eax /* n = k+3 */
105 subsd MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4, j*Pi/4 read from table L(PIO4J) */
106
107 L(reconstruction):
108 /* Input: %eax=n, %xmm0=t */
/* Bit 1 of n selects the polynomial (set: cos(t), clear: sin(t));
   bit 2 of n selects the sign of the result.  */
109 testl $2, %eax /* n&2 != 0? */
110 jz L(sin_poly)
111
112 /*L(cos_poly):*/
113 /* Here if cos(x) calculated using cos(t) polynomial for |t|<Pi/4:
114 * y = t*t; z = y*y;
115 * s = (-1.0)^((n>>2)&1)
116 * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
 * The polynomial in y is evaluated as two independent chains in z,
 * z*(C1+z*C3) in %xmm3 and y*(C0+z*(C2+z*C4)) in %xmm4, added at the end.
117 */
118 shrl $2, %eax /* n>>2 */
119 mulsd %xmm0, %xmm0 /* y=t^2 */
120 andl $1, %eax /* (n>>2)&1 */
121 movaps %xmm0, %xmm1 /* y */
122 mulsd %xmm0, %xmm0 /* z=t^4 */
123
124 movsd MO1(DP_C4), %xmm4 /* C4 */
125 mulsd %xmm0, %xmm4 /* z*C4 */
126 movsd MO1(DP_C3), %xmm3 /* C3 */
127 mulsd %xmm0, %xmm3 /* z*C3 */
128 addsd MO1(DP_C2), %xmm4 /* C2+z*C4 */
129 mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */
130 lea -8(%esp), %esp /* Borrow 8 bytes of stack frame (room for one double) */
131 addsd MO1(DP_C1), %xmm3 /* C1+z*C3 */
132 mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */
133 addsd MO1(DP_C0), %xmm4 /* C0+z*(C2+z*C4) */
134 mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */
135
136 addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
137 /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
138 addsd MO1(DP_ONES), %xmm3
139
/* DP_ONES[0] is +1.0 and DP_ONES[1] is -1.0, so this applies the sign s.  */
140 mulsd MO2(DP_ONES,%eax,8), %xmm3 /* DP result */
/* The result is handed to the x87 stack as a double (movsd/fldl); it is
   not rounded to single precision on this path.  */
141 movsd %xmm3, 0(%esp) /* Move result from sse... */
142 fldl 0(%esp) /* ...to FPU. */
143 /* Return back 8 bytes of stack frame */
144 lea 8(%esp), %esp
145 RETURN
146
147 .p2align 4
148 L(sin_poly):
149 /* Here if cos(x) calculated using sin(t) polynomial for |t|<Pi/4:
150 * y = t*t; z = y*y;
151 * s = (-1.0)^((n>>2)&1)
152 * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
 * Input: %eax=n, %xmm0=t.  The polynomial in y is evaluated as two
 * independent chains in z, z*(S1+z*S3) in %xmm3 and y*(S0+z*(S2+z*S4))
 * in %xmm2, added at the end.
153 */
154
155 movaps %xmm0, %xmm4 /* t */
156 shrl $2, %eax /* n>>2 */
157 mulsd %xmm0, %xmm0 /* y=t^2 */
158 andl $1, %eax /* (n>>2)&1 */
159 movaps %xmm0, %xmm1 /* y */
160 mulsd %xmm0, %xmm0 /* z=t^4 */
161
162 movsd MO1(DP_S4), %xmm2 /* S4 */
163 mulsd %xmm0, %xmm2 /* z*S4 */
164 movsd MO1(DP_S3), %xmm3 /* S3 */
165 mulsd %xmm0, %xmm3 /* z*S3 */
166 lea -8(%esp), %esp /* Borrow 8 bytes of stack frame (room for one double) */
167 addsd MO1(DP_S2), %xmm2 /* S2+z*S4 */
168 mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */
169 addsd MO1(DP_S1), %xmm3 /* S1+z*S3 */
170 mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */
171 addsd MO1(DP_S0), %xmm2 /* S0+z*(S2+z*S4) */
172 mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */
173 /* t*s, where s = (-1.0)^((n>>2)&1); DP_ONES[0]=+1.0, DP_ONES[1]=-1.0 */
174 mulsd MO2(DP_ONES,%eax,8), %xmm4
175 addsd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
176 /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
177 mulsd %xmm4, %xmm3
178 /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))) */
179 addsd %xmm4, %xmm3
/* The result is handed to the x87 stack as a double (movsd/fldl); it is
   not rounded to single precision on this path.  */
180 movsd %xmm3, 0(%esp) /* Move result from sse... */
181 fldl 0(%esp) /* ...to FPU. */
182 /* Return back 8 bytes of stack frame */
183 lea 8(%esp), %esp
184 RETURN
185
186 .p2align 4
187 L(large_args):
188 /* Here if |x|>=9*Pi/4 */
/* In: %eax = bits of |x|, %xmm0 = DP |x|.
   The flags set by the next cmpl are consumed twice: by the jae here and
   by the jne at the start of L(arg_inf_or_nan), which tells Inf (equal)
   from NaN (above).  */
189 cmpl $0x7f800000, %eax /* x is Inf or NaN? */
190 jae L(arg_inf_or_nan)
191
192 /* Here if finite |x|>=9*Pi/4 */
193 cmpl $0x4b000000, %eax /* |x|<2^23? */
194 jae L(very_large_args)
195
196 /* Here if 9*Pi/4<=|x|<2^23 */
/* Pi/4 is applied in two pieces (high and low part) so that the
   subtraction |x| - j*Pi/4 is done in two steps.  Both constants are
   stored negated, hence the addsd instructions below.  */
197 movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */
198 mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
199 cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
200 addl $1, %eax /* k+1 */
201 movl %eax, %edx
202 andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
203 cvtsi2sdl %edx, %xmm4 /* DP j */
204 movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */
205 mulsd %xmm4, %xmm2 /* -j*PIO4HI */
206 movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */
207 addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
208 addl $2, %eax /* n = k+3 */
209 mulsd %xmm3, %xmm4 /* -j*PIO4LO */
210 addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
211 jmp L(reconstruction)
212
213 .p2align 4
214 L(very_large_args):
215 /* Here if finite |x|>=2^23 */
/* In: %eax = bits of |x|, %xmm0 = DP |x|.  Out: %eax = n, %xmm0 = t.
   The checks in L(large_args) leave the biased exponent eb in
   [150, 254] here, so bitpos is in [82, 186] and j = bitpos/28 is in
   [2, 6]: bitpos fits in %ax with %ah = 0 as the 8-bit div needs, and
   the accesses _FPI[j-2] .. _FPI[j+1] stay inside the table.  */
216
217 /* bitpos = (ix>>23) - BIAS_32 + 59; */
218 shrl $23, %eax /* eb = biased exponent of x */
219 /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
220 subl $68, %eax
221 movl $28, %ecx /* %cl=28 */
222 movl %eax, %edx /* bitpos copy */
223
224 /* j = bitpos/28; */
225 div %cl /* j in register %al=%ax/%cl, remainder in %ah */
226 movapd %xmm0, %xmm3 /* |x| */
227 /* clear unneeded remainder from %ah */
228 andl $0xff, %eax
229
230 imull $28, %eax, %ecx /* j*28 */
231 movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */
232 movapd %xmm0, %xmm5 /* |x| */
233 mulsd -2*8+MO2(_FPI,%eax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */
234 movapd %xmm0, %xmm1 /* |x| */
235 mulsd -1*8+MO2(_FPI,%eax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */
236 mulsd 0*8+MO2(_FPI,%eax,8), %xmm0 /* tmp0 = FPI[j]*|x| */
237 addl $19, %ecx /* j*28+19 */
238 mulsd 1*8+MO2(_FPI,%eax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */
239 cmpl %ecx, %edx /* bitpos>=j*28+19? */
240 jl L(very_large_skip1)
241
242 /* Here if bitpos>=j*28+19 */
243 andpd %xmm3, %xmm4 /* HI(tmp3) */
244 subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
245 L(very_large_skip1):
246
/* Extract the integer part k of tmp3+tmp2: adding 2^52 leaves the
   rounded integer in the low 32 bits of tmp6 (this presumes the default
   round-to-nearest mode), subtracting 2^52 again gives that integer as
   a double in tmp4.  */
247 movsd MO1(DP_2POW52), %xmm6
248 movapd %xmm5, %xmm2 /* tmp2 copy */
249 addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
250 movl $1, %edx
251 addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
252 movsd 8+MO1(DP_2POW52), %xmm4 /* -2^52 */
253 movd %xmm6, %eax /* k = I64_LO(tmp6); */
254 addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
255 comisd %xmm5, %xmm4 /* tmp4 > tmp5? */
256 jbe L(very_large_skip2)
257
258 /* Here if tmp4 > tmp5: the rounding went up, step back by one */
259 subl $1, %eax /* k-- */
260 addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */
261 L(very_large_skip2):
262
263 andl %eax, %edx /* k&1 */
264 subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
/* DP_ZERONE[0] is 0.0 and DP_ZERONE[1] is -1.0.  */
265 addsd MO2(DP_ZERONE,%edx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */
266 addsd %xmm2, %xmm3 /* t += tmp2 */
267 addsd %xmm3, %xmm0 /* t += tmp0 */
268 addl $3, %eax /* n=k+3 */
269 addsd %xmm1, %xmm0 /* t += tmp1 */
270 mulsd MO1(DP_PIO4), %xmm0 /* t *= Pi/4 */
271
272 jmp L(reconstruction) /* end of very_large_args path */
273
274 .p2align 4
275 L(arg_less_pio4):
276 /* Here if |x|<Pi/4 */
/* In: %eax = bits of |x| (sign bit already cleared at function entry, so
   the signed jl below orders the values like an unsigned compare),
   %xmm0 = DP x.  No range reduction is needed; x^2 makes the sign of x
   irrelevant.  */
277 cmpl $0x3d000000, %eax /* |x|<2^-5? */
278 jl L(arg_less_2pn5)
279
280 /* Here if 2^-5<=|x|<Pi/4 */
/* Same two-chain evaluation as the cos polynomial in L(reconstruction):
   z*(C1+z*C3) in %xmm5 and y*(C0+z*(C2+z*C4)) in %xmm3.  */
281 mulsd %xmm0, %xmm0 /* y=x^2 */
282 movaps %xmm0, %xmm1 /* y */
283 mulsd %xmm0, %xmm0 /* z=x^4 */
284 movsd MO1(DP_C4), %xmm3 /* C4 */
285 mulsd %xmm0, %xmm3 /* z*C4 */
286 movsd MO1(DP_C3), %xmm5 /* C3 */
287 mulsd %xmm0, %xmm5 /* z*C3 */
288 addsd MO1(DP_C2), %xmm3 /* C2+z*C4 */
289 mulsd %xmm0, %xmm3 /* z*(C2+z*C4) */
290 addsd MO1(DP_C1), %xmm5 /* C1+z*C3 */
291 mulsd %xmm0, %xmm5 /* z*(C1+z*C3) */
292 addsd MO1(DP_C0), %xmm3 /* C0+z*(C2+z*C4) */
293 mulsd %xmm1, %xmm3 /* y*(C0+z*(C2+z*C4)) */
294 addsd %xmm5, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
295 /* 1.0 + y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
296 addsd MO1(DP_ONES), %xmm3
297 cvtsd2ss %xmm3, %xmm3 /* SP result */
298
/* Shared exit for every path that leaves a single-precision result in
   %xmm3: the value goes through a 4-byte stack slot to the x87 stack.  */
299 L(epilogue):
300 lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */
301 movss %xmm3, 0(%esp) /* Move result from sse... */
302 flds 0(%esp) /* ...to FPU. */
303 /* Return back 4 bytes of stack frame */
304 lea 4(%esp), %esp
305 RETURN
306
307 .p2align 4
308 L(arg_less_2pn5):
309 /* Here if |x|<2^-5 */
/* In: %eax = bits of |x|, %xmm0 = DP x.  A two-term polynomial in x^2 is
   used in this range; the result is rounded to single precision and
   returned through L(epilogue) in %xmm3.  */
310 cmpl $0x32000000, %eax /* |x|<2^-27? */
311 jl L(arg_less_2pn27)
312
313 /* Here if 2^-27<=|x|<2^-5 */
314 mulsd %xmm0, %xmm0 /* DP x^2 */
315 movsd MO1(DP_COS2_1), %xmm3 /* DP DP_COS2_1 */
316 mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_1 */
317 addsd MO1(DP_COS2_0), %xmm3 /* DP DP_COS2_0+x^2*DP_COS2_1 */
318 mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_0+x^4*DP_COS2_1 */
319 /* DP 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1 */
320 addsd MO1(DP_ONES), %xmm3
321 cvtsd2ss %xmm3, %xmm3 /* SP result */
322 jmp L(epilogue)
323
324 .p2align 4
325 L(arg_less_2pn27):
326 /* Here if |x|<2^-27 */
/* The result is formed as 1.0-|x| in single precision.  For x = +-0 the
   subtraction is exact; for any other x in this range it is not, which
   matches the "Special cases" list in the file header (cos(+-0) must not
   raise inexact, cos of tiny nonzero values must).  */
327 movss ARG_X, %xmm0 /* x */
328 andps MO1(SP_ABS_MASK),%xmm0 /* |x| */
329 movss MO1(SP_ONE), %xmm3 /* 1.0 */
330 subss %xmm0, %xmm3 /* result is 1.0-|x| */
331 jmp L(epilogue)
332
333 .p2align 4
334 L(arg_inf_or_nan):
335 /* Here if |x| is Inf or NAN */
/* The flags are still those of "cmpl $0x7f800000, %eax" in
   L(large_args): equal means |x| is exactly Inf, not equal (above)
   means NaN.  */
336 jne L(skip_errno_setting) /* in case of x is NaN */
337
338 /* Here if x is Inf. Set errno to EDOM. */
339 call JUMPTARGET(__errno_location)
340 movl $EDOM, (%eax) /* store EDOM through the pointer returned in %eax */
341
/* NOTE(review): this alignment directive sits on the fall-through path
   from the Inf case, so any padding it emits is executed there.  */
342 .p2align 4
343 L(skip_errno_setting):
344 /* Here if |x| is Inf or NAN. Continued. */
345 movss ARG_X, %xmm3 /* load x */
346 subss %xmm3, %xmm3 /* x-x: Result is NaN for Inf as well as NaN input */
347 jmp L(epilogue)
348 END(__cosf_sse2)
349
350 .section .rodata, "a"
351 .p2align 3
352 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
/* Each entry is one double written as two 32-bit words, low word first.
   The medium-range path indexes it with scale 8 using
   j = (k+1)&0x0e.  */
353 .long 0x00000000,0x00000000 /* 0*Pi/4 */
354 .long 0x54442d18,0x3fe921fb /* 1*Pi/4 */
355 .long 0x54442d18,0x3ff921fb /* 2*Pi/4 */
356 .long 0x7f3321d2,0x4002d97c /* 3*Pi/4 */
357 .long 0x54442d18,0x400921fb /* 4*Pi/4 */
358 .long 0x2955385e,0x400f6a7a /* 5*Pi/4 */
359 .long 0x7f3321d2,0x4012d97c /* 6*Pi/4 */
360 .long 0xe9bba775,0x4015fdbb /* 7*Pi/4 */
361 .long 0x54442d18,0x401921fb /* 8*Pi/4 */
362 .long 0xbeccb2bb,0x401c463a /* 9*Pi/4 */
363 .long 0x2955385e,0x401f6a7a /* 10*Pi/4 */
364 .type L(PIO4J), @object
365 ASM_SIZE_DIRECTIVE(L(PIO4J))
366
367 .p2align 3
368 L(_FPI): /* 4/Pi broken into sum of positive DP values */
/* 25 doubles, each written as two 32-bit words, low word first.
   Entry 0 is 0.0; the following entries decrease in magnitude.
   L(very_large_args) reads four consecutive entries,
   _FPI[j-2] .. _FPI[j+1].  */
369 .long 0x00000000,0x00000000
370 .long 0x6c000000,0x3ff45f30
371 .long 0x2a000000,0x3e3c9c88
372 .long 0xa8000000,0x3c54fe13
373 .long 0xd0000000,0x3aaf47d4
374 .long 0x6c000000,0x38fbb81b
375 .long 0xe0000000,0x3714acc9
376 .long 0x7c000000,0x3560e410
377 .long 0x56000000,0x33bca2c7
378 .long 0xac000000,0x31fbd778
379 .long 0xe0000000,0x300b7246
380 .long 0xe8000000,0x2e5d2126
381 .long 0x48000000,0x2c970032
382 .long 0xe8000000,0x2ad77504
383 .long 0xe0000000,0x290921cf
384 .long 0xb0000000,0x274deb1c
385 .long 0xe0000000,0x25829a73
386 .long 0xbe000000,0x23fd1046
387 .long 0x10000000,0x2224baed
388 .long 0x8e000000,0x20709d33
389 .long 0x80000000,0x1e535a2f
390 .long 0x64000000,0x1cef904e
391 .long 0x30000000,0x1b0d6398
392 .long 0x24000000,0x1964ce7d
393 .long 0x16000000,0x17b908bf
394 .type L(_FPI), @object
395 ASM_SIZE_DIRECTIVE(L(_FPI))
396
397 /* Coefficients of polynomial
398 for cos(x)~=1.0+x^2*DP_COS2_0+x^4*DP_COS2_1, |x|<2^-5.
   Both are doubles written as two 32-bit words, low word first; they are
   used only by L(arg_less_2pn5).  */
399 .p2align 3
400 L(DP_COS2_0):
401 .long 0xff5cc6fd,0xbfdfffff /* slightly above -1/2 */
402 .type L(DP_COS2_0), @object
403 ASM_SIZE_DIRECTIVE(L(DP_COS2_0))
404
405 .p2align 3
406 L(DP_COS2_1):
407 .long 0xb178dac5,0x3fa55514 /* about 1/24 */
408 .type L(DP_COS2_1), @object
409 ASM_SIZE_DIRECTIVE(L(DP_COS2_1))
410
411 .p2align 3
/* Indexed by k&1 in L(very_large_args): 0.0 for even k, -1.0 for odd k.  */
412 L(DP_ZERONE):
413 .long 0x00000000,0x00000000 /* 0.0 */
414 .long 0x00000000,0xbff00000 /* -1.0 */
415 .type L(DP_ZERONE),@object
416 ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
417
418 .p2align 3
/* Entry 0 doubles as the constant 1.0 of the polynomials; entries 0/1 are
   the sign factor selected by (n>>2)&1; entry 1 alone is used as -1.0 via
   8+MO1(DP_ONES).  */
419 L(DP_ONES):
420 .long 0x00000000,0x3ff00000 /* +1.0 */
421 .long 0x00000000,0xbff00000 /* -1.0 */
422 .type L(DP_ONES), @object
423 ASM_SIZE_DIRECTIVE(L(DP_ONES))
424
425 /* Coefficients of polynomial
426 for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.
   Each is a double written as two 32-bit words, low word first.  The
   values are close to, but not equal to, the Taylor coefficients noted
   beside them.  They are stored in the order S3, S1, S4, S2, S0.  */
427 .p2align 3
428 L(DP_S3):
429 .long 0x64e6b5b4,0x3ec71d72 /* S3, about 1/9! */
430 .type L(DP_S3), @object
431 ASM_SIZE_DIRECTIVE(L(DP_S3))
432
433 .p2align 3
434 L(DP_S1):
435 .long 0x10c2688b,0x3f811111 /* S1, about 1/5! */
436 .type L(DP_S1), @object
437 ASM_SIZE_DIRECTIVE(L(DP_S1))
438
439 .p2align 3
440 L(DP_S4):
441 .long 0x1674b58a,0xbe5a947e /* S4, about -1/11! */
442 .type L(DP_S4), @object
443 ASM_SIZE_DIRECTIVE(L(DP_S4))
444
445 .p2align 3
446 L(DP_S2):
447 .long 0x8b4bd1f9,0xbf2a019f /* S2, about -1/7! */
448 .type L(DP_S2), @object
449 ASM_SIZE_DIRECTIVE(L(DP_S2))
450
451 .p2align 3
452 L(DP_S0):
453 .long 0x55551cd9,0xbfc55555 /* S0, about -1/3! */
454 .type L(DP_S0), @object
455 ASM_SIZE_DIRECTIVE(L(DP_S0))
456
457 /* Coefficients of polynomial
458 for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.
   Each is a double written as two 32-bit words, low word first.  The
   values are close to, but not equal to, the Taylor coefficients noted
   beside them.  They are stored in the order C3, C1, C4, C2, C0.  */
459 .p2align 3
460 L(DP_C3):
461 .long 0x9ac43cc0,0x3efa00eb /* C3, about 1/8! */
462 .type L(DP_C3), @object
463 ASM_SIZE_DIRECTIVE(L(DP_C3))
464
465 .p2align 3
466 L(DP_C1):
467 .long 0x545c50c7,0x3fa55555 /* C1, about 1/4! */
468 .type L(DP_C1), @object
469 ASM_SIZE_DIRECTIVE(L(DP_C1))
470
471 .p2align 3
472 L(DP_C4):
473 .long 0xdd8844d7,0xbe923c97 /* C4, about -1/10! */
474 .type L(DP_C4), @object
475 ASM_SIZE_DIRECTIVE(L(DP_C4))
476
477 .p2align 3
478 L(DP_C2):
479 .long 0x348b6874,0xbf56c16b /* C2, about -1/6! */
480 .type L(DP_C2), @object
481 ASM_SIZE_DIRECTIVE(L(DP_C2))
482
483 .p2align 3
484 L(DP_C0):
485 .long 0xfffe98ae,0xbfdfffff /* C0, about -1/2! */
486 .type L(DP_C0), @object
487 ASM_SIZE_DIRECTIVE(L(DP_C0))
488
489 .p2align 3
/* Scales the reduced argument back to radians in L(very_large_args).  */
490 L(DP_PIO4):
491 .long 0x54442d18,0x3fe921fb /* Pi/4 */
492 .type L(DP_PIO4), @object
493 ASM_SIZE_DIRECTIVE(L(DP_PIO4))
494
495 .p2align 3
/* Used as a pair in L(very_large_args): +2^52 is added and then -2^52
   (at offset 8) to obtain the integer part of a double.  */
496 L(DP_2POW52):
497 .long 0x00000000,0x43300000 /* +2^52 */
498 .long 0x00000000,0xc3300000 /* -2^52 */
499 .type L(DP_2POW52), @object
500 ASM_SIZE_DIRECTIVE(L(DP_2POW52))
501
502 .p2align 3
503 L(DP_INVPIO4):
504 .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
505 .type L(DP_INVPIO4), @object
506 ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
507
508 .p2align 3
/* Both parts of Pi/4 below are stored with the sign bit set, i.e. they
   are parts of -Pi/4; L(large_args) therefore adds j times each of them
   to |x|.  The high part has its low 26 bits zero.  */
509 L(DP_PIO4HI):
510 .long 0x54000000,0xbfe921fb /* High part of -Pi/4 */
511 .type L(DP_PIO4HI), @object
512 ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
513
514 .p2align 3
515 L(DP_PIO4LO):
516 .long 0x11A62633,0xbe010b46 /* Low part of -Pi/4 */
517 .type L(DP_PIO4LO), @object
518 ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
519
520 .p2align 2
/* Single-precision 4/Pi, used only by the Pi/4<=|x|<9*Pi/4 path.  */
521 L(SP_INVPIO4):
522 .long 0x3fa2f983 /* 4/Pi */
523 .type L(SP_INVPIO4), @object
524 ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
525
/* 16-byte alignment: the mask is a 128-bit memory operand of andpd.  */
526 .p2align 4
527 L(DP_ABS_MASK): /* Mask for getting DP absolute value */
528 .long 0xffffffff,0x7fffffff
529 .long 0xffffffff,0x7fffffff
530 .type L(DP_ABS_MASK), @object
531 ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
532
533 .p2align 3
/* The mask keeps the upper 32 bits of a double (sign, 11-bit exponent and
   the top 20 stored mantissa bits) and clears the lower 32.  The "21 bits"
   below presumably counts the implicit leading mantissa bit -- TODO
   confirm.  It is loaded with movsd, so only these 8 bytes are read.  */
534 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
535 .long 0x00000000,0xffffffff
536 .type L(DP_HI_MASK), @object
537 ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
538
/* 16-byte alignment: the mask is a 128-bit memory operand of andps.  */
539 .p2align 4
540 L(SP_ABS_MASK): /* Mask for getting SP absolute value */
541 .long 0x7fffffff,0x7fffffff
542 .long 0x7fffffff,0x7fffffff
543 .type L(SP_ABS_MASK), @object
544 ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
545
546 .p2align 2
547 L(SP_ONE):
548 .long 0x3f800000 /* 1.0 */
549 .type L(SP_ONE), @object
550 ASM_SIZE_DIRECTIVE(L(SP_ONE))
551
/* NOTE(review): the function defined in this file is __cosf_sse2; __cosf
   is not defined here and is presumably provided by the multiarch
   selector that picks between implementations -- confirm.  */
552 weak_alias (__cosf, cosf)