sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S

   1 /* Optimized with sse2 version of cosf
   2    Copyright (C) 2012-2021 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <https://www.gnu.org/licenses/>.  */
  18
  19 #include <sysdep.h>
  20 #include <errno.h>
  21
  22 /* Short algorithm description:
  23  *
  24  *  1) if |x| == 0: return 1.0-|x|.
  25  *  2) if |x| <  2^-27: return 1.0-|x|.
  26  *  3) if |x| <  2^-5 : return 1.0+x^2*DP_COS2_0+x^5*DP_COS2_1.
  27  *  4) if |x| <   Pi/4: return 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
  28  *  5) if |x| < 9*Pi/4:
  29  *      5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+3,
  30  *           t=|x|-j*Pi/4.
  31  *      5.2) Reconstruction:
  32  *          s = (-1.0)^((n>>2)&1)
  33  *          if(n&2 != 0) {
  34  *              using cos(t) polynomial for |t|<Pi/4, result is
  35  *              s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
  36  *          } else {
  37  *              using sin(t) polynomial for |t|<Pi/4, result is
  38  *              s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
  39  *          }
  40  *  6) if |x| < 2^23, large args:
  41  *      6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
  42  *           t=|x|-j*Pi/4.
  43  *      6.2) Reconstruction same as (5.2).
  44  *  7) if |x| >= 2^23, very large args:
  45  *      7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
  46  *           t=|x|-j*Pi/4.
  47  *      7.2) Reconstruction same as (5.2).
  48  *  8) if x is Inf, return x-x, and set errno=EDOM.
  49  *  9) if x is NaN, return x-x.
  50  *
  51  * Special cases:
  52  *  cos(+-0) = 1 not raising inexact,
  53  *  cos(subnormal) raises inexact,
  54  *  cos(min_normalized) raises inexact,
  55  *  cos(normalized) raises inexact,
  56  *  cos(Inf) = NaN, raises invalid, sets errno to EDOM,
  57  *  cos(NaN) = NaN.
  58  */
  59
  60 #ifdef  PIC
  61 # define MO1(symbol)                    L(symbol)##@GOTOFF(%ebx)
  62 # define MO2(symbol,reg2,_scale)        L(symbol)##@GOTOFF(%ebx,reg2,_scale)
  63 # define CFI_PUSH(REG)  cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
  64 # define CFI_POP(REG)   cfi_adjust_cfa_offset(-4); cfi_restore(REG)
  65 # define PUSH(REG)                      pushl REG; CFI_PUSH(REG)
  66 # define POP(REG)                       popl REG; CFI_POP(REG)
  67 # define ENTRANCE                       PUSH(%ebx); LOAD_PIC_REG(bx)
  68 # define RETURN                         POP(%ebx); ret; CFI_PUSH(%ebx)
  69 # define ARG_X                          8(%esp)
  70 #else
  71 # define MO1(symbol)                    L(symbol)
  72 # define MO2(symbol,reg2,_scale)        L(symbol)(,reg2,_scale)
  73 # define ENTRANCE
  74 # define RETURN                         ret
  75 # define ARG_X                          4(%esp)
  76 #endif
  77
  78         .text
  79 ENTRY(__cosf_sse2)
  80         /* Input: single precision x on stack at address ARG_X */
  81
  82         ENTRANCE
  83         movl    ARG_X, %eax             /* Bits of x */
  84         cvtss2sd ARG_X, %xmm0           /* DP x */
  85         andl    $0x7fffffff, %eax       /* |x| */
  86
  87         cmpl    $0x3f490fdb, %eax       /* |x|<Pi/4?  */
  88         jb      L(arg_less_pio4)
  89
  90         /* Here if |x|>=Pi/4 */
  91         movd    %eax, %xmm3             /* SP |x| */
  92         andpd   MO1(DP_ABS_MASK),%xmm0  /* DP |x| */
  93         movss   MO1(SP_INVPIO4), %xmm2  /* SP 1/(Pi/4) */
  94
  95         cmpl    $0x40e231d6, %eax       /* |x|<9*Pi/4?  */
  96         jae     L(large_args)
  97
  98         /* Here if Pi/4<=|x|<9*Pi/4 */
  99         mulss   %xmm3, %xmm2            /* SP |x|/(Pi/4) */
 100         cvttss2si %xmm2, %eax           /* k, number of Pi/4 in x */
 101         addl    $1, %eax                /* k+1 */
 102         movl    $0x0e, %edx
 103         andl    %eax, %edx              /* j = (k+1)&0x0e */
 104         addl    $2, %eax                /* n */
 105         subsd   MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */
 106
 107 L(reconstruction):
 108         /* Input: %eax=n, %xmm0=t */
 109         testl   $2, %eax                /* n&2 != 0?  */
 110         jz      L(sin_poly)
 111
 112 /*L(cos_poly):*/
 113         /* Here if cos(x) calculated using cos(t) polynomial for |t|<Pi/4:
 114          * y = t*t; z = y*y;
 115          * s = sign(x) * (-1.0)^((n>>2)&1)
 116          * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
 117          */
 118         shrl    $2, %eax                /* n>>2 */
 119         mulsd   %xmm0, %xmm0            /* y=t^2 */
 120         andl    $1, %eax                /* (n>>2)&1 */
 121         movaps  %xmm0, %xmm1            /* y */
 122         mulsd   %xmm0, %xmm0            /* z=t^4 */
 123
 124         movsd   MO1(DP_C4), %xmm4       /* C4 */
 125         mulsd   %xmm0, %xmm4            /* z*C4 */
 126         movsd   MO1(DP_C3), %xmm3       /* C3 */
 127         mulsd   %xmm0, %xmm3            /* z*C3 */
 128         addsd   MO1(DP_C2), %xmm4       /* C2+z*C4 */
 129         mulsd   %xmm0, %xmm4            /* z*(C2+z*C4) */
 130         lea     -8(%esp), %esp          /* Borrow 4 bytes of stack frame */
 131         addsd   MO1(DP_C1), %xmm3       /* C1+z*C3 */
 132         mulsd   %xmm0, %xmm3            /* z*(C1+z*C3) */
 133         addsd   MO1(DP_C0), %xmm4       /* C0+z*(C2+z*C4) */
 134         mulsd   %xmm1, %xmm4            /* y*(C0+z*(C2+z*C4)) */
 135
 136         addsd   %xmm4, %xmm3            /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
 137         /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
 138         addsd   MO1(DP_ONES), %xmm3
 139
 140         mulsd   MO2(DP_ONES,%eax,8), %xmm3 /* DP result */
 141         movsd   %xmm3, 0(%esp)          /* Move result from sse...  */
 142         fldl    0(%esp)                 /* ...to FPU.  */
 143         /* Return back 4 bytes of stack frame */
 144         lea     8(%esp), %esp
 145         RETURN
 146
 147         .p2align        4
 148 L(sin_poly):
 149         /* Here if cos(x) calculated using sin(t) polynomial for |t|<Pi/4:
 150          * y = t*t; z = y*y;
 151          * s = sign(x) * (-1.0)^((n>>2)&1)
 152          * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
 153          */
 154
 155         movaps  %xmm0, %xmm4            /* t */
 156         shrl    $2, %eax                /* n>>2 */
 157         mulsd   %xmm0, %xmm0            /* y=t^2 */
 158         andl    $1, %eax                /* (n>>2)&1 */
 159         movaps  %xmm0, %xmm1            /* y */
 160         mulsd   %xmm0, %xmm0            /* z=t^4 */
 161
 162         movsd   MO1(DP_S4), %xmm2       /* S4 */
 163         mulsd   %xmm0, %xmm2            /* z*S4 */
 164         movsd   MO1(DP_S3), %xmm3       /* S3 */
 165         mulsd   %xmm0, %xmm3            /* z*S3 */
 166         lea     -8(%esp), %esp          /* Borrow 4 bytes of stack frame */
 167         addsd   MO1(DP_S2), %xmm2       /* S2+z*S4 */
 168         mulsd   %xmm0, %xmm2            /* z*(S2+z*S4) */
 169         addsd   MO1(DP_S1), %xmm3       /* S1+z*S3 */
 170         mulsd   %xmm0, %xmm3            /* z*(S1+z*S3) */
 171         addsd   MO1(DP_S0), %xmm2       /* S0+z*(S2+z*S4) */
 172         mulsd   %xmm1, %xmm2            /* y*(S0+z*(S2+z*S4)) */
 173         /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
 174         mulsd   MO2(DP_ONES,%eax,8), %xmm4
 175         addsd   %xmm2, %xmm3            /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 176         /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 177         mulsd   %xmm4, %xmm3
 178         /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 179         addsd   %xmm4, %xmm3
 180         movsd   %xmm3, 0(%esp)          /* Move result from sse...   */
 181         fldl    0(%esp)                 /* ...to FPU.  */
 182         /* Return back 4 bytes of stack frame */
 183         lea     8(%esp), %esp
 184         RETURN
 185
 186         .p2align        4
 187 L(large_args):
 188         /* Here if |x|>=9*Pi/4 */
 189         cmpl    $0x7f800000, %eax       /* x is Inf or NaN?  */
 190         jae     L(arg_inf_or_nan)
 191
 192         /* Here if finite |x|>=9*Pi/4 */
 193         cmpl    $0x4b000000, %eax       /* |x|<2^23?  */
 194         jae     L(very_large_args)
 195
 196         /* Here if 9*Pi/4<=|x|<2^23 */
 197         movsd   MO1(DP_INVPIO4), %xmm1  /* 1/(Pi/4) */
 198         mulsd   %xmm0, %xmm1            /* |x|/(Pi/4) */
 199         cvttsd2si %xmm1, %eax           /* k=trunc(|x|/(Pi/4)) */
 200         addl    $1, %eax                /* k+1 */
 201         movl    %eax, %edx
 202         andl    $0xfffffffe, %edx       /* j=(k+1)&0xfffffffe */
 203         cvtsi2sdl %edx, %xmm4           /* DP j */
 204         movsd   MO1(DP_PIO4HI), %xmm2   /* -PIO4HI = high part of -Pi/4 */
 205         mulsd   %xmm4, %xmm2            /* -j*PIO4HI */
 206         movsd   MO1(DP_PIO4LO), %xmm3   /* -PIO4LO = low part of -Pi/4 */
 207         addsd   %xmm2, %xmm0            /* |x| - j*PIO4HI */
 208         addl    $2, %eax                /* n */
 209         mulsd   %xmm3, %xmm4            /* j*PIO4LO */
 210         addsd   %xmm4, %xmm0            /* t = |x| - j*PIO4HI - j*PIO4LO */
 211         jmp     L(reconstruction)
 212
 213         .p2align        4
 214 L(very_large_args):
 215         /* Here if finite |x|>=2^23 */
 216
 217         /* bitpos = (ix>>23) - BIAS_32 + 59; */
 218         shrl    $23, %eax               /* eb = biased exponent of x */
 219         /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
 220         subl    $68, %eax
 221         movl    $28, %ecx               /* %cl=28 */
 222         movl    %eax, %edx              /* bitpos copy */
 223
 224         /* j = bitpos/28; */
 225         div     %cl                     /* j in register %al=%ax/%cl */
 226         movapd  %xmm0, %xmm3            /* |x| */
 227         /* clear unneeded remainder from %ah */
 228         andl    $0xff, %eax
 229
 230         imull   $28, %eax, %ecx         /* j*28 */
 231         movsd   MO1(DP_HI_MASK), %xmm4  /* DP_HI_MASK */
 232         movapd  %xmm0, %xmm5            /* |x| */
 233         mulsd   -2*8+MO2(_FPI,%eax,8), %xmm3    /* tmp3 = FPI[j-2]*|x| */
 234         movapd  %xmm0, %xmm1            /* |x| */
 235         mulsd   -1*8+MO2(_FPI,%eax,8), %xmm5    /* tmp2 = FPI[j-1]*|x| */
 236         mulsd   0*8+MO2(_FPI,%eax,8), %xmm0     /* tmp0 = FPI[j]*|x| */
 237         addl    $19, %ecx               /* j*28+19 */
 238         mulsd   1*8+MO2(_FPI,%eax,8), %xmm1     /* tmp1 = FPI[j+1]*|x| */
 239         cmpl    %ecx, %edx              /* bitpos>=j*28+19?  */
 240         jl      L(very_large_skip1)
 241
 242         /* Here if bitpos>=j*28+19 */
 243         andpd   %xmm3, %xmm4            /* HI(tmp3) */
 244         subsd   %xmm4, %xmm3            /* tmp3 = tmp3 - HI(tmp3) */
 245 L(very_large_skip1):
 246
 247         movsd   MO1(DP_2POW52), %xmm6
 248         movapd  %xmm5, %xmm2            /* tmp2 copy */
 249         addsd   %xmm3, %xmm5            /* tmp5 = tmp3 + tmp2 */
 250         movl    $1, %edx
 251         addsd   %xmm5, %xmm6            /* tmp6 = tmp5 + 2^52 */
 252         movsd   8+MO1(DP_2POW52), %xmm4
 253         movd    %xmm6, %eax             /* k = I64_LO(tmp6); */
 254         addsd   %xmm6, %xmm4            /* tmp4 = tmp6 - 2^52 */
 255         comisd  %xmm5, %xmm4            /* tmp4 > tmp5?  */
 256         jbe     L(very_large_skip2)
 257
 258         /* Here if tmp4 > tmp5 */
 259         subl    $1, %eax                /* k-- */
 260         addsd   8+MO1(DP_ONES), %xmm4   /* tmp4 -= 1.0 */
 261 L(very_large_skip2):
 262
 263         andl    %eax, %edx              /* k&1 */
 264         subsd   %xmm4, %xmm3            /* tmp3 -= tmp4 */
 265         addsd   MO2(DP_ZERONE,%edx,8), %xmm3 /* t  = DP_ZERONE[k&1] + tmp3 */
 266         addsd   %xmm2, %xmm3            /* t += tmp2 */
 267         addsd   %xmm3, %xmm0            /* t += tmp0 */
 268         addl    $3, %eax                /* n=k+3 */
 269         addsd   %xmm1, %xmm0            /* t += tmp1 */
 270         mulsd   MO1(DP_PIO4), %xmm0     /* t *= PI04 */
 271
 272         jmp     L(reconstruction)       /* end of very_large_args peth */
 273
 274         .p2align        4
 275 L(arg_less_pio4):
 276         /* Here if |x|<Pi/4 */
 277         cmpl    $0x3d000000, %eax       /* |x|<2^-5?  */
 278         jl      L(arg_less_2pn5)
 279
 280         /* Here if 2^-5<=|x|<Pi/4 */
 281         mulsd   %xmm0, %xmm0            /* y=x^2 */
 282         movaps  %xmm0, %xmm1            /* y */
 283         mulsd   %xmm0, %xmm0            /* z=x^4 */
 284         movsd   MO1(DP_C4), %xmm3       /* C4 */
 285         mulsd   %xmm0, %xmm3            /* z*C4 */
 286         movsd   MO1(DP_C3), %xmm5       /* C3 */
 287         mulsd   %xmm0, %xmm5            /* z*C3 */
 288         addsd   MO1(DP_C2), %xmm3       /* C2+z*C4 */
 289         mulsd   %xmm0, %xmm3            /* z*(C2+z*C4) */
 290         addsd   MO1(DP_C1), %xmm5       /* C1+z*C3 */
 291         mulsd   %xmm0, %xmm5            /* z*(C1+z*C3) */
 292         addsd   MO1(DP_C0), %xmm3       /* C0+z*(C2+z*C4) */
 293         mulsd   %xmm1, %xmm3            /* y*(C0+z*(C2+z*C4)) */
 294         addsd   %xmm5, %xmm3            /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
 295         /* 1.0 + y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
 296         addsd   MO1(DP_ONES), %xmm3
 297         cvtsd2ss %xmm3, %xmm3           /* SP result */
 298
 299 L(epilogue):
 300         lea     -4(%esp), %esp          /* Borrow 4 bytes of stack frame */
 301         movss   %xmm3, 0(%esp)          /* Move result from sse...  */
 302         flds    0(%esp)                 /* ...to FPU.  */
 303         /* Return back 4 bytes of stack frame */
 304         lea     4(%esp), %esp
 305         RETURN
 306
 307         .p2align        4
 308 L(arg_less_2pn5):
 309         /* Here if |x|<2^-5 */
 310         cmpl    $0x32000000, %eax       /* |x|<2^-27?  */
 311         jl      L(arg_less_2pn27)
 312
 313         /* Here if 2^-27<=|x|<2^-5 */
 314         mulsd   %xmm0, %xmm0            /* DP x^2 */
 315         movsd   MO1(DP_COS2_1), %xmm3   /* DP DP_COS2_1 */
 316         mulsd   %xmm0, %xmm3            /* DP x^2*DP_COS2_1 */
 317         addsd   MO1(DP_COS2_0), %xmm3   /* DP DP_COS2_0+x^2*DP_COS2_1 */
 318         mulsd   %xmm0, %xmm3            /* DP x^2*DP_COS2_0+x^4*DP_COS2_1 */
 319         /* DP 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1 */
 320         addsd   MO1(DP_ONES), %xmm3
 321         cvtsd2ss %xmm3, %xmm3           /* SP result */
 322         jmp     L(epilogue)
 323
 324         .p2align        4
 325 L(arg_less_2pn27):
 326         /* Here if |x|<2^-27 */
 327         movss   ARG_X, %xmm0            /* x */
 328         andps   MO1(SP_ABS_MASK),%xmm0  /* |x| */
 329         movss   MO1(SP_ONE), %xmm3      /* 1.0 */
 330         subss   %xmm0, %xmm3            /* result is 1.0-|x| */
 331         jmp     L(epilogue)
 332
 333         .p2align        4
 334 L(arg_inf_or_nan):
 335         /* Here if |x| is Inf or NAN */
 336         jne     L(skip_errno_setting)   /* in case of x is NaN */
 337
 338         /* Here if x is Inf. Set errno to EDOM.  */
 339         call    JUMPTARGET(__errno_location)
 340         movl    $EDOM, (%eax)
 341
 342         .p2align        4
 343 L(skip_errno_setting):
 344         /* Here if |x| is Inf or NAN. Continued.  */
 345         movss   ARG_X, %xmm3            /* load x */
 346         subss   %xmm3, %xmm3            /* Result is NaN */
 347         jmp     L(epilogue)
 348 END(__cosf_sse2)
 349
 350         .section .rodata, "a"
 351         .p2align 3
 352 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
 353         .long   0x00000000,0x00000000
 354         .long   0x54442d18,0x3fe921fb
 355         .long   0x54442d18,0x3ff921fb
 356         .long   0x7f3321d2,0x4002d97c
 357         .long   0x54442d18,0x400921fb
 358         .long   0x2955385e,0x400f6a7a
 359         .long   0x7f3321d2,0x4012d97c
 360         .long   0xe9bba775,0x4015fdbb
 361         .long   0x54442d18,0x401921fb
 362         .long   0xbeccb2bb,0x401c463a
 363         .long   0x2955385e,0x401f6a7a
 364         .type L(PIO4J), @object
 365         ASM_SIZE_DIRECTIVE(L(PIO4J))
 366
 367         .p2align 3
 368 L(_FPI): /* 4/Pi broken into sum of positive DP values */
 369         .long   0x00000000,0x00000000
 370         .long   0x6c000000,0x3ff45f30
 371         .long   0x2a000000,0x3e3c9c88
 372         .long   0xa8000000,0x3c54fe13
 373         .long   0xd0000000,0x3aaf47d4
 374         .long   0x6c000000,0x38fbb81b
 375         .long   0xe0000000,0x3714acc9
 376         .long   0x7c000000,0x3560e410
 377         .long   0x56000000,0x33bca2c7
 378         .long   0xac000000,0x31fbd778
 379         .long   0xe0000000,0x300b7246
 380         .long   0xe8000000,0x2e5d2126
 381         .long   0x48000000,0x2c970032
 382         .long   0xe8000000,0x2ad77504
 383         .long   0xe0000000,0x290921cf
 384         .long   0xb0000000,0x274deb1c
 385         .long   0xe0000000,0x25829a73
 386         .long   0xbe000000,0x23fd1046
 387         .long   0x10000000,0x2224baed
 388         .long   0x8e000000,0x20709d33
 389         .long   0x80000000,0x1e535a2f
 390         .long   0x64000000,0x1cef904e
 391         .long   0x30000000,0x1b0d6398
 392         .long   0x24000000,0x1964ce7d
 393         .long   0x16000000,0x17b908bf
 394         .type L(_FPI), @object
 395         ASM_SIZE_DIRECTIVE(L(_FPI))
 396
 397 /* Coefficients of polynomial
 398  for cos(x)~=1.0+x^2*DP_COS2_0+x^4*DP_COS2_1, |x|<2^-5.  */
 399         .p2align 3
 400 L(DP_COS2_0):
 401         .long   0xff5cc6fd,0xbfdfffff
 402         .type L(DP_COS2_0), @object
 403         ASM_SIZE_DIRECTIVE(L(DP_COS2_0))
 404
 405         .p2align 3
 406 L(DP_COS2_1):
 407         .long   0xb178dac5,0x3fa55514
 408         .type L(DP_COS2_1), @object
 409         ASM_SIZE_DIRECTIVE(L(DP_COS2_1))
 410
 411         .p2align 3
 412 L(DP_ZERONE):
 413         .long   0x00000000,0x00000000   /* 0.0 */
 414         .long   0x00000000,0xbff00000   /* 1.0 */
 415         .type L(DP_ZERONE),@object
 416         ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
 417
 418         .p2align 3
 419 L(DP_ONES):
 420         .long   0x00000000,0x3ff00000   /* +1.0 */
 421         .long   0x00000000,0xbff00000   /* -1.0 */
 422         .type L(DP_ONES), @object
 423         ASM_SIZE_DIRECTIVE(L(DP_ONES))
 424
 425 /* Coefficients of polynomial
 426  for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.  */
 427         .p2align 3
 428 L(DP_S3):
 429         .long   0x64e6b5b4,0x3ec71d72
 430         .type L(DP_S3), @object
 431         ASM_SIZE_DIRECTIVE(L(DP_S3))
 432
 433         .p2align 3
 434 L(DP_S1):
 435         .long   0x10c2688b,0x3f811111
 436         .type L(DP_S1), @object
 437         ASM_SIZE_DIRECTIVE(L(DP_S1))
 438
 439         .p2align 3
 440 L(DP_S4):
 441         .long   0x1674b58a,0xbe5a947e
 442         .type L(DP_S4), @object
 443         ASM_SIZE_DIRECTIVE(L(DP_S4))
 444
 445         .p2align 3
 446 L(DP_S2):
 447         .long   0x8b4bd1f9,0xbf2a019f
 448         .type L(DP_S2), @object
 449         ASM_SIZE_DIRECTIVE(L(DP_S2))
 450
 451         .p2align 3
 452 L(DP_S0):
 453         .long   0x55551cd9,0xbfc55555
 454         .type L(DP_S0), @object
 455         ASM_SIZE_DIRECTIVE(L(DP_S0))
 456
 457 /* Coefficients of polynomial
 458  for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.  */
 459         .p2align 3
 460 L(DP_C3):
 461         .long   0x9ac43cc0,0x3efa00eb
 462         .type L(DP_C3), @object
 463         ASM_SIZE_DIRECTIVE(L(DP_C3))
 464
 465         .p2align 3
 466 L(DP_C1):
 467         .long   0x545c50c7,0x3fa55555
 468         .type L(DP_C1), @object
 469         ASM_SIZE_DIRECTIVE(L(DP_C1))
 470
 471         .p2align 3
 472 L(DP_C4):
 473         .long   0xdd8844d7,0xbe923c97
 474         .type L(DP_C4), @object
 475         ASM_SIZE_DIRECTIVE(L(DP_C4))
 476
 477         .p2align 3
 478 L(DP_C2):
 479         .long   0x348b6874,0xbf56c16b
 480         .type L(DP_C2), @object
 481         ASM_SIZE_DIRECTIVE(L(DP_C2))
 482
 483         .p2align 3
 484 L(DP_C0):
 485         .long   0xfffe98ae,0xbfdfffff
 486         .type L(DP_C0), @object
 487         ASM_SIZE_DIRECTIVE(L(DP_C0))
 488
 489         .p2align 3
 490 L(DP_PIO4):
 491         .long   0x54442d18,0x3fe921fb   /* Pi/4 */
 492         .type L(DP_PIO4), @object
 493         ASM_SIZE_DIRECTIVE(L(DP_PIO4))
 494
 495         .p2align 3
 496 L(DP_2POW52):
 497         .long   0x00000000,0x43300000   /* +2^52 */
 498         .long   0x00000000,0xc3300000   /* -2^52 */
 499         .type L(DP_2POW52), @object
 500         ASM_SIZE_DIRECTIVE(L(DP_2POW52))
 501
 502         .p2align 3
 503 L(DP_INVPIO4):
 504         .long   0x6dc9c883,0x3ff45f30   /* 4/Pi */
 505         .type L(DP_INVPIO4), @object
 506         ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
 507
 508         .p2align 3
 509 L(DP_PIO4HI):
 510         .long   0x54000000,0xbfe921fb   /* High part of Pi/4 */
 511         .type L(DP_PIO4HI), @object
 512         ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
 513
 514         .p2align 3
 515 L(DP_PIO4LO):
 516         .long   0x11A62633,0xbe010b46   /* Low part of Pi/4 */
 517         .type L(DP_PIO4LO), @object
 518         ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
 519
 520         .p2align 2
 521 L(SP_INVPIO4):
 522         .long   0x3fa2f983              /* 4/Pi */
 523         .type L(SP_INVPIO4), @object
 524         ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
 525
 526         .p2align 4
 527 L(DP_ABS_MASK): /* Mask for getting DP absolute value */
 528         .long   0xffffffff,0x7fffffff
 529         .long   0xffffffff,0x7fffffff
 530         .type L(DP_ABS_MASK), @object
 531         ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
 532
 533         .p2align 3
 534 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
 535         .long   0x00000000,0xffffffff
 536         .type L(DP_HI_MASK), @object
 537         ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
 538
 539         .p2align 4
 540 L(SP_ABS_MASK): /* Mask for getting SP absolute value */
 541         .long   0x7fffffff,0x7fffffff
 542         .long   0x7fffffff,0x7fffffff
 543         .type L(SP_ABS_MASK), @object
 544         ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
 545
 546         .p2align 2
 547 L(SP_ONE):
 548         .long   0x3f800000              /* 1.0 */
 549         .type L(SP_ONE), @object
 550         ASM_SIZE_DIRECTIVE(L(SP_ONE))
 551
 552 weak_alias (__cosf, cosf)