sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S

   1 /* Optimized with sse2 version of sincosf
   2    Copyright (C) 2012-2018 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <http://www.gnu.org/licenses/>.  */
  18
  19 #include <sysdep.h>
  20 #include <errno.h>
  21
  22 /* Short algorithm description:
  23  *
  24  *  1) if |x|==0:    sin(x)=x,
  25  *                   cos(x)=1.
  26  *  2) if |x|<2^-27: sin(x)=x-x*DP_SMALL, raising underflow only when needed,
  27  *                   cos(x)=1-|x|.
  28  *  3) if |x|<2^-5 : sin(x)=x+x*x^2*DP_SIN2_0+x^5*DP_SIN2_1,
  29  *                   cos(x)=1+1*x^2*DP_COS2_0+x^4*DP_COS2_1
  30  *  4) if |x|< Pi/4: sin(x)=x+x*x^2*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))),
  31  *                   cos(x)=1+1*x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
  32  *  5) if |x| < 9*Pi/4:
  33  *      5.1) Range reduction:
  34  *          k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, t=|x|-j*Pi/4.
  35  *      5.2) Reconstruction:
  36  *          sign_sin = sign(x) * (-1.0)^(( n   >>2)&1)
  37  *          sign_cos =           (-1.0)^(((n+2)>>2)&1)
  38  *          poly_sin = ((((S4*t^2 + S3)*t^2 + S2)*t^2 + S1)*t^2 + S0)*t^2*t+t
  39  *          poly_cos = ((((C4*t^2 + C3)*t^2 + C2)*t^2 + C1)*t^2 + C0)*t^2*s+s, s=1.0
  40  *          if(n&2 != 0) {
  41  *              using cos(t) and sin(t) polynomials for |t|<Pi/4, results are
  42  *              cos(x) = poly_sin * sign_cos
  43  *              sin(x) = poly_cos * sign_sin
  44  *          } else {
  45  *              sin(x) = poly_sin * sign_sin
  46  *              cos(x) = poly_cos * sign_cos
  47  *          }
  48  *  6) if |x| < 2^23, large args:
  49  *      6.1) Range reduction:
  50  *          k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4
  51  *      6.2) Reconstruction same as (5.2).
  52  *  7) if |x| >= 2^23, very large args:
  53  *      7.1) Range reduction:
  54  *          k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4.
  55  *      7.2) Reconstruction same as (5.2).
  56  *  8) if x is Inf, return x-x, and set errno=EDOM.
  57  *  9) if x is NaN, return x-x.
  58  *
  59  * Special cases:
  60  *  sin/cos(+-0) = +-0/1 not raising inexact/underflow,
  61  *  sin/cos(subnormal) raises inexact/underflow,
  62  *  sin/cos(min_normalized) raises inexact/underflow,
  63  *  sin/cos(normalized) raises inexact,
  64  *  sin/cos(Inf) = NaN, raises invalid, sets errno to EDOM,
  65  *  sin/cos(NaN) = NaN.
  66  */
  67
  68 #ifdef  PIC /* PIC: static data is addressed as symbol@GOTOFF relative to %ebx */
  69 # define MO1(symbol)                    L(symbol)##@GOTOFF(%ebx)
  70 # define MO2(symbol,reg2,_scale)        L(symbol)##@GOTOFF(%ebx,reg2,_scale)
  71 # define CFI_PUSH(REG)  cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
  72 # define CFI_POP(REG)   cfi_adjust_cfa_offset(-4); cfi_restore(REG)
  73 # define PUSH(REG)                      pushl REG; CFI_PUSH(REG)
  74 # define POP(REG)                       popl REG; CFI_POP(REG)
  75 # define ENTRANCE                       PUSH(%ebx); LOAD_PIC_REG(bx)
  76 # define RETURN                         POP(%ebx); ret; CFI_PUSH(%ebx) /* CFI_PUSH after ret: presumably re-establishes unwind info for the code that follows */
  77 # define ARG_X                          8(%esp) /* 4 bytes further than non-PIC: ENTRANCE pushed %ebx */
  78 # define ARG_SIN_PTR                    12(%esp)
  79 # define ARG_COS_PTR                    16(%esp)
  80 #else /* !PIC: absolute addressing, no register to save, arguments directly above the return address */
  81 # define MO1(symbol)                    L(symbol)
  82 # define MO2(symbol,reg2,_scale)        L(symbol)(,reg2,_scale)
  83 # define ENTRANCE
  84 # define RETURN                         ret
  85 # define ARG_X                          4(%esp)
  86 # define ARG_SIN_PTR                    8(%esp)
  87 # define ARG_COS_PTR                    12(%esp)
  88 #endif /* PIC */
  89
  90         .text
  91 ENTRY(__sincosf_sse2)
  92         /* Input: single precision x on stack at address ARG_X */
  93         /*        pointer to sin result on stack at address ARG_SIN_PTR */
  94         /*        pointer to cos result on stack at address ARG_COS_PTR */
  95         /* Output: SP sin(x) and cos(x) are stored through the two pointers */
  96         ENTRANCE
  97         movl    ARG_X, %eax             /* Bits of x */
  98         cvtss2sd ARG_X, %xmm0           /* DP x */
  99         andl    $0x7fffffff, %eax       /* Bits of |x| (sign bit cleared) */
 100
 101         cmpl    $0x3f490fdb, %eax       /* |x|<Pi/4 ? */
 102         jb      L(arg_less_pio4)
 103
 104         /* Here if |x|>=Pi/4 */
 105         movd    %eax, %xmm3             /* SP |x| */
 106         andpd   MO1(DP_ABS_MASK),%xmm0  /* DP |x| */
 107         movss   MO1(SP_INVPIO4), %xmm2  /* SP 1/(Pi/4) */
 108
 109         cmpl    $0x40e231d6, %eax       /* |x|<9*Pi/4 ? */
 110         jae     L(large_args)
 111
 112         /* Here if Pi/4<=|x|<9*Pi/4 */
 113         mulss   %xmm3, %xmm2            /* SP |x|/(Pi/4) */
 114         movl    ARG_X, %ecx             /* Load x */
 115         cvttss2si %xmm2, %eax           /* k, number of Pi/4 in x */
 116         shrl    $29, %ecx               /* (sign of x) << 2; bits 0-1 are exponent bits, shifted out later */
 117         addl    $1, %eax                /* n = k+1 */
 118         movl    $0x0e, %edx             /* mask that clears bit 0, so j is even */
 119         andl    %eax, %edx              /* j = (k+1)&0x0e */
 120         subsd   MO2(PIO4J,%edx,8), %xmm0/* t = |x| - j * Pi/4 */
 121
 122 L(reconstruction):
 123         /* Input: %eax=n, %xmm0=t, %ecx=x>>29 (sign of x in bit 2) */
 124         /* Packed values below: low lane = sin polynomial, high lane = cos polynomial */
 125         movaps  %xmm0, %xmm4            /* t */
 126         movhpd  MO1(DP_ONES), %xmm4     /* 1|t */
 127         mulsd   %xmm0, %xmm0            /* y=t^2 */
 128         movl    $2, %edx
 129         unpcklpd %xmm0, %xmm0           /* y|y */
 130         addl    %eax, %edx              /* n+2 */
 131         movaps  %xmm0, %xmm1            /* y|y */
 132         mulpd   %xmm0, %xmm0            /* z=t^4|z=t^4 */
 133
 134         movaps  MO1(DP_SC4), %xmm2      /* S4 */
 135         mulpd   %xmm0, %xmm2            /* z*S4 */
 136         movaps  MO1(DP_SC3), %xmm3      /* S3 */
 137         mulpd   %xmm0, %xmm3            /* z*S3 */
 138         xorl    %eax, %ecx              /* (sign_x ^ (n>>2))<<2 */
 139         addpd   MO1(DP_SC2), %xmm2      /* S2+z*S4 */
 140         mulpd   %xmm0, %xmm2            /* z*(S2+z*S4) */
 141         shrl    $2, %edx                /* (n+2)>>2 */
 142         addpd   MO1(DP_SC1), %xmm3      /* S1+z*S3 */
 143         mulpd   %xmm0, %xmm3            /* z*(S1+z*S3) */
 144         shrl    $2, %ecx                /* sign_x ^ n>>2 */
 145         addpd   MO1(DP_SC0), %xmm2      /* S0+z*(S2+z*S4) */
 146         andl    $1, %edx                /* sign_cos = ((n+2)>>2)&1 */
 147         mulpd   %xmm1, %xmm2            /* y*(S0+z*(S2+z*S4)) */
 148         andl    $1, %ecx                /* sign_sin = sign_x ^ ((n>>2)&1) */
 149         addpd   %xmm2, %xmm3            /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
 150         mulpd   %xmm4, %xmm3            /*t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
 151         testl   $2, %eax                /* n&2 != 0 ? */
 152         addpd   %xmm4, %xmm3            /*t+t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/
 153         jnz     L(sin_result_sin_poly)  /* taken when n&2 != 0; addpd leaves the flags of testl intact */
 154
 155 /*L(sin_result_cos_poly):*/
 156         /*
 157          * Here if n&2 == 0 (fall-through), i.e.
 158          * sin(x) = poly_sin * sign_sin
 159          * cos(x) = poly_cos * sign_cos
 160          */
 161         movsd   MO2(DP_ONES,%ecx,8), %xmm4/* 0|sign_sin */
 162         movhpd  MO2(DP_ONES,%edx,8), %xmm4/* sign_cos|sign_sin */
 163         mulpd   %xmm4, %xmm3            /* result_cos|result_sin */
 164         movl    ARG_SIN_PTR, %eax       /* %eax = pointer to sin result */
 165         cvtpd2ps %xmm3, %xmm0           /* SP results */
 166         movl    ARG_COS_PTR, %ecx       /* %ecx = pointer to cos result */
 167         movss   %xmm0, (%eax)           /* store sin(x) from xmm0[0] */
 168         shufps  $1, %xmm0, %xmm0        /* move cos(x) to xmm0[0] */
 169         movss   %xmm0, (%ecx)           /* store cos(x) */
 170         RETURN
 171
 172         .p2align        4
 173 L(sin_result_sin_poly):
 174         /*
 175          * Here if n&2 != 0 (the label name notwithstanding), i.e.
 176          * cos(x) = poly_sin * sign_cos
 177          * sin(x) = poly_cos * sign_sin
 178          */
 179         movsd   MO2(DP_ONES,%edx,8), %xmm4/* 0|sign_cos */
 180         movhpd  MO2(DP_ONES,%ecx,8), %xmm4/* sign_sin|sign_cos */
 181         mulpd   %xmm4, %xmm3            /* result_sin|result_cos */
 182         movl    ARG_SIN_PTR, %eax       /* %eax = pointer to sin result */
 183         cvtpd2ps %xmm3, %xmm0           /* SP results */
 184         movl    ARG_COS_PTR, %ecx       /* %ecx = pointer to cos result */
 185         movss   %xmm0, (%ecx)           /* store cos(x) from xmm0[0] */
 186         shufps  $1, %xmm0, %xmm0        /* move sin(x) to xmm0[0] */
 187         movss   %xmm0, (%eax)           /* store sin(x) */
 188         RETURN
 189
 190         .p2align        4
 191 L(large_args):
 192         /* Here if |x|>=9*Pi/4 */
 193         cmpl    $0x7f800000, %eax       /* x is Inf or NaN ? */
 194         jae     L(arg_inf_or_nan)       /* target tests ZF of this cmpl: equal means Inf */
 195
 196         /* Here if finite |x|>=9*Pi/4 */
 197         cmpl    $0x4b000000, %eax       /* |x|<2^23 ? */
 198         jae     L(very_large_args)
 199
 200         /* Here if 9*Pi/4<=|x|<2^23 */
 201         movsd   MO1(DP_INVPIO4), %xmm1  /* 1/(Pi/4) */
 202         mulsd   %xmm0, %xmm1            /* |x|/(Pi/4) */
 203         cvttsd2si %xmm1, %eax           /* k=trunc(|x|/(Pi/4)) */
 204         addl    $1, %eax                /* n = k+1 */
 205         movl    %eax, %edx              /* copy, so %eax keeps n for L(reconstruction) */
 206         andl    $0xfffffffe, %edx       /* j=(k+1)&0xfffffffe */
 207         cvtsi2sdl %edx, %xmm4           /* DP j */
 208         movl    ARG_X, %ecx             /* Load x */
 209         movsd   MO1(DP_PIO4HI), %xmm2   /* -PIO4HI = high part of -Pi/4 */
 210         shrl    $29, %ecx               /* (sign of x) << 2 */
 211         mulsd   %xmm4, %xmm2            /* -j*PIO4HI */
 212         movsd   MO1(DP_PIO4LO), %xmm3   /* -PIO4LO = low part of -Pi/4 */
 213         addsd   %xmm2, %xmm0            /* |x| - j*PIO4HI */
 214         mulsd   %xmm3, %xmm4            /* -j*PIO4LO */
 215         addsd   %xmm4, %xmm0            /* t = |x| - j*PIO4HI - j*PIO4LO */
 216         jmp     L(reconstruction)
 217
 218         .p2align        4
 219 L(very_large_args):
 220         /* Here if finite |x|>=2^23; %eax = bits of |x|, %xmm0 = DP |x| */
 221
 222         /* bitpos = (ix>>23) - BIAS_32 + 59; */
 223         shrl    $23, %eax               /* eb = biased exponent of x */
 224         subl    $68, %eax               /* bitpos=eb-0x7f+59, where 0x7f */
 225                                                         /*is exponent bias */
 226         movl    $28, %ecx               /* %cl=28 */
 227         movl    %eax, %edx              /* bitpos copy */
 228
 229         /* j = bitpos/28; */
 230         div     %cl                     /* j in register %al=%ax/%cl */
 231         movapd  %xmm0, %xmm3            /* |x| */
 232         andl    $0xff, %eax             /* clear unneeded remainder from %ah*/
 233
 234         imull   $28, %eax, %ecx         /* j*28 */
 235         movsd   MO1(DP_HI_MASK), %xmm4  /* DP_HI_MASK */
 236         movapd  %xmm0, %xmm5            /* |x| */
 237         mulsd   -2*8+MO2(_FPI,%eax,8), %xmm3/* tmp3 = FPI[j-2]*|x| */
 238         movapd  %xmm0, %xmm1            /* |x| */
 239         mulsd   -1*8+MO2(_FPI,%eax,8), %xmm5/* tmp2 = FPI[j-1]*|x| */
 240         mulsd   0*8+MO2(_FPI,%eax,8), %xmm0/* tmp0 = FPI[j]*|x| */
 241         addl    $19, %ecx               /* j*28+19 */
 242         mulsd   1*8+MO2(_FPI,%eax,8), %xmm1/* tmp1 = FPI[j+1]*|x| */
 243         cmpl    %ecx, %edx              /* bitpos>=j*28+19 ? */
 244         jl      L(very_large_skip1)
 245
 246         /* Here if bitpos>=j*28+19 */
 247         andpd   %xmm3, %xmm4            /* HI(tmp3) */
 248         subsd   %xmm4, %xmm3            /* tmp3 = tmp3 - HI(tmp3) */
 249 L(very_large_skip1):
 250
 251         movsd   MO1(DP_2POW52), %xmm6   /* +2^52 */
 252         movapd  %xmm5, %xmm2            /* tmp2 copy */
 253         addsd   %xmm3, %xmm5            /* tmp5 = tmp3 + tmp2 */
 254         movl    $1, %edx                /* mask for k&1 below */
 255         addsd   %xmm5, %xmm6            /* tmp6 = tmp5 + 2^52 */
 256         movsd   8+MO1(DP_2POW52), %xmm4 /* -2^52 (second table entry) */
 257         movd    %xmm6, %eax             /* k = I64_LO(tmp6); */
 258         addsd   %xmm6, %xmm4            /* tmp4 = tmp6 - 2^52 */
 259         movl    ARG_X, %ecx             /* Load x */
 260         comisd  %xmm5, %xmm4            /* tmp4 > tmp5 ? */
 261         jbe     L(very_large_skip2)
 262
 263         /* Here if tmp4 > tmp5 */
 264         subl    $1, %eax                /* k-- */
 265         addsd   8+MO1(DP_ONES), %xmm4   /* tmp4 -= 1.0 */
 266 L(very_large_skip2):
 267
 268         andl    %eax, %edx              /* k&1 */
 269         subsd   %xmm4, %xmm3            /* tmp3 -= tmp4 */
 270         addsd   MO2(DP_ZERONE,%edx,8), %xmm3/* t  = DP_ZERONE[k&1] + tmp3 */
 271         addsd   %xmm2, %xmm3            /* t += tmp2 */
 272         shrl    $29, %ecx               /* (sign of x) << 2 */
 273         addsd   %xmm3, %xmm0            /* t += tmp0 */
 274         addl    $1, %eax                /* n=k+1 */
 275         addsd   %xmm1, %xmm0            /* t += tmp1 */
 276         mulsd   MO1(DP_PIO4), %xmm0     /* t *= PI04 */
 277
 278         jmp     L(reconstruction)       /* end of very_large_args path */
 279
 280         .p2align        4
 281 L(arg_less_pio4):
 282         /* Here if |x|<Pi/4; %eax = bits of |x|, %xmm0 = DP x (sign kept) */
 283         cmpl    $0x3d000000, %eax       /* |x|<2^-5 ? */
 284         jl      L(arg_less_2pn5)
 285
 286         /* Here if 2^-5<=|x|<Pi/4 */
 287         movaps  %xmm0, %xmm3            /* DP x */
 288         movhpd  MO1(DP_ONES), %xmm3     /* DP 1|x */
 289         mulsd   %xmm0, %xmm0            /* DP y=x^2 */
 290         unpcklpd %xmm0, %xmm0           /* DP y|y */
 291         movaps  %xmm0, %xmm1            /* y|y */
 292         mulpd   %xmm0, %xmm0            /* z=x^4|z=x^4 */
 293         /* Packed values below: low lane = sin polynomial (S*), high lane = cos polynomial (C*) */
 294         movapd  MO1(DP_SC4), %xmm4      /* S4 */
 295         mulpd   %xmm0, %xmm4            /* z*S4 */
 296         movapd  MO1(DP_SC3), %xmm5      /* S3 */
 297         mulpd   %xmm0, %xmm5            /* z*S3 */
 298         addpd   MO1(DP_SC2), %xmm4      /* S2+z*S4 */
 299         mulpd   %xmm0, %xmm4            /* z*(S2+z*S4) */
 300         addpd   MO1(DP_SC1), %xmm5      /* S1+z*S3 */
 301         mulpd   %xmm0, %xmm5            /* z*(S1+z*S3) */
 302         addpd   MO1(DP_SC0), %xmm4      /* S0+z*(S2+z*S4) */
 303         mulpd   %xmm1, %xmm4            /* y*(S0+z*(S2+z*S4)) */
 304         mulpd   %xmm3, %xmm5            /* x*z*(S1+z*S3) */
 305         mulpd   %xmm3, %xmm4            /* x*y*(S0+z*(S2+z*S4)) */
 306         addpd   %xmm5, %xmm4            /*x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
 307         movl    ARG_SIN_PTR, %eax       /* %eax = pointer to sin result */
 308         addpd   %xmm4, %xmm3            /*x+x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/
 309         movl    ARG_COS_PTR, %ecx       /* %ecx = pointer to cos result */
 310         cvtpd2ps %xmm3, %xmm0           /* SP results */
 311         movss   %xmm0, (%eax)           /* store sin(x) from xmm0[0] */
 312         shufps  $1, %xmm0, %xmm0        /* move cos(x) to xmm0[0] */
 313         movss   %xmm0, (%ecx)           /* store cos(x) */
 314         RETURN
 315
 316         .p2align        4
 317 L(arg_less_2pn5):
 318         /* Here if |x|<2^-5; %eax = bits of |x|, %xmm0 = DP x */
 319         cmpl    $0x32000000, %eax       /* |x|<2^-27 ? */
 320         jl      L(arg_less_2pn27)
 321
 322         /* Here if 2^-27<=|x|<2^-5 */
 323         movaps  %xmm0, %xmm1            /* DP x */
 324         movhpd  MO1(DP_ONES), %xmm1     /* DP 1|x */
 325         mulsd   %xmm0, %xmm0            /* DP x^2 */
 326         unpcklpd %xmm0, %xmm0           /* DP x^2|x^2 */
 327         /* Comments below describe the low (sin) lane; the high lane uses the cos coefficients and 1 in place of x */
 328         movaps  MO1(DP_SINCOS2_1), %xmm3/* DP DP_SIN2_1 */
 329         mulpd   %xmm0, %xmm3            /* DP x^2*DP_SIN2_1 */
 330         addpd   MO1(DP_SINCOS2_0), %xmm3/* DP DP_SIN2_0+x^2*DP_SIN2_1 */
 331         mulpd   %xmm0, %xmm3            /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
 332         mulpd   %xmm1, %xmm3            /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
 333         addpd   %xmm1, %xmm3            /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
 334         movl    ARG_SIN_PTR, %eax       /* %eax = pointer to sin result */
 335         cvtpd2ps %xmm3, %xmm0           /* SP results */
 336         movl    ARG_COS_PTR, %ecx       /* %ecx = pointer to cos result */
 337         movss   %xmm0, (%eax)           /* store sin(x) from xmm0[0] */
 338         shufps  $1, %xmm0, %xmm0        /* move cos(x) to xmm0[0] */
 339         movss   %xmm0, (%ecx)           /* store cos(x) */
 340         RETURN
 341
 342         .p2align        4
 343 L(arg_less_2pn27):
 344         movss   ARG_X, %xmm7            /* SP x (also used by L(arg_zero)) */
 345         cmpl    $0, %eax                /* x=0 ? */
 346         je      L(arg_zero)             /* in case x=0 return sin(+-0)==+-0 */
 347         /* Here if |x|<2^-27 */
 348         /*
 349          * Special cases here:
 350          *  sin(subnormal) raises inexact/underflow
 351          *  sin(min_normalized) raises inexact/underflow
 352          *  sin(normalized) raises inexact
 353          *  cos(here)=1-|x| (raising inexact)
 354          */
 355         movaps  %xmm0, %xmm3            /* DP x */
 356         mulsd   MO1(DP_SMALL), %xmm0    /* DP x*DP_SMALL */
 357         subsd   %xmm0, %xmm3            /* DP sin result is x-x*DP_SMALL */
 358         andps   MO1(SP_ABS_MASK), %xmm7 /* SP |x| */
 359         cvtsd2ss %xmm3, %xmm0           /* sin(x) */
 360         movl    ARG_SIN_PTR, %eax       /* %eax = pointer to sin result */
 361         movss   MO1(SP_ONE), %xmm1      /* SP 1.0 */
 362         movss   %xmm0, (%eax)           /* sin(x) store */
 363         movl    ARG_COS_PTR, %ecx       /* %ecx = pointer to cos result */
 364         subss   %xmm7, %xmm1            /* cos(x) = 1.0-|x| */
 365         movss   %xmm1, (%ecx)           /* cos(x) store */
 366         RETURN
 367
 368         .p2align        4
 369 L(arg_zero):                            /* x is +-0; %xmm7 = SP x, loaded at L(arg_less_2pn27) */
 370         movss   MO1(SP_ONE), %xmm0      /* 1.0 */
 371         movl    ARG_SIN_PTR, %eax       /* %eax = pointer to sin result */
 372         movl    ARG_COS_PTR, %ecx       /* %ecx = pointer to cos result */
 373         movss   %xmm7, (%eax)           /* sin(+-0)==x */
 374         movss   %xmm0, (%ecx)           /* cos(+-0)==1 */
 375         RETURN
 376
 377         .p2align        4
 378 L(arg_inf_or_nan):
 379         /* Here if |x| is Inf or NAN.  Flags still come from */
 380         /* cmpl $0x7f800000, %eax in L(large_args): ZF=1 for Inf, ZF=0 for NaN */
 381         jne     L(skip_errno_setting)   /* in case of x is NaN */
 382
 383         /* Here if x is Inf. Set errno to EDOM.  */
 384         call    JUMPTARGET(__errno_location)
 385         movl    $EDOM, (%eax)
 386         .p2align        4
 387 L(skip_errno_setting):
 388         /* Here if |x| is Inf or NAN. Continued.  x is loaded only now: %xmm registers are call-clobbered */
 389         movss   ARG_X, %xmm7            /* SP x */
 390         subss   %xmm7, %xmm7            /* x-x, result is NaN */
 391         movl    ARG_SIN_PTR, %eax       /* %eax = pointer to sin result */
 392         movl    ARG_COS_PTR, %ecx       /* %ecx = pointer to cos result */
 393         movss   %xmm7, (%eax)           /* sin(x) = NaN */
 394         movss   %xmm7, (%ecx)           /* cos(x) = NaN */
 395         RETURN
 396 END(__sincosf_sse2)
 397
 398         .section .rodata, "a"
 399         .p2align 3
 400 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10; each DP entry is stored as low word, high word */
 401         .long   0x00000000,0x00000000   /* j=0 */
 402         .long   0x54442d18,0x3fe921fb   /* j=1 */
 403         .long   0x54442d18,0x3ff921fb   /* j=2 */
 404         .long   0x7f3321d2,0x4002d97c   /* j=3 */
 405         .long   0x54442d18,0x400921fb   /* j=4 */
 406         .long   0x2955385e,0x400f6a7a   /* j=5 */
 407         .long   0x7f3321d2,0x4012d97c   /* j=6 */
 408         .long   0xe9bba775,0x4015fdbb   /* j=7 */
 409         .long   0x54442d18,0x401921fb   /* j=8 */
 410         .long   0xbeccb2bb,0x401c463a   /* j=9 */
 411         .long   0x2955385e,0x401f6a7a   /* j=10 */
 412         .type L(PIO4J), @object
 413         ASM_SIZE_DIRECTIVE(L(PIO4J))
 414
 415         .p2align 3
 416 L(_FPI): /* 4/Pi broken into sum of positive DP values; L(very_large_args) reads FPI[j-2]..FPI[j+1] */
 417         .long   0x00000000,0x00000000   /* FPI[0] = 0.0 */
 418         .long   0x6c000000,0x3ff45f30   /* FPI[1] */
 419         .long   0x2a000000,0x3e3c9c88   /* FPI[2] */
 420         .long   0xa8000000,0x3c54fe13   /* FPI[3] */
 421         .long   0xd0000000,0x3aaf47d4   /* FPI[4] */
 422         .long   0x6c000000,0x38fbb81b   /* FPI[5] */
 423         .long   0xe0000000,0x3714acc9   /* FPI[6] */
 424         .long   0x7c000000,0x3560e410   /* FPI[7] */
 425         .long   0x56000000,0x33bca2c7   /* FPI[8] */
 426         .long   0xac000000,0x31fbd778   /* FPI[9] */
 427         .long   0xe0000000,0x300b7246   /* FPI[10] */
 428         .long   0xe8000000,0x2e5d2126   /* FPI[11] */
 429         .long   0x48000000,0x2c970032   /* FPI[12] */
 430         .long   0xe8000000,0x2ad77504   /* FPI[13] */
 431         .long   0xe0000000,0x290921cf   /* FPI[14] */
 432         .long   0xb0000000,0x274deb1c   /* FPI[15] */
 433         .long   0xe0000000,0x25829a73   /* FPI[16] */
 434         .long   0xbe000000,0x23fd1046   /* FPI[17] */
 435         .long   0x10000000,0x2224baed   /* FPI[18] */
 436         .long   0x8e000000,0x20709d33   /* FPI[19] */
 437         .long   0x80000000,0x1e535a2f   /* FPI[20] */
 438         .long   0x64000000,0x1cef904e   /* FPI[21] */
 439         .long   0x30000000,0x1b0d6398   /* FPI[22] */
 440         .long   0x24000000,0x1964ce7d   /* FPI[23] */
 441         .long   0x16000000,0x17b908bf   /* FPI[24] */
 442         .type L(_FPI), @object
 443         ASM_SIZE_DIRECTIVE(L(_FPI))
 444
 445 /* Coefficients of polynomials for */
 446 /* sin(x)~=x+x*x^2*(DP_SIN2_0+x^2*DP_SIN2_1) in low  DP part, */
 447 /* cos(x)~=1+1*x^2*(DP_COS2_0+x^2*DP_COS2_1) in high DP part, */
 448 /* for |x|<2^-5. */
 449         .p2align 4
 450 L(DP_SINCOS2_0):
 451         .long   0x5543d49d,0xbfc55555   /* DP_SIN2_0 (low DP part) */
 452         .long   0xff5cc6fd,0xbfdfffff   /* DP_COS2_0 (high DP part) */
 453         .type L(DP_SINCOS2_0), @object
 454         ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_0))
 455
 456         .p2align 4
 457 L(DP_SINCOS2_1):
 458         .long   0x75cec8c5,0x3f8110f4   /* DP_SIN2_1 (low DP part) */
 459         .long   0xb178dac5,0x3fa55514   /* DP_COS2_1 (high DP part) */
 460         .type L(DP_SINCOS2_1), @object
 461         ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_1))
 462
 463         .p2align 3
 464 L(DP_ZERONE):                           /* indexed by k&1 in L(very_large_args) */
 465         .long   0x00000000,0x00000000   /* 0.0 */
 466         .long   0x00000000,0xbff00000   /* -1.0 (sign bit is set) */
 467         .type L(DP_ZERONE), @object
 468         ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
 469
 470         .p2align 3
 471 L(DP_ONES):                             /* indexed by a 0/1 sign flag to pick the result sign */
 472         .long   0x00000000,0x3ff00000   /* +1.0 */
 473         .long   0x00000000,0xbff00000   /* -1.0 */
 474         .type L(DP_ONES), @object
 475         ASM_SIZE_DIRECTIVE(L(DP_ONES))
 476
 477 /* Coefficients of polynomials for */
 478 /* sin(t)~=t+t*t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))) in low  DP part, */
 479 /* cos(t)~=1+1*t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))) in high DP part, */
 480 /* for |t|<Pi/4. */
 481         .p2align 4
 482 L(DP_SC4):
 483         .long   0x1674b58a,0xbe5a947e   /* S4 (low DP part) */
 484         .long   0xdd8844d7,0xbe923c97   /* C4 (high DP part) */
 485         .type L(DP_SC4), @object
 486         ASM_SIZE_DIRECTIVE(L(DP_SC4))
 487
 488         .p2align 4
 489 L(DP_SC3):
 490         .long   0x64e6b5b4,0x3ec71d72   /* S3 (low DP part) */
 491         .long   0x9ac43cc0,0x3efa00eb   /* C3 (high DP part) */
 492         .type L(DP_SC3), @object
 493         ASM_SIZE_DIRECTIVE(L(DP_SC3))
 494
 495         .p2align 4
 496 L(DP_SC2):
 497         .long   0x8b4bd1f9,0xbf2a019f   /* S2 (low DP part) */
 498         .long   0x348b6874,0xbf56c16b   /* C2 (high DP part) */
 499         .type L(DP_SC2), @object
 500         ASM_SIZE_DIRECTIVE(L(DP_SC2))
 501
 502         .p2align 4
 503 L(DP_SC1):
 504         .long   0x10c2688b,0x3f811111   /* S1 (low DP part) */
 505         .long   0x545c50c7,0x3fa55555   /* C1 (high DP part) */
 506         .type L(DP_SC1), @object
 507         ASM_SIZE_DIRECTIVE(L(DP_SC1))
 508
 509         .p2align 4
 510 L(DP_SC0):
 511         .long   0x55551cd9,0xbfc55555   /* S0 (low DP part) */
 512         .long   0xfffe98ae,0xbfdfffff   /* C0 (high DP part) */
 513         .type L(DP_SC0), @object
 514         ASM_SIZE_DIRECTIVE(L(DP_SC0))
 515
 516         .p2align 3
 517 L(DP_SMALL):                            /* used for sin(x)=x-x*DP_SMALL when |x|<2^-27 */
 518         .long   0x00000000,0x3cd00000   /* 2^(-50) */
 519         .type L(DP_SMALL), @object
 520         ASM_SIZE_DIRECTIVE(L(DP_SMALL))
 521
 522         .p2align 3
 523 L(DP_PIO4):                             /* final scale factor of t in L(very_large_args) */
 524         .long   0x54442d18,0x3fe921fb   /* Pi/4 */
 525         .type L(DP_PIO4), @object
 526         ASM_SIZE_DIRECTIVE(L(DP_PIO4))
 527
 528         .p2align 3
 529 L(DP_2POW52):                           /* added then subtracted in L(very_large_args) to extract k */
 530         .long   0x00000000,0x43300000   /* +2^52 */
 531         .long   0x00000000,0xc3300000   /* -2^52 */
 532         .type L(DP_2POW52), @object
 533         ASM_SIZE_DIRECTIVE(L(DP_2POW52))
 534
 535         .p2align 3
 536 L(DP_INVPIO4):                          /* DP multiplier used for 9*Pi/4<=|x|<2^23 */
 537         .long   0x6dc9c883,0x3ff45f30   /* 4/Pi */
 538         .type L(DP_INVPIO4), @object
 539         ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
 540
 541         .p2align 3
 542 L(DP_PIO4HI):
 543         .long   0x54000000,0xbfe921fb   /* High part of -Pi/4 (stored negated, so the code adds j*this) */
 544         .type L(DP_PIO4HI), @object
 545         ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
 546
 547         .p2align 3
 548 L(DP_PIO4LO):
 549         .long   0x11A62633,0xbe010b46   /* Low part of -Pi/4 (stored negated, so the code adds j*this) */
 550         .type L(DP_PIO4LO), @object
 551         ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
 552
 553         .p2align 2
 554 L(SP_INVPIO4):                          /* SP multiplier used for Pi/4<=|x|<9*Pi/4 */
 555         .long   0x3fa2f983              /* 4/Pi */
 556         .type L(SP_INVPIO4), @object
 557         ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
 558
 559         .p2align 4
 560 L(DP_ABS_MASK): /* Mask for getting DP absolute value */
 561         .long   0xffffffff,0x7fffffff   /* low lane: clears only the sign bit */
 562         .long   0xffffffff,0x7fffffff   /* high lane: same mask */
 563         .type L(DP_ABS_MASK), @object
 564         ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
 565
 566         .p2align 3
 567 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
 568         .long   0x00000000,0xffffffff   /* keeps the upper 32 bits: sign, exponent, top 20 fraction bits */
 569         .type L(DP_HI_MASK), @object
 570         ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
 571
 572         .p2align 4
 573 L(SP_ABS_MASK): /* Mask for getting SP absolute value */
 574         .long   0x7fffffff,0x7fffffff   /* clears the sign bit of each SP element */
 575         .long   0x7fffffff,0x7fffffff
 576         .type L(SP_ABS_MASK), @object
 577         ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
 578
 579         .p2align 2
 580 L(SP_ONE):                              /* cos result for x=+-0; minuend of 1-|x| for |x|<2^-27 */
 581         .long   0x3f800000              /* 1.0 */
 582         .type L(SP_ONE), @object
 583         ASM_SIZE_DIRECTIVE(L(SP_ONE))
 584
 585 weak_alias(__sincosf, sincosf) /* NOTE(review): __sincosf is not defined in this file (the entry point here is __sincosf_sse2); presumably supplied by the multiarch dispatcher - confirm */