sysdeps/i386/fpu/s_cbrt.S

   1 /* Compute cubic root of double value.
   2    Copyright (C) 1997-2013 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
   5    Ulrich Drepper <drepper@cygnus.com>, 1997.
   6
   7    The GNU C Library is free software; you can redistribute it and/or
   8    modify it under the terms of the GNU Lesser General Public
   9    License as published by the Free Software Foundation; either
  10    version 2.1 of the License, or (at your option) any later version.
  11
  12    The GNU C Library is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    Lesser General Public License for more details.
  16
  17    You should have received a copy of the GNU Lesser General Public
  18    License along with the GNU C Library; if not, see
  19    <http://www.gnu.org/licenses/>.  */
  20
  21 #include <machine/asm.h>
  22
  23         .section .rodata
  24
  25         .align ALIGNARG(4)
  26         .type f7,@object
  27 f7:     .double -0.145263899385486377
  28         ASM_SIZE_DIRECTIVE(f7)
  29         .type f6,@object
  30 f6:     .double 0.784932344976639262
  31         ASM_SIZE_DIRECTIVE(f6)
  32         .type f5,@object
  33 f5:     .double -1.83469277483613086
  34         ASM_SIZE_DIRECTIVE(f5)
  35         .type f4,@object
  36 f4:     .double 2.44693122563534430
  37         ASM_SIZE_DIRECTIVE(f4)
  38         .type f3,@object
  39 f3:     .double -2.11499494167371287
  40         ASM_SIZE_DIRECTIVE(f3)
  41         .type f2,@object
  42 f2:     .double 1.50819193781584896
  43         ASM_SIZE_DIRECTIVE(f2)
  44         .type f1,@object
  45 f1:     .double 0.354895765043919860
  46         ASM_SIZE_DIRECTIVE(f1)
  47
  48 #define CBRT2           1.2599210498948731648
  49 #define ONE_CBRT2       0.793700525984099737355196796584
  50 #define SQR_CBRT2       1.5874010519681994748
  51 #define ONE_SQR_CBRT2   0.629960524947436582364439673883
  52
  53         .type factor,@object
  54 factor: .double ONE_SQR_CBRT2
  55         .double ONE_CBRT2
  56         .double 1.0
  57         .double CBRT2
  58         .double SQR_CBRT2
  59         ASM_SIZE_DIRECTIVE(factor)
  60
  61         .type two54,@object
  62 two54:  .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43
  63         ASM_SIZE_DIRECTIVE(two54)
  64
  65 #ifdef PIC
  66 #define MO(op) op##@GOTOFF(%ebx)
  67 #define MOX(op,x) op##@GOTOFF(%ebx,x,1)
  68 #else
  69 #define MO(op) op
  70 #define MOX(op,x) op(x)
  71 #endif
  72
  73         .text
  74 ENTRY(__cbrt)
  75         movl    4(%esp), %ecx
  76         movl    8(%esp), %eax
  77         movl    %eax, %edx
  78         andl    $0x7fffffff, %eax
  79         orl     %eax, %ecx
  80         jz      1f
  81         xorl    %ecx, %ecx
  82         cmpl    $0x7ff00000, %eax
  83         jae     1f
  84
  85 #ifdef PIC
  86         pushl   %ebx
  87         cfi_adjust_cfa_offset (4)
  88         cfi_rel_offset (ebx, 0)
  89         LOAD_PIC_REG (bx)
  90 #endif
  91
  92         cmpl    $0x00100000, %eax
  93         jae     2f
  94
  95 #ifdef PIC
  96         fldl    8(%esp)
  97 #else
  98         fldl    4(%esp)
  99 #endif
 100         fmull   MO(two54)
 101         movl    $-54, %ecx
 102 #ifdef PIC
 103         fstpl   8(%esp)
 104         movl    12(%esp), %eax
 105 #else
 106         fstpl   4(%esp)
 107         movl    8(%esp), %eax
 108 #endif
 109         movl    %eax, %edx
 110         andl    $0x7fffffff, %eax
 111
 112 2:      shrl    $20, %eax
 113         andl    $0x800fffff, %edx
 114         subl    $1022, %eax
 115         orl     $0x3fe00000, %edx
 116         addl    %eax, %ecx
 117 #ifdef PIC
 118         movl    %edx, 12(%esp)
 119
 120         fldl    8(%esp)                 /* xm */
 121 #else
 122         movl    %edx, 8(%esp)
 123
 124         fldl    4(%esp)                 /* xm */
 125 #endif
 126         fabs
 127
 128         /* The following code has two tracks:
 129             a) compute the normalized cbrt value
 130             b) compute xe/3 and xe%3
 131            The right track computes the value for b) and this is done
 132            in an optimized way by avoiding division.
 133
 134            But why two tracks at all?  Very easy: efficiency.  Some FP
 135            instruction can overlap with a certain amount of integer (and
 136            FP) instructions.  So we get (except for the imull) all
 137            instructions for free.  */
 138
 139         fld     %st(0)                  /* xm : xm */
 140
 141         fmull   MO(f7)                  /* f7*xm : xm */
 142                         movl    $1431655766, %eax
 143         faddl   MO(f6)                  /* f6+f7*xm : xm */
 144                         imull   %ecx
 145         fmul    %st(1)                  /* (f6+f7*xm)*xm : xm */
 146                         movl    %ecx, %eax
 147         faddl   MO(f5)                  /* f5+(f6+f7*xm)*xm : xm */
 148                         sarl    $31, %eax
 149         fmul    %st(1)                  /* (f5+(f6+f7*xm)*xm)*xm : xm */
 150                         subl    %eax, %edx
 151         faddl   MO(f4)                  /* f4+(f5+(f6+f7*xm)*xm)*xm : xm */
 152         fmul    %st(1)                  /* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
 153         faddl   MO(f3)                  /* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
 154         fmul    %st(1)                  /* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
 155         faddl   MO(f2)                  /* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
 156         fmul    %st(1)                  /* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
 157         faddl   MO(f1)                  /* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
 158
 159         fld     %st                     /* u : u : xm */
 160         fmul    %st(1)                  /* u*u : u : xm */
 161         fld     %st(2)                  /* xm : u*u : u : xm */
 162         fadd    %st                     /* 2*xm : u*u : u : xm */
 163         fxch    %st(1)                  /* u*u : 2*xm : u : xm */
 164         fmul    %st(2)                  /* t2:=u*u*u : 2*xm : u : xm */
 165                         movl    %edx, %eax
 166         fadd    %st, %st(1)             /* t2 : t2+2*xm : u : xm */
 167                         leal    (%edx,%edx,2),%edx
 168         fadd    %st(0)                  /* 2*t2 : t2+2*xm : u : xm */
 169                         subl    %edx, %ecx
 170         faddp   %st, %st(3)             /* t2+2*xm : u : 2*t2+xm */
 171                         shll    $3, %ecx
 172         fmulp                           /* u*(t2+2*xm) : 2*t2+xm */
 173         fdivp   %st, %st(1)             /* u*(t2+2*xm)/(2*t2+xm) */
 174         fmull   MOX(16+factor,%ecx)     /* u*(t2+2*xm)/(2*t2+xm)*FACT */
 175         pushl   %eax
 176         cfi_adjust_cfa_offset (4)
 177         fildl   (%esp)                  /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
 178         fxch                            /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
 179         fscale                          /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
 180         popl    %edx
 181         cfi_adjust_cfa_offset (-4)
 182 #ifdef PIC
 183         movl    12(%esp), %eax
 184         popl    %ebx
 185         cfi_adjust_cfa_offset (-4)
 186         cfi_restore (ebx)
 187 #else
 188         movl    8(%esp), %eax
 189 #endif
 190         testl   %eax, %eax
 191         fstp    %st(1)
 192         jns     4f
 193         fchs
 194 4:      ret
 195
 196         /* Return the argument.  */
 197 1:      fldl    4(%esp)
 198         ret
 199 END(__cbrt)
 200 weak_alias (__cbrt, cbrt)