sysdeps/i386/fpu/s_cbrt.S

   1 /* Compute cubic root of double value.
   2    Copyright (C) 1997-2019 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
   5    Ulrich Drepper <drepper@cygnus.com>, 1997.
   6
   7    The GNU C Library is free software; you can redistribute it and/or
   8    modify it under the terms of the GNU Lesser General Public
   9    License as published by the Free Software Foundation; either
  10    version 2.1 of the License, or (at your option) any later version.
  11
  12    The GNU C Library is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    Lesser General Public License for more details.
  16
  17    You should have received a copy of the GNU Lesser General Public
  18    License along with the GNU C Library; if not, see
  19    <http://www.gnu.org/licenses/>.  */
  20
  21 #include <machine/asm.h>
  22 #include <libm-alias-double.h>
  23
  24         .section .rodata
  25
  26         .align ALIGNARG(4)
  27         .type f7,@object
  28 f7:     .double -0.145263899385486377
  29         ASM_SIZE_DIRECTIVE(f7)
  30         .type f6,@object
  31 f6:     .double 0.784932344976639262
  32         ASM_SIZE_DIRECTIVE(f6)
  33         .type f5,@object
  34 f5:     .double -1.83469277483613086
  35         ASM_SIZE_DIRECTIVE(f5)
  36         .type f4,@object
  37 f4:     .double 2.44693122563534430
  38         ASM_SIZE_DIRECTIVE(f4)
  39         .type f3,@object
  40 f3:     .double -2.11499494167371287
  41         ASM_SIZE_DIRECTIVE(f3)
  42         .type f2,@object
  43 f2:     .double 1.50819193781584896
  44         ASM_SIZE_DIRECTIVE(f2)
  45         .type f1,@object
  46 f1:     .double 0.354895765043919860
  47         ASM_SIZE_DIRECTIVE(f1)
  48
  49 #define CBRT2           1.2599210498948731648
  50 #define ONE_CBRT2       0.793700525984099737355196796584
  51 #define SQR_CBRT2       1.5874010519681994748
  52 #define ONE_SQR_CBRT2   0.629960524947436582364439673883
  53
  54         .type factor,@object
  55 factor: .double ONE_SQR_CBRT2
  56         .double ONE_CBRT2
  57         .double 1.0
  58         .double CBRT2
  59         .double SQR_CBRT2
  60         ASM_SIZE_DIRECTIVE(factor)
  61
  62         .type two54,@object
  63 two54:  .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43
  64         ASM_SIZE_DIRECTIVE(two54)
  65
  66 #ifdef PIC
  67 #define MO(op) op##@GOTOFF(%ebx)
  68 #define MOX(op,x) op##@GOTOFF(%ebx,x,1)
  69 #else
  70 #define MO(op) op
  71 #define MOX(op,x) op(x)
  72 #endif
  73
  74         .text
  75 ENTRY(__cbrt)
  76         movl    4(%esp), %ecx
  77         movl    8(%esp), %eax
  78         movl    %eax, %edx
  79         andl    $0x7fffffff, %eax
  80         orl     %eax, %ecx
  81         jz      1f
  82         xorl    %ecx, %ecx
  83         cmpl    $0x7ff00000, %eax
  84         jae     1f
  85
  86 #ifdef PIC
  87         pushl   %ebx
  88         cfi_adjust_cfa_offset (4)
  89         cfi_rel_offset (ebx, 0)
  90         LOAD_PIC_REG (bx)
  91 #endif
  92
  93         cmpl    $0x00100000, %eax
  94         jae     2f
  95
  96 #ifdef PIC
  97         fldl    8(%esp)
  98 #else
  99         fldl    4(%esp)
 100 #endif
 101         fmull   MO(two54)
 102         movl    $-54, %ecx
 103 #ifdef PIC
 104         fstpl   8(%esp)
 105         movl    12(%esp), %eax
 106 #else
 107         fstpl   4(%esp)
 108         movl    8(%esp), %eax
 109 #endif
 110         movl    %eax, %edx
 111         andl    $0x7fffffff, %eax
 112
 113 2:      shrl    $20, %eax
 114         andl    $0x800fffff, %edx
 115         subl    $1022, %eax
 116         orl     $0x3fe00000, %edx
 117         addl    %eax, %ecx
 118 #ifdef PIC
 119         movl    %edx, 12(%esp)
 120
 121         fldl    8(%esp)                 /* xm */
 122 #else
 123         movl    %edx, 8(%esp)
 124
 125         fldl    4(%esp)                 /* xm */
 126 #endif
 127         fabs
 128
 129         /* The following code has two tracks:
 130             a) compute the normalized cbrt value
 131             b) compute xe/3 and xe%3
 132            The right track computes the value for b) and this is done
 133            in an optimized way by avoiding division.
 134
 135            But why two tracks at all?  Very easy: efficiency.  Some FP
 136            instruction can overlap with a certain amount of integer (and
 137            FP) instructions.  So we get (except for the imull) all
 138            instructions for free.  */
 139
 140         fld     %st(0)                  /* xm : xm */
 141
 142         fmull   MO(f7)                  /* f7*xm : xm */
 143                         movl    $1431655766, %eax
 144         faddl   MO(f6)                  /* f6+f7*xm : xm */
 145                         imull   %ecx
 146         fmul    %st(1)                  /* (f6+f7*xm)*xm : xm */
 147                         movl    %ecx, %eax
 148         faddl   MO(f5)                  /* f5+(f6+f7*xm)*xm : xm */
 149                         sarl    $31, %eax
 150         fmul    %st(1)                  /* (f5+(f6+f7*xm)*xm)*xm : xm */
 151                         subl    %eax, %edx
 152         faddl   MO(f4)                  /* f4+(f5+(f6+f7*xm)*xm)*xm : xm */
 153         fmul    %st(1)                  /* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
 154         faddl   MO(f3)                  /* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
 155         fmul    %st(1)                  /* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
 156         faddl   MO(f2)                  /* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
 157         fmul    %st(1)                  /* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
 158         faddl   MO(f1)                  /* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
 159
 160         fld     %st                     /* u : u : xm */
 161         fmul    %st(1)                  /* u*u : u : xm */
 162         fld     %st(2)                  /* xm : u*u : u : xm */
 163         fadd    %st                     /* 2*xm : u*u : u : xm */
 164         fxch    %st(1)                  /* u*u : 2*xm : u : xm */
 165         fmul    %st(2)                  /* t2:=u*u*u : 2*xm : u : xm */
 166                         movl    %edx, %eax
 167         fadd    %st, %st(1)             /* t2 : t2+2*xm : u : xm */
 168                         leal    (%edx,%edx,2),%edx
 169         fadd    %st(0)                  /* 2*t2 : t2+2*xm : u : xm */
 170                         subl    %edx, %ecx
 171         faddp   %st, %st(3)             /* t2+2*xm : u : 2*t2+xm */
 172                         shll    $3, %ecx
 173         fmulp                           /* u*(t2+2*xm) : 2*t2+xm */
 174         fdivp   %st, %st(1)             /* u*(t2+2*xm)/(2*t2+xm) */
 175         fmull   MOX(16+factor,%ecx)     /* u*(t2+2*xm)/(2*t2+xm)*FACT */
 176         pushl   %eax
 177         cfi_adjust_cfa_offset (4)
 178         fildl   (%esp)                  /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
 179         fxch                            /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
 180         fscale                          /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
 181         popl    %edx
 182         cfi_adjust_cfa_offset (-4)
 183 #ifdef PIC
 184         movl    12(%esp), %eax
 185         popl    %ebx
 186         cfi_adjust_cfa_offset (-4)
 187         cfi_restore (ebx)
 188 #else
 189         movl    8(%esp), %eax
 190 #endif
 191         testl   %eax, %eax
 192         fstp    %st(1)
 193         jns     4f
 194         fchs
 195 4:      ret
 196
 197         /* Return the argument.  */
 198 1:      fldl    4(%esp)
 199         ret
 200 END(__cbrt)
 201 libm_alias_double (__cbrt, cbrt)