sysdeps/i386/fpu/s_cbrtf.S

   1 /* Compute cubic root of float value.
   2    Copyright (C) 1997-2019 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
   5    Ulrich Drepper <drepper@cygnus.com>, 1997.
   6
   7    The GNU C Library is free software; you can redistribute it and/or
   8    modify it under the terms of the GNU Lesser General Public
   9    License as published by the Free Software Foundation; either
  10    version 2.1 of the License, or (at your option) any later version.
  11
  12    The GNU C Library is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    Lesser General Public License for more details.
  16
  17    You should have received a copy of the GNU Lesser General Public
  18    License along with the GNU C Library; if not, see
  19    <http://www.gnu.org/licenses/>.  */
  20
  21 #include <machine/asm.h>
  22 #include <libm-alias-float.h>
  23
  24         .section .rodata
  25
  26         .align ALIGNARG(4)
  27         .type f3,@object
  28 f3:     .double 0.191502161678719066
  29         ASM_SIZE_DIRECTIVE(f3)
  30         .type f2,@object
  31 f2:     .double 0.697570460207922770
  32         ASM_SIZE_DIRECTIVE(f2)
  33         .type f1,@object
  34 f1:     .double 0.492659620528969547
  35         ASM_SIZE_DIRECTIVE(f1)
  36
  37 #define CBRT2           1.2599210498948731648
  38 #define ONE_CBRT2       0.793700525984099737355196796584
  39 #define SQR_CBRT2       1.5874010519681994748
  40 #define ONE_SQR_CBRT2   0.629960524947436582364439673883
  41
  42         .type factor,@object
  43         .align ALIGNARG(4)
  44 factor: .double ONE_SQR_CBRT2
  45         .double ONE_CBRT2
  46         .double 1.0
  47         .double CBRT2
  48         .double SQR_CBRT2
  49         ASM_SIZE_DIRECTIVE(factor)
  50
  51         .type two25,@object
  52 two25:  .byte 0, 0, 0, 0x4c
  53         ASM_SIZE_DIRECTIVE(two25)
  54
  55 #ifdef PIC
  56 #define MO(op) op##@GOTOFF(%ebx)
  57 #define MOX(op,x) op##@GOTOFF(%ebx,x,1)
  58 #else
  59 #define MO(op) op
  60 #define MOX(op,x) op(x)
  61 #endif
  62
  63         .text
  64 ENTRY(__cbrtf)
  65         movl    4(%esp), %eax
  66         xorl    %ecx, %ecx
  67         movl    %eax, %edx
  68         andl    $0x7fffffff, %eax
  69         jz      1f
  70         cmpl    $0x7f800000, %eax
  71         jae     1f
  72
  73 #ifdef PIC
  74         pushl   %ebx
  75         cfi_adjust_cfa_offset (4)
  76         cfi_rel_offset (ebx, 0)
  77         LOAD_PIC_REG (bx)
  78 #endif
  79
  80         cmpl    $0x00800000, %eax
  81         jae     2f
  82
  83 #ifdef PIC
  84         flds    8(%esp)
  85 #else
  86         flds    4(%esp)
  87 #endif
  88         fmuls   MO(two25)
  89         movl    $-25, %ecx
  90 #ifdef PIC
  91         fstps   8(%esp)
  92         movl    8(%esp), %eax
  93 #else
  94         fstps   4(%esp)
  95         movl    4(%esp), %eax
  96 #endif
  97         movl    %eax, %edx
  98         andl    $0x7fffffff, %eax
  99
 100 2:      shrl    $23, %eax
 101         andl    $0x807fffff, %edx
 102         subl    $126, %eax
 103         orl     $0x3f000000, %edx
 104         addl    %eax, %ecx
 105 #ifdef PIC
 106         movl    %edx, 8(%esp)
 107
 108         flds    8(%esp)                 /* xm */
 109 #else
 110         movl    %edx, 4(%esp)
 111
 112         flds    4(%esp)                 /* xm */
 113 #endif
 114         fabs
 115
 116         /* The following code has two tracks:
 117             a) compute the normalized cbrt value
 118             b) compute xe/3 and xe%3
 119            The right track computes the value for b) and this is done
 120            in an optimized way by avoiding division.
 121
 122            But why two tracks at all?  Very easy: efficiency.  Some FP
 123            instruction can overlap with a certain amount of integer (and
 124            FP) instructions.  So we get (except for the imull) all
 125            instructions for free.  */
 126
 127         fld     %st(0)                  /* xm : xm */
 128         fmull   MO(f3)                  /* f3*xm : xm */
 129                         movl    $1431655766, %eax
 130         fsubrl  MO(f2)                  /* f2-f3*xm : xm */
 131                         imull   %ecx
 132         fmul    %st(1)                  /* (f2-f3*xm)*xm : xm */
 133                         movl    %ecx, %eax
 134         faddl   MO(f1)                  /* u:=f1+(f2-f3*xm)*xm : xm */
 135                         sarl    $31, %eax
 136         fld     %st                     /* u : u : xm */
 137                         subl    %eax, %edx
 138         fmul    %st(1)                  /* u*u : u : xm */
 139         fld     %st(2)                  /* xm : u*u : u : xm */
 140         fadd    %st                     /* 2*xm : u*u : u : xm */
 141         fxch    %st(1)                  /* u*u : 2*xm : u : xm */
 142         fmul    %st(2)                  /* t2:=u*u*u : 2*xm : u : xm */
 143                         movl    %edx, %eax
 144         fadd    %st, %st(1)             /* t2 : t2+2*xm : u : xm */
 145                         leal    (%edx,%edx,2),%edx
 146         fadd    %st(0)                  /* 2*t2 : t2+2*xm : u : xm */
 147                         subl    %edx, %ecx
 148         faddp   %st, %st(3)             /* t2+2*xm : u : 2*t2+xm */
 149                         shll    $3, %ecx
 150         fmulp                           /* u*(t2+2*xm) : 2*t2+xm */
 151         fdivp   %st, %st(1)             /* u*(t2+2*xm)/(2*t2+xm) */
 152         fmull   MOX(16+factor,%ecx)     /* u*(t2+2*xm)/(2*t2+xm)*FACT */
 153         pushl   %eax
 154         cfi_adjust_cfa_offset (4)
 155         fildl   (%esp)                  /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
 156         fxch                            /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
 157         fscale                          /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
 158         popl    %edx
 159         cfi_adjust_cfa_offset (-4)
 160 #ifdef PIC
 161         movl    8(%esp), %eax
 162         popl    %ebx
 163         cfi_adjust_cfa_offset (-4)
 164         cfi_restore (ebx)
 165 #else
 166         movl    4(%esp), %eax
 167 #endif
 168         testl   %eax, %eax
 169         fstp    %st(1)
 170         jns     4f
 171         fchs
 172 4:      ret
 173
 174         /* Return the argument.  */
 175 1:      flds    4(%esp)
 176         ret
 177 END(__cbrtf)
 178 libm_alias_float (__cbrt, cbrt)