sysdeps/i386/fpu/s_cbrtf.S

   1 /* Compute cubic root of float value.
   2    Copyright (C) 1997-2021 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <https://www.gnu.org/licenses/>.  */
  18
  19 #include <machine/asm.h>
  20 #include <libm-alias-float.h>
  21
  22         .section .rodata
  23
  24         .align ALIGNARG(4)
  25         .type f3,@object
  26 f3:     .double 0.191502161678719066
  27         ASM_SIZE_DIRECTIVE(f3)
  28         .type f2,@object
  29 f2:     .double 0.697570460207922770
  30         ASM_SIZE_DIRECTIVE(f2)
  31         .type f1,@object
  32 f1:     .double 0.492659620528969547
  33         ASM_SIZE_DIRECTIVE(f1)
  34
  35 #define CBRT2           1.2599210498948731648
  36 #define ONE_CBRT2       0.793700525984099737355196796584
  37 #define SQR_CBRT2       1.5874010519681994748
  38 #define ONE_SQR_CBRT2   0.629960524947436582364439673883
  39
  40         .type factor,@object
  41         .align ALIGNARG(4)
  42 factor: .double ONE_SQR_CBRT2
  43         .double ONE_CBRT2
  44         .double 1.0
  45         .double CBRT2
  46         .double SQR_CBRT2
  47         ASM_SIZE_DIRECTIVE(factor)
  48
  49         .type two25,@object
  50 two25:  .byte 0, 0, 0, 0x4c
  51         ASM_SIZE_DIRECTIVE(two25)
  52
  53 #ifdef PIC
  54 #define MO(op) op##@GOTOFF(%ebx)
  55 #define MOX(op,x) op##@GOTOFF(%ebx,x,1)
  56 #else
  57 #define MO(op) op
  58 #define MOX(op,x) op(x)
  59 #endif
  60
  61         .text
  62 ENTRY(__cbrtf)
  63         movl    4(%esp), %eax
  64         xorl    %ecx, %ecx
  65         movl    %eax, %edx
  66         andl    $0x7fffffff, %eax
  67         jz      1f
  68         cmpl    $0x7f800000, %eax
  69         jae     1f
  70
  71 #ifdef PIC
  72         pushl   %ebx
  73         cfi_adjust_cfa_offset (4)
  74         cfi_rel_offset (ebx, 0)
  75         LOAD_PIC_REG (bx)
  76 #endif
  77
  78         cmpl    $0x00800000, %eax
  79         jae     2f
  80
  81 #ifdef PIC
  82         flds    8(%esp)
  83 #else
  84         flds    4(%esp)
  85 #endif
  86         fmuls   MO(two25)
  87         movl    $-25, %ecx
  88 #ifdef PIC
  89         fstps   8(%esp)
  90         movl    8(%esp), %eax
  91 #else
  92         fstps   4(%esp)
  93         movl    4(%esp), %eax
  94 #endif
  95         movl    %eax, %edx
  96         andl    $0x7fffffff, %eax
  97
  98 2:      shrl    $23, %eax
  99         andl    $0x807fffff, %edx
 100         subl    $126, %eax
 101         orl     $0x3f000000, %edx
 102         addl    %eax, %ecx
 103 #ifdef PIC
 104         movl    %edx, 8(%esp)
 105
 106         flds    8(%esp)                 /* xm */
 107 #else
 108         movl    %edx, 4(%esp)
 109
 110         flds    4(%esp)                 /* xm */
 111 #endif
 112         fabs
 113
 114         /* The following code has two tracks:
 115             a) compute the normalized cbrt value
 116             b) compute xe/3 and xe%3
 117            The right track computes the value for b) and this is done
 118            in an optimized way by avoiding division.
 119
 120            But why two tracks at all?  Very easy: efficiency.  Some FP
 121            instruction can overlap with a certain amount of integer (and
 122            FP) instructions.  So we get (except for the imull) all
 123            instructions for free.  */
 124
 125         fld     %st(0)                  /* xm : xm */
 126         fmull   MO(f3)                  /* f3*xm : xm */
 127                         movl    $1431655766, %eax
 128         fsubrl  MO(f2)                  /* f2-f3*xm : xm */
 129                         imull   %ecx
 130         fmul    %st(1)                  /* (f2-f3*xm)*xm : xm */
 131                         movl    %ecx, %eax
 132         faddl   MO(f1)                  /* u:=f1+(f2-f3*xm)*xm : xm */
 133                         sarl    $31, %eax
 134         fld     %st                     /* u : u : xm */
 135                         subl    %eax, %edx
 136         fmul    %st(1)                  /* u*u : u : xm */
 137         fld     %st(2)                  /* xm : u*u : u : xm */
 138         fadd    %st                     /* 2*xm : u*u : u : xm */
 139         fxch    %st(1)                  /* u*u : 2*xm : u : xm */
 140         fmul    %st(2)                  /* t2:=u*u*u : 2*xm : u : xm */
 141                         movl    %edx, %eax
 142         fadd    %st, %st(1)             /* t2 : t2+2*xm : u : xm */
 143                         leal    (%edx,%edx,2),%edx
 144         fadd    %st(0)                  /* 2*t2 : t2+2*xm : u : xm */
 145                         subl    %edx, %ecx
 146         faddp   %st, %st(3)             /* t2+2*xm : u : 2*t2+xm */
 147                         shll    $3, %ecx
 148         fmulp                           /* u*(t2+2*xm) : 2*t2+xm */
 149         fdivp   %st, %st(1)             /* u*(t2+2*xm)/(2*t2+xm) */
 150         fmull   MOX(16+factor,%ecx)     /* u*(t2+2*xm)/(2*t2+xm)*FACT */
 151         pushl   %eax
 152         cfi_adjust_cfa_offset (4)
 153         fildl   (%esp)                  /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
 154         fxch                            /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
 155         fscale                          /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
 156         popl    %edx
 157         cfi_adjust_cfa_offset (-4)
 158 #ifdef PIC
 159         movl    8(%esp), %eax
 160         popl    %ebx
 161         cfi_adjust_cfa_offset (-4)
 162         cfi_restore (ebx)
 163 #else
 164         movl    4(%esp), %eax
 165 #endif
 166         testl   %eax, %eax
 167         fstp    %st(1)
 168         jns     4f
 169         fchs
 170 4:      ret
 171
 172         /* Return the argument.  */
 173 1:      flds    4(%esp)
 174         ret
 175 END(__cbrtf)
 176 libm_alias_float (__cbrt, cbrt)