]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/fpu/s_cbrtf.S
Prefer https to http for gnu.org and fsf.org URLs
[thirdparty/glibc.git] / sysdeps / i386 / fpu / s_cbrtf.S
CommitLineData
26dee9c4 1/* Compute cubic root of float value.
04277e02 2 Copyright (C) 1997-2019 Free Software Foundation, Inc.
26dee9c4
UD
3 This file is part of the GNU C Library.
4 Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
5 Ulrich Drepper <drepper@cygnus.com>, 1997.
6
7 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
26dee9c4
UD
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 15 Lesser General Public License for more details.
26dee9c4 16
41bdb6e2 17 You should have received a copy of the GNU Lesser General Public
59ba27a6 18 License along with the GNU C Library; if not, see
5a82c748 19 <https://www.gnu.org/licenses/>. */
26dee9c4
UD
20
21#include <machine/asm.h>
e4602cba 22#include <libm-alias-float.h>
26dee9c4 23
26dee9c4 24 .section .rodata
26dee9c4
UD
25
26 .align ALIGNARG(4)
b67e9372 27 .type f3,@object
26dee9c4
UD
28f3: .double 0.191502161678719066
29 ASM_SIZE_DIRECTIVE(f3)
b67e9372 30 .type f2,@object
0413b54c
UD
31f2: .double 0.697570460207922770
32 ASM_SIZE_DIRECTIVE(f2)
b67e9372 33 .type f1,@object
0413b54c
UD
34f1: .double 0.492659620528969547
35 ASM_SIZE_DIRECTIVE(f1)
26dee9c4 36
0413b54c
UD
37#define CBRT2 1.2599210498948731648
38#define ONE_CBRT2 0.793700525984099737355196796584
39#define SQR_CBRT2 1.5874010519681994748
40#define ONE_SQR_CBRT2 0.629960524947436582364439673883
26dee9c4 41
b67e9372 42 .type factor,@object
0413b54c
UD
43 .align ALIGNARG(4)
44factor: .double ONE_SQR_CBRT2
45 .double ONE_CBRT2
26dee9c4
UD
46 .double 1.0
47 .double CBRT2
48 .double SQR_CBRT2
49 ASM_SIZE_DIRECTIVE(factor)
50
b67e9372 51 .type two25,@object
26dee9c4
UD
52two25: .byte 0, 0, 0, 0x4c
53 ASM_SIZE_DIRECTIVE(two25)
54
55#ifdef PIC
56#define MO(op) op##@GOTOFF(%ebx)
0413b54c 57#define MOX(op,x) op##@GOTOFF(%ebx,x,1)
26dee9c4
UD
58#else
59#define MO(op) op
0413b54c 60#define MOX(op,x) op(x)
26dee9c4
UD
61#endif
62
63 .text
64ENTRY(__cbrtf)
65 movl 4(%esp), %eax
66 xorl %ecx, %ecx
67 movl %eax, %edx
68 andl $0x7fffffff, %eax
69 jz 1f
70 cmpl $0x7f800000, %eax
71 jae 1f
72
73#ifdef PIC
74 pushl %ebx
fee732e5
UD
75 cfi_adjust_cfa_offset (4)
76 cfi_rel_offset (ebx, 0)
77 LOAD_PIC_REG (bx)
26dee9c4
UD
78#endif
79
80 cmpl $0x00800000, %eax
81 jae 2f
82
83#ifdef PIC
84 flds 8(%esp)
85#else
86 flds 4(%esp)
87#endif
88 fmuls MO(two25)
89 movl $-25, %ecx
90#ifdef PIC
91 fstps 8(%esp)
92 movl 8(%esp), %eax
93#else
94 fstps 4(%esp)
95 movl 4(%esp), %eax
96#endif
97 movl %eax, %edx
98 andl $0x7fffffff, %eax
99
1002: shrl $23, %eax
101 andl $0x807fffff, %edx
102 subl $126, %eax
103 orl $0x3f000000, %edx
104 addl %eax, %ecx
105#ifdef PIC
106 movl %edx, 8(%esp)
107
108 flds 8(%esp) /* xm */
109#else
110 movl %edx, 4(%esp)
111
112 flds 4(%esp) /* xm */
113#endif
114 fabs
115
0413b54c 116 /* The following code has two tracks:
26dee9c4
UD
117 a) compute the normalized cbrt value
118 b) compute xe/3 and xe%3
119 The right track computes the value for b) and this is done
0413b54c
UD
120 in an optimized way by avoiding division.
121
122 But why two tracks at all? Very easy: efficiency. Some FP
123 instruction can overlap with a certain amount of integer (and
124 FP) instructions. So we get (except for the imull) all
125 instructions for free. */
26dee9c4
UD
126
127 fld %st(0) /* xm : xm */
128 fmull MO(f3) /* f3*xm : xm */
129 movl $1431655766, %eax
130 fsubrl MO(f2) /* f2-f3*xm : xm */
131 imull %ecx
132 fmul %st(1) /* (f2-f3*xm)*xm : xm */
133 movl %ecx, %eax
134 faddl MO(f1) /* u:=f1+(f2-f3*xm)*xm : xm */
135 sarl $31, %eax
136 fld %st /* u : u : xm */
137 subl %eax, %edx
138 fmul %st(1) /* u*u : u : xm */
139 fld %st(2) /* xm : u*u : u : xm */
140 fadd %st /* 2*xm : u*u : u : xm */
141 fxch %st(1) /* u*u : 2*xm : u : xm */
142 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
143 movl %edx, %eax
144 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
145 leal (%edx,%edx,2),%edx
146 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
147 subl %edx, %ecx
148 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
0413b54c 149 shll $3, %ecx
26dee9c4
UD
150 fmulp /* u*(t2+2*xm) : 2*t2+xm */
151 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
0413b54c 152 fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */
26dee9c4 153 pushl %eax
fee732e5 154 cfi_adjust_cfa_offset (4)
26dee9c4
UD
155 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
156 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
26dee9c4 157 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
0413b54c 158 popl %edx
fee732e5 159 cfi_adjust_cfa_offset (-4)
26dee9c4 160#ifdef PIC
0413b54c 161 movl 8(%esp), %eax
26dee9c4 162 popl %ebx
fee732e5
UD
163 cfi_adjust_cfa_offset (-4)
164 cfi_restore (ebx)
0413b54c
UD
165#else
166 movl 4(%esp), %eax
26dee9c4 167#endif
0413b54c
UD
168 testl %eax, %eax
169 fstp %st(1)
170 jns 4f
26dee9c4
UD
171 fchs
1724: ret
173
174 /* Return the argument. */
1751: flds 4(%esp)
176 ret
177END(__cbrtf)
e4602cba 178libm_alias_float (__cbrt, cbrt)