]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/fpu/s_cbrtf.S
aa5ac7ee706ce3e952dcc757261e82b626ce4a57
[thirdparty/glibc.git] / sysdeps / i386 / fpu / s_cbrtf.S
1 /* Compute cubic root of float value.
2 Copyright (C) 1997-2019 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
5 Ulrich Drepper <drepper@cygnus.com>, 1997.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, see
19 <http://www.gnu.org/licenses/>. */
20
21 #include <machine/asm.h>
22 #include <libm-alias-float.h>
23
24 .section .rodata
25
26 .align ALIGNARG(4)
27 .type f3,@object
28 f3: .double 0.191502161678719066
29 ASM_SIZE_DIRECTIVE(f3)
30 .type f2,@object
31 f2: .double 0.697570460207922770
32 ASM_SIZE_DIRECTIVE(f2)
33 .type f1,@object
34 f1: .double 0.492659620528969547
35 ASM_SIZE_DIRECTIVE(f1)
36
37 #define CBRT2 1.2599210498948731648
38 #define ONE_CBRT2 0.793700525984099737355196796584
39 #define SQR_CBRT2 1.5874010519681994748
40 #define ONE_SQR_CBRT2 0.629960524947436582364439673883
41
42 .type factor,@object
43 .align ALIGNARG(4)
44 factor: .double ONE_SQR_CBRT2
45 .double ONE_CBRT2
46 .double 1.0
47 .double CBRT2
48 .double SQR_CBRT2
49 ASM_SIZE_DIRECTIVE(factor)
50
51 .type two25,@object
52 two25: .byte 0, 0, 0, 0x4c
53 ASM_SIZE_DIRECTIVE(two25)
54
55 #ifdef PIC
56 #define MO(op) op##@GOTOFF(%ebx)
57 #define MOX(op,x) op##@GOTOFF(%ebx,x,1)
58 #else
59 #define MO(op) op
60 #define MOX(op,x) op(x)
61 #endif
62
63 .text
64 ENTRY(__cbrtf)
65 movl 4(%esp), %eax
66 xorl %ecx, %ecx
67 movl %eax, %edx
68 andl $0x7fffffff, %eax
69 jz 1f
70 cmpl $0x7f800000, %eax
71 jae 1f
72
73 #ifdef PIC
74 pushl %ebx
75 cfi_adjust_cfa_offset (4)
76 cfi_rel_offset (ebx, 0)
77 LOAD_PIC_REG (bx)
78 #endif
79
80 cmpl $0x00800000, %eax
81 jae 2f
82
83 #ifdef PIC
84 flds 8(%esp)
85 #else
86 flds 4(%esp)
87 #endif
88 fmuls MO(two25)
89 movl $-25, %ecx
90 #ifdef PIC
91 fstps 8(%esp)
92 movl 8(%esp), %eax
93 #else
94 fstps 4(%esp)
95 movl 4(%esp), %eax
96 #endif
97 movl %eax, %edx
98 andl $0x7fffffff, %eax
99
100 2: shrl $23, %eax
101 andl $0x807fffff, %edx
102 subl $126, %eax
103 orl $0x3f000000, %edx
104 addl %eax, %ecx
105 #ifdef PIC
106 movl %edx, 8(%esp)
107
108 flds 8(%esp) /* xm */
109 #else
110 movl %edx, 4(%esp)
111
112 flds 4(%esp) /* xm */
113 #endif
114 fabs
115
116 /* The following code has two tracks:
117 a) compute the normalized cbrt value
118 b) compute xe/3 and xe%3
119 The right track computes the value for b) and this is done
120 in an optimized way by avoiding division.
121
122 But why two tracks at all? Very easy: efficiency. Some FP
123 instruction can overlap with a certain amount of integer (and
124 FP) instructions. So we get (except for the imull) all
125 instructions for free. */
126
127 fld %st(0) /* xm : xm */
128 fmull MO(f3) /* f3*xm : xm */
129 movl $1431655766, %eax
130 fsubrl MO(f2) /* f2-f3*xm : xm */
131 imull %ecx
132 fmul %st(1) /* (f2-f3*xm)*xm : xm */
133 movl %ecx, %eax
134 faddl MO(f1) /* u:=f1+(f2-f3*xm)*xm : xm */
135 sarl $31, %eax
136 fld %st /* u : u : xm */
137 subl %eax, %edx
138 fmul %st(1) /* u*u : u : xm */
139 fld %st(2) /* xm : u*u : u : xm */
140 fadd %st /* 2*xm : u*u : u : xm */
141 fxch %st(1) /* u*u : 2*xm : u : xm */
142 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
143 movl %edx, %eax
144 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
145 leal (%edx,%edx,2),%edx
146 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
147 subl %edx, %ecx
148 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
149 shll $3, %ecx
150 fmulp /* u*(t2+2*xm) : 2*t2+xm */
151 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
152 fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */
153 pushl %eax
154 cfi_adjust_cfa_offset (4)
155 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
156 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
157 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
158 popl %edx
159 cfi_adjust_cfa_offset (-4)
160 #ifdef PIC
161 movl 8(%esp), %eax
162 popl %ebx
163 cfi_adjust_cfa_offset (-4)
164 cfi_restore (ebx)
165 #else
166 movl 4(%esp), %eax
167 #endif
168 testl %eax, %eax
169 fstp %st(1)
170 jns 4f
171 fchs
172 4: ret
173
174 /* Return the argument. */
175 1: flds 4(%esp)
176 ret
177 END(__cbrtf)
178 libm_alias_float (__cbrt, cbrt)