]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/fpu/s_cbrt.S
Update copyright dates with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / i386 / fpu / s_cbrt.S
CommitLineData
26dee9c4 1/* Compute cubic root of double value.
dff8da6b 2 Copyright (C) 1997-2024 Free Software Foundation, Inc.
26dee9c4 3 This file is part of the GNU C Library.
26dee9c4
UD
4
5 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
26dee9c4
UD
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 13 Lesser General Public License for more details.
26dee9c4 14
41bdb6e2 15 You should have received a copy of the GNU Lesser General Public
59ba27a6 16 License along with the GNU C Library; if not, see
5a82c748 17 <https://www.gnu.org/licenses/>. */
26dee9c4
UD
18
19#include <machine/asm.h>
bc4e8f9b 20#include <libm-alias-double.h>
26dee9c4 21
26dee9c4 22 .section .rodata
26dee9c4
UD
23
24 .align ALIGNARG(4)
b67e9372 25 .type f7,@object
26dee9c4
UD
26f7: .double -0.145263899385486377
27 ASM_SIZE_DIRECTIVE(f7)
b67e9372 28 .type f6,@object
0413b54c
UD
29f6: .double 0.784932344976639262
30 ASM_SIZE_DIRECTIVE(f6)
b67e9372 31 .type f5,@object
0413b54c
UD
32f5: .double -1.83469277483613086
33 ASM_SIZE_DIRECTIVE(f5)
b67e9372 34 .type f4,@object
0413b54c
UD
35f4: .double 2.44693122563534430
36 ASM_SIZE_DIRECTIVE(f4)
b67e9372 37 .type f3,@object
0413b54c
UD
38f3: .double -2.11499494167371287
39 ASM_SIZE_DIRECTIVE(f3)
b67e9372 40 .type f2,@object
0413b54c
UD
41f2: .double 1.50819193781584896
42 ASM_SIZE_DIRECTIVE(f2)
b67e9372 43 .type f1,@object
0413b54c
UD
44f1: .double 0.354895765043919860
45 ASM_SIZE_DIRECTIVE(f1)
26dee9c4 46
0413b54c
UD
47#define CBRT2 1.2599210498948731648
48#define ONE_CBRT2 0.793700525984099737355196796584
49#define SQR_CBRT2 1.5874010519681994748
50#define ONE_SQR_CBRT2 0.629960524947436582364439673883
26dee9c4 51
b67e9372 52 .type factor,@object
0413b54c
UD
53factor: .double ONE_SQR_CBRT2
54 .double ONE_CBRT2
26dee9c4
UD
55 .double 1.0
56 .double CBRT2
57 .double SQR_CBRT2
58 ASM_SIZE_DIRECTIVE(factor)
59
b67e9372 60 .type two54,@object
26dee9c4
UD
61two54: .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43
62 ASM_SIZE_DIRECTIVE(two54)
63
64#ifdef PIC
65#define MO(op) op##@GOTOFF(%ebx)
0413b54c 66#define MOX(op,x) op##@GOTOFF(%ebx,x,1)
26dee9c4
UD
67#else
68#define MO(op) op
0413b54c 69#define MOX(op,x) op(x)
26dee9c4
UD
70#endif
71
72 .text
73ENTRY(__cbrt)
74 movl 4(%esp), %ecx
75 movl 8(%esp), %eax
76 movl %eax, %edx
77 andl $0x7fffffff, %eax
78 orl %eax, %ecx
79 jz 1f
80 xorl %ecx, %ecx
81 cmpl $0x7ff00000, %eax
82 jae 1f
83
84#ifdef PIC
85 pushl %ebx
fee732e5
UD
86 cfi_adjust_cfa_offset (4)
87 cfi_rel_offset (ebx, 0)
88 LOAD_PIC_REG (bx)
26dee9c4
UD
89#endif
90
91 cmpl $0x00100000, %eax
92 jae 2f
93
94#ifdef PIC
95 fldl 8(%esp)
96#else
97 fldl 4(%esp)
98#endif
99 fmull MO(two54)
100 movl $-54, %ecx
0413b54c
UD
101#ifdef PIC
102 fstpl 8(%esp)
103 movl 12(%esp), %eax
104#else
26dee9c4
UD
105 fstpl 4(%esp)
106 movl 8(%esp), %eax
0413b54c 107#endif
26dee9c4
UD
108 movl %eax, %edx
109 andl $0x7fffffff, %eax
110
1112: shrl $20, %eax
112 andl $0x800fffff, %edx
113 subl $1022, %eax
114 orl $0x3fe00000, %edx
115 addl %eax, %ecx
116#ifdef PIC
117 movl %edx, 12(%esp)
118
119 fldl 8(%esp) /* xm */
120#else
121 movl %edx, 8(%esp)
122
123 fldl 4(%esp) /* xm */
124#endif
125 fabs
126
0413b54c 127 /* The following code has two tracks:
26dee9c4
UD
128 a) compute the normalized cbrt value
129 b) compute xe/3 and xe%3
130 The right track computes the value for b) and this is done
0413b54c
UD
131 in an optimized way by avoiding division.
132
133 But why two tracks at all? Very easy: efficiency. Some FP
134 instruction can overlap with a certain amount of integer (and
135 FP) instructions. So we get (except for the imull) all
136 instructions for free. */
26dee9c4
UD
137
138 fld %st(0) /* xm : xm */
139
140 fmull MO(f7) /* f7*xm : xm */
141 movl $1431655766, %eax
142 faddl MO(f6) /* f6+f7*xm : xm */
143 imull %ecx
144 fmul %st(1) /* (f6+f7*xm)*xm : xm */
145 movl %ecx, %eax
146 faddl MO(f5) /* f5+(f6+f7*xm)*xm : xm */
147 sarl $31, %eax
148 fmul %st(1) /* (f5+(f6+f7*xm)*xm)*xm : xm */
149 subl %eax, %edx
150 faddl MO(f4) /* f4+(f5+(f6+f7*xm)*xm)*xm : xm */
151 fmul %st(1) /* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
152 faddl MO(f3) /* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
153 fmul %st(1) /* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
154 faddl MO(f2) /* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
155 fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
156 faddl MO(f1) /* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
157
158 fld %st /* u : u : xm */
159 fmul %st(1) /* u*u : u : xm */
160 fld %st(2) /* xm : u*u : u : xm */
161 fadd %st /* 2*xm : u*u : u : xm */
162 fxch %st(1) /* u*u : 2*xm : u : xm */
163 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
164 movl %edx, %eax
165 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
166 leal (%edx,%edx,2),%edx
167 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
168 subl %edx, %ecx
169 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
0413b54c 170 shll $3, %ecx
26dee9c4
UD
171 fmulp /* u*(t2+2*xm) : 2*t2+xm */
172 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
0413b54c 173 fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */
26dee9c4 174 pushl %eax
fee732e5 175 cfi_adjust_cfa_offset (4)
26dee9c4
UD
176 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
177 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
26dee9c4 178 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
0413b54c 179 popl %edx
fee732e5 180 cfi_adjust_cfa_offset (-4)
26dee9c4 181#ifdef PIC
0413b54c 182 movl 12(%esp), %eax
26dee9c4 183 popl %ebx
fee732e5
UD
184 cfi_adjust_cfa_offset (-4)
185 cfi_restore (ebx)
0413b54c
UD
186#else
187 movl 8(%esp), %eax
26dee9c4 188#endif
0413b54c
UD
189 testl %eax, %eax
190 fstp %st(1)
191 jns 4f
26dee9c4
UD
192 fchs
1934: ret
194
195 /* Return the argument. */
1961: fldl 4(%esp)
197 ret
198END(__cbrt)
bc4e8f9b 199libm_alias_double (__cbrt, cbrt)