]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/fpu/s_cbrtf.S
Remove "Contributed by" lines
[thirdparty/glibc.git] / sysdeps / i386 / fpu / s_cbrtf.S
CommitLineData
26dee9c4 1/* Compute cubic root of float value.
2b778ceb 2 Copyright (C) 1997-2021 Free Software Foundation, Inc.
26dee9c4 3 This file is part of the GNU C Library.
26dee9c4
UD
4
5 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
26dee9c4
UD
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 13 Lesser General Public License for more details.
26dee9c4 14
41bdb6e2 15 You should have received a copy of the GNU Lesser General Public
59ba27a6 16 License along with the GNU C Library; if not, see
5a82c748 17 <https://www.gnu.org/licenses/>. */
26dee9c4
UD
18
19#include <machine/asm.h>
e4602cba 20#include <libm-alias-float.h>
26dee9c4 21
26dee9c4 22 .section .rodata
26dee9c4
UD
23
24 .align ALIGNARG(4)
b67e9372 25 .type f3,@object
26dee9c4
UD
26f3: .double 0.191502161678719066
27 ASM_SIZE_DIRECTIVE(f3)
b67e9372 28 .type f2,@object
0413b54c
UD
29f2: .double 0.697570460207922770
30 ASM_SIZE_DIRECTIVE(f2)
b67e9372 31 .type f1,@object
0413b54c
UD
32f1: .double 0.492659620528969547
33 ASM_SIZE_DIRECTIVE(f1)
26dee9c4 34
0413b54c
UD
35#define CBRT2 1.2599210498948731648
36#define ONE_CBRT2 0.793700525984099737355196796584
37#define SQR_CBRT2 1.5874010519681994748
38#define ONE_SQR_CBRT2 0.629960524947436582364439673883
26dee9c4 39
b67e9372 40 .type factor,@object
0413b54c
UD
41 .align ALIGNARG(4)
42factor: .double ONE_SQR_CBRT2
43 .double ONE_CBRT2
26dee9c4
UD
44 .double 1.0
45 .double CBRT2
46 .double SQR_CBRT2
47 ASM_SIZE_DIRECTIVE(factor)
48
b67e9372 49 .type two25,@object
26dee9c4
UD
50two25: .byte 0, 0, 0, 0x4c
51 ASM_SIZE_DIRECTIVE(two25)
52
53#ifdef PIC
54#define MO(op) op##@GOTOFF(%ebx)
0413b54c 55#define MOX(op,x) op##@GOTOFF(%ebx,x,1)
26dee9c4
UD
56#else
57#define MO(op) op
0413b54c 58#define MOX(op,x) op(x)
26dee9c4
UD
59#endif
60
61 .text
62ENTRY(__cbrtf)
63 movl 4(%esp), %eax
64 xorl %ecx, %ecx
65 movl %eax, %edx
66 andl $0x7fffffff, %eax
67 jz 1f
68 cmpl $0x7f800000, %eax
69 jae 1f
70
71#ifdef PIC
72 pushl %ebx
fee732e5
UD
73 cfi_adjust_cfa_offset (4)
74 cfi_rel_offset (ebx, 0)
75 LOAD_PIC_REG (bx)
26dee9c4
UD
76#endif
77
78 cmpl $0x00800000, %eax
79 jae 2f
80
81#ifdef PIC
82 flds 8(%esp)
83#else
84 flds 4(%esp)
85#endif
86 fmuls MO(two25)
87 movl $-25, %ecx
88#ifdef PIC
89 fstps 8(%esp)
90 movl 8(%esp), %eax
91#else
92 fstps 4(%esp)
93 movl 4(%esp), %eax
94#endif
95 movl %eax, %edx
96 andl $0x7fffffff, %eax
97
982: shrl $23, %eax
99 andl $0x807fffff, %edx
100 subl $126, %eax
101 orl $0x3f000000, %edx
102 addl %eax, %ecx
103#ifdef PIC
104 movl %edx, 8(%esp)
105
106 flds 8(%esp) /* xm */
107#else
108 movl %edx, 4(%esp)
109
110 flds 4(%esp) /* xm */
111#endif
112 fabs
113
0413b54c 114 /* The following code has two tracks:
26dee9c4
UD
115 a) compute the normalized cbrt value
116 b) compute xe/3 and xe%3
117 The right track computes the value for b) and this is done
0413b54c
UD
118 in an optimized way by avoiding division.
119
120 But why two tracks at all? Very easy: efficiency. Some FP
121 instruction can overlap with a certain amount of integer (and
122 FP) instructions. So we get (except for the imull) all
123 instructions for free. */
26dee9c4
UD
124
125 fld %st(0) /* xm : xm */
126 fmull MO(f3) /* f3*xm : xm */
127 movl $1431655766, %eax
128 fsubrl MO(f2) /* f2-f3*xm : xm */
129 imull %ecx
130 fmul %st(1) /* (f2-f3*xm)*xm : xm */
131 movl %ecx, %eax
132 faddl MO(f1) /* u:=f1+(f2-f3*xm)*xm : xm */
133 sarl $31, %eax
134 fld %st /* u : u : xm */
135 subl %eax, %edx
136 fmul %st(1) /* u*u : u : xm */
137 fld %st(2) /* xm : u*u : u : xm */
138 fadd %st /* 2*xm : u*u : u : xm */
139 fxch %st(1) /* u*u : 2*xm : u : xm */
140 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
141 movl %edx, %eax
142 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
143 leal (%edx,%edx,2),%edx
144 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
145 subl %edx, %ecx
146 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
0413b54c 147 shll $3, %ecx
26dee9c4
UD
148 fmulp /* u*(t2+2*xm) : 2*t2+xm */
149 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
0413b54c 150 fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */
26dee9c4 151 pushl %eax
fee732e5 152 cfi_adjust_cfa_offset (4)
26dee9c4
UD
153 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
154 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
26dee9c4 155 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
0413b54c 156 popl %edx
fee732e5 157 cfi_adjust_cfa_offset (-4)
26dee9c4 158#ifdef PIC
0413b54c 159 movl 8(%esp), %eax
26dee9c4 160 popl %ebx
fee732e5
UD
161 cfi_adjust_cfa_offset (-4)
162 cfi_restore (ebx)
0413b54c
UD
163#else
164 movl 4(%esp), %eax
26dee9c4 165#endif
0413b54c
UD
166 testl %eax, %eax
167 fstp %st(1)
168 jns 4f
26dee9c4
UD
169 fchs
1704: ret
171
172 /* Return the argument. */
1731: flds 4(%esp)
174 ret
175END(__cbrtf)
e4602cba 176libm_alias_float (__cbrt, cbrt)