]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/fpu/s_cbrt.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / i386 / fpu / s_cbrt.S
1 /* Compute cubic root of double value.
2 Copyright (C) 1997-2019 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
5 Ulrich Drepper <drepper@cygnus.com>, 1997.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, see
19 <http://www.gnu.org/licenses/>. */
20
21 #include <machine/asm.h>
22 #include <libm-alias-double.h>
23
24 .section .rodata
25
26 .align ALIGNARG(4)
27 .type f7,@object
28 f7: .double -0.145263899385486377
29 ASM_SIZE_DIRECTIVE(f7)
30 .type f6,@object
31 f6: .double 0.784932344976639262
32 ASM_SIZE_DIRECTIVE(f6)
33 .type f5,@object
34 f5: .double -1.83469277483613086
35 ASM_SIZE_DIRECTIVE(f5)
36 .type f4,@object
37 f4: .double 2.44693122563534430
38 ASM_SIZE_DIRECTIVE(f4)
39 .type f3,@object
40 f3: .double -2.11499494167371287
41 ASM_SIZE_DIRECTIVE(f3)
42 .type f2,@object
43 f2: .double 1.50819193781584896
44 ASM_SIZE_DIRECTIVE(f2)
45 .type f1,@object
46 f1: .double 0.354895765043919860
47 ASM_SIZE_DIRECTIVE(f1)
48
49 #define CBRT2 1.2599210498948731648
50 #define ONE_CBRT2 0.793700525984099737355196796584
51 #define SQR_CBRT2 1.5874010519681994748
52 #define ONE_SQR_CBRT2 0.629960524947436582364439673883
53
54 .type factor,@object
55 factor: .double ONE_SQR_CBRT2
56 .double ONE_CBRT2
57 .double 1.0
58 .double CBRT2
59 .double SQR_CBRT2
60 ASM_SIZE_DIRECTIVE(factor)
61
62 .type two54,@object
63 two54: .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43
64 ASM_SIZE_DIRECTIVE(two54)
65
66 #ifdef PIC
67 #define MO(op) op##@GOTOFF(%ebx)
68 #define MOX(op,x) op##@GOTOFF(%ebx,x,1)
69 #else
70 #define MO(op) op
71 #define MOX(op,x) op(x)
72 #endif
73
74 .text
75 ENTRY(__cbrt)
76 movl 4(%esp), %ecx
77 movl 8(%esp), %eax
78 movl %eax, %edx
79 andl $0x7fffffff, %eax
80 orl %eax, %ecx
81 jz 1f
82 xorl %ecx, %ecx
83 cmpl $0x7ff00000, %eax
84 jae 1f
85
86 #ifdef PIC
87 pushl %ebx
88 cfi_adjust_cfa_offset (4)
89 cfi_rel_offset (ebx, 0)
90 LOAD_PIC_REG (bx)
91 #endif
92
93 cmpl $0x00100000, %eax
94 jae 2f
95
96 #ifdef PIC
97 fldl 8(%esp)
98 #else
99 fldl 4(%esp)
100 #endif
101 fmull MO(two54)
102 movl $-54, %ecx
103 #ifdef PIC
104 fstpl 8(%esp)
105 movl 12(%esp), %eax
106 #else
107 fstpl 4(%esp)
108 movl 8(%esp), %eax
109 #endif
110 movl %eax, %edx
111 andl $0x7fffffff, %eax
112
113 2: shrl $20, %eax
114 andl $0x800fffff, %edx
115 subl $1022, %eax
116 orl $0x3fe00000, %edx
117 addl %eax, %ecx
118 #ifdef PIC
119 movl %edx, 12(%esp)
120
121 fldl 8(%esp) /* xm */
122 #else
123 movl %edx, 8(%esp)
124
125 fldl 4(%esp) /* xm */
126 #endif
127 fabs
128
129 /* The following code has two tracks:
130 a) compute the normalized cbrt value
131 b) compute xe/3 and xe%3
132 The right track computes the value for b) and this is done
133 in an optimized way by avoiding division.
134
135 But why two tracks at all? Very easy: efficiency. Some FP
136 instruction can overlap with a certain amount of integer (and
137 FP) instructions. So we get (except for the imull) all
138 instructions for free. */
139
140 fld %st(0) /* xm : xm */
141
142 fmull MO(f7) /* f7*xm : xm */
143 movl $1431655766, %eax
144 faddl MO(f6) /* f6+f7*xm : xm */
145 imull %ecx
146 fmul %st(1) /* (f6+f7*xm)*xm : xm */
147 movl %ecx, %eax
148 faddl MO(f5) /* f5+(f6+f7*xm)*xm : xm */
149 sarl $31, %eax
150 fmul %st(1) /* (f5+(f6+f7*xm)*xm)*xm : xm */
151 subl %eax, %edx
152 faddl MO(f4) /* f4+(f5+(f6+f7*xm)*xm)*xm : xm */
153 fmul %st(1) /* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
154 faddl MO(f3) /* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
155 fmul %st(1) /* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
156 faddl MO(f2) /* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
157 fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
158 faddl MO(f1) /* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
159
160 fld %st /* u : u : xm */
161 fmul %st(1) /* u*u : u : xm */
162 fld %st(2) /* xm : u*u : u : xm */
163 fadd %st /* 2*xm : u*u : u : xm */
164 fxch %st(1) /* u*u : 2*xm : u : xm */
165 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
166 movl %edx, %eax
167 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
168 leal (%edx,%edx,2),%edx
169 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
170 subl %edx, %ecx
171 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
172 shll $3, %ecx
173 fmulp /* u*(t2+2*xm) : 2*t2+xm */
174 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
175 fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */
176 pushl %eax
177 cfi_adjust_cfa_offset (4)
178 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
179 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
180 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
181 popl %edx
182 cfi_adjust_cfa_offset (-4)
183 #ifdef PIC
184 movl 12(%esp), %eax
185 popl %ebx
186 cfi_adjust_cfa_offset (-4)
187 cfi_restore (ebx)
188 #else
189 movl 8(%esp), %eax
190 #endif
191 testl %eax, %eax
192 fstp %st(1)
193 jns 4f
194 fchs
195 4: ret
196
197 /* Return the argument. */
198 1: fldl 4(%esp)
199 ret
200 END(__cbrt)
201 libm_alias_double (__cbrt, cbrt)