]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/fpu/s_cbrt.S
Update copyright notices with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / i386 / fpu / s_cbrt.S
1 /* Compute cubic root of double value.
2 Copyright (C) 1997-2013 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
5 Ulrich Drepper <drepper@cygnus.com>, 1997.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, see
19 <http://www.gnu.org/licenses/>. */
20
21 #include <machine/asm.h>
22
23 .section .rodata
24
25 .align ALIGNARG(4)
26 .type f7,@object
27 f7: .double -0.145263899385486377
28 ASM_SIZE_DIRECTIVE(f7)
29 .type f6,@object
30 f6: .double 0.784932344976639262
31 ASM_SIZE_DIRECTIVE(f6)
32 .type f5,@object
33 f5: .double -1.83469277483613086
34 ASM_SIZE_DIRECTIVE(f5)
35 .type f4,@object
36 f4: .double 2.44693122563534430
37 ASM_SIZE_DIRECTIVE(f4)
38 .type f3,@object
39 f3: .double -2.11499494167371287
40 ASM_SIZE_DIRECTIVE(f3)
41 .type f2,@object
42 f2: .double 1.50819193781584896
43 ASM_SIZE_DIRECTIVE(f2)
44 .type f1,@object
45 f1: .double 0.354895765043919860
46 ASM_SIZE_DIRECTIVE(f1)
47
48 #define CBRT2 1.2599210498948731648
49 #define ONE_CBRT2 0.793700525984099737355196796584
50 #define SQR_CBRT2 1.5874010519681994748
51 #define ONE_SQR_CBRT2 0.629960524947436582364439673883
52
53 .type factor,@object
54 factor: .double ONE_SQR_CBRT2
55 .double ONE_CBRT2
56 .double 1.0
57 .double CBRT2
58 .double SQR_CBRT2
59 ASM_SIZE_DIRECTIVE(factor)
60
61 .type two54,@object
62 two54: .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43
63 ASM_SIZE_DIRECTIVE(two54)
64
65 #ifdef PIC
66 #define MO(op) op##@GOTOFF(%ebx)
67 #define MOX(op,x) op##@GOTOFF(%ebx,x,1)
68 #else
69 #define MO(op) op
70 #define MOX(op,x) op(x)
71 #endif
72
73 .text
74 ENTRY(__cbrt)
75 movl 4(%esp), %ecx
76 movl 8(%esp), %eax
77 movl %eax, %edx
78 andl $0x7fffffff, %eax
79 orl %eax, %ecx
80 jz 1f
81 xorl %ecx, %ecx
82 cmpl $0x7ff00000, %eax
83 jae 1f
84
85 #ifdef PIC
86 pushl %ebx
87 cfi_adjust_cfa_offset (4)
88 cfi_rel_offset (ebx, 0)
89 LOAD_PIC_REG (bx)
90 #endif
91
92 cmpl $0x00100000, %eax
93 jae 2f
94
95 #ifdef PIC
96 fldl 8(%esp)
97 #else
98 fldl 4(%esp)
99 #endif
100 fmull MO(two54)
101 movl $-54, %ecx
102 #ifdef PIC
103 fstpl 8(%esp)
104 movl 12(%esp), %eax
105 #else
106 fstpl 4(%esp)
107 movl 8(%esp), %eax
108 #endif
109 movl %eax, %edx
110 andl $0x7fffffff, %eax
111
112 2: shrl $20, %eax
113 andl $0x800fffff, %edx
114 subl $1022, %eax
115 orl $0x3fe00000, %edx
116 addl %eax, %ecx
117 #ifdef PIC
118 movl %edx, 12(%esp)
119
120 fldl 8(%esp) /* xm */
121 #else
122 movl %edx, 8(%esp)
123
124 fldl 4(%esp) /* xm */
125 #endif
126 fabs
127
128 /* The following code has two tracks:
129 a) compute the normalized cbrt value
130 b) compute xe/3 and xe%3
131 The right track computes the value for b) and this is done
132 in an optimized way by avoiding division.
133
134 But why two tracks at all? Very easy: efficiency. Some FP
135 instruction can overlap with a certain amount of integer (and
136 FP) instructions. So we get (except for the imull) all
137 instructions for free. */
138
139 fld %st(0) /* xm : xm */
140
141 fmull MO(f7) /* f7*xm : xm */
142 movl $1431655766, %eax
143 faddl MO(f6) /* f6+f7*xm : xm */
144 imull %ecx
145 fmul %st(1) /* (f6+f7*xm)*xm : xm */
146 movl %ecx, %eax
147 faddl MO(f5) /* f5+(f6+f7*xm)*xm : xm */
148 sarl $31, %eax
149 fmul %st(1) /* (f5+(f6+f7*xm)*xm)*xm : xm */
150 subl %eax, %edx
151 faddl MO(f4) /* f4+(f5+(f6+f7*xm)*xm)*xm : xm */
152 fmul %st(1) /* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
153 faddl MO(f3) /* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
154 fmul %st(1) /* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
155 faddl MO(f2) /* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
156 fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
157 faddl MO(f1) /* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
158
159 fld %st /* u : u : xm */
160 fmul %st(1) /* u*u : u : xm */
161 fld %st(2) /* xm : u*u : u : xm */
162 fadd %st /* 2*xm : u*u : u : xm */
163 fxch %st(1) /* u*u : 2*xm : u : xm */
164 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
165 movl %edx, %eax
166 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
167 leal (%edx,%edx,2),%edx
168 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
169 subl %edx, %ecx
170 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
171 shll $3, %ecx
172 fmulp /* u*(t2+2*xm) : 2*t2+xm */
173 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
174 fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */
175 pushl %eax
176 cfi_adjust_cfa_offset (4)
177 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
178 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
179 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
180 popl %edx
181 cfi_adjust_cfa_offset (-4)
182 #ifdef PIC
183 movl 12(%esp), %eax
184 popl %ebx
185 cfi_adjust_cfa_offset (-4)
186 cfi_restore (ebx)
187 #else
188 movl 8(%esp), %eax
189 #endif
190 testl %eax, %eax
191 fstp %st(1)
192 jns 4f
193 fchs
194 4: ret
195
196 /* Return the argument. */
197 1: fldl 4(%esp)
198 ret
199 END(__cbrt)
200 weak_alias (__cbrt, cbrt)