]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/fpu/s_cbrtl.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / i386 / fpu / s_cbrtl.S
CommitLineData
26dee9c4 1/* Compute cubic root of long double value.
b168057a 2 Copyright (C) 1997-2015 Free Software Foundation, Inc.
26dee9c4
UD
3 This file is part of the GNU C Library.
4 Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
5 Ulrich Drepper <drepper@cygnus.com>, 1997.
6
7 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
26dee9c4
UD
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 15 Lesser General Public License for more details.
26dee9c4 16
41bdb6e2 17 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
18 License along with the GNU C Library; if not, see
19 <http://www.gnu.org/licenses/>. */
26dee9c4
UD
20
21#include <machine/asm.h>
22
26dee9c4 23 .section .rodata
26dee9c4
UD
24
25 .align ALIGNARG(4)
b67e9372 26 .type f8,@object
0413b54c 27f8: .tfloat 0.161617097923756032
26dee9c4 28 ASM_SIZE_DIRECTIVE(f8)
0413b54c 29 .align ALIGNARG(4)
b67e9372 30 .type f7,@object
0413b54c
UD
31f7: .tfloat -0.988553671195413709
32 ASM_SIZE_DIRECTIVE(f7)
33 .align ALIGNARG(4)
b67e9372 34 .type f6,@object
0413b54c
UD
35f6: .tfloat 2.65298938441952296
36 ASM_SIZE_DIRECTIVE(f6)
37 .align ALIGNARG(4)
b67e9372 38 .type f5,@object
0413b54c
UD
39f5: .tfloat -4.11151425200350531
40 ASM_SIZE_DIRECTIVE(f5)
41 .align ALIGNARG(4)
b67e9372 42 .type f4,@object
0413b54c
UD
43f4: .tfloat 4.09559907378707839
44 ASM_SIZE_DIRECTIVE(f4)
45 .align ALIGNARG(4)
b67e9372 46 .type f3,@object
0413b54c
UD
47f3: .tfloat -2.82414939754975962
48 ASM_SIZE_DIRECTIVE(f3)
49 .align ALIGNARG(4)
b67e9372 50 .type f2,@object
0413b54c
UD
51f2: .tfloat 1.67595307700780102
52 ASM_SIZE_DIRECTIVE(f2)
53 .align ALIGNARG(4)
b67e9372 54 .type f1,@object
0413b54c
UD
55f1: .tfloat 0.338058687610520237
56 ASM_SIZE_DIRECTIVE(f1)
26dee9c4 57
0413b54c
UD
58#define CBRT2 1.2599210498948731648
59#define ONE_CBRT2 0.793700525984099737355196796584
60#define SQR_CBRT2 1.5874010519681994748
61#define ONE_SQR_CBRT2 0.629960524947436582364439673883
26dee9c4 62
0413b54c
UD
63 /* We make the entries in the following table all 16 bytes
64 wide to avoid having to implement a multiplication by 10. */
b67e9372 65 .type factor,@object
0413b54c
UD
66 .align ALIGNARG(4)
67factor: .tfloat ONE_SQR_CBRT2
68 .byte 0, 0, 0, 0, 0, 0
69 .tfloat ONE_CBRT2
70 .byte 0, 0, 0, 0, 0, 0
71 .tfloat 1.0
72 .byte 0, 0, 0, 0, 0, 0
73 .tfloat CBRT2
74 .byte 0, 0, 0, 0, 0, 0
75 .tfloat SQR_CBRT2
26dee9c4
UD
76 ASM_SIZE_DIRECTIVE(factor)
77
b67e9372 78 .type two64,@object
0413b54c 79 .align ALIGNARG(4)
26dee9c4
UD
80two64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
81 ASM_SIZE_DIRECTIVE(two64)
82
83#ifdef PIC
84#define MO(op) op##@GOTOFF(%ebx)
0413b54c 85#define MOX(op,x) op##@GOTOFF(%ebx,x,1)
26dee9c4
UD
86#else
87#define MO(op) op
0413b54c 88#define MOX(op,x) op(x)
26dee9c4
UD
89#endif
90
91 .text
92ENTRY(__cbrtl)
93 movl 4(%esp), %ecx
94 movl 12(%esp), %eax
95 orl 8(%esp), %ecx
96 movl %eax, %edx
97 andl $0x7fff, %eax
98 orl %eax, %ecx
99 jz 1f
100 xorl %ecx, %ecx
101 cmpl $0x7fff, %eax
102 je 1f
103
104#ifdef PIC
105 pushl %ebx
fee732e5
UD
106 cfi_adjust_cfa_offset (4)
107 cfi_rel_offset (ebx, 0)
108 LOAD_PIC_REG (bx)
26dee9c4
UD
109#endif
110
111 cmpl $0, %eax
0413b54c 112 jne 2f
26dee9c4
UD
113
114#ifdef PIC
115 fldt 8(%esp)
116#else
117 fldt 4(%esp)
118#endif
119 fmull MO(two64)
120 movl $-64, %ecx
0413b54c
UD
121#ifdef PIC
122 fstpt 8(%esp)
123 movl 16(%esp), %eax
124#else
26dee9c4
UD
125 fstpt 4(%esp)
126 movl 12(%esp), %eax
0413b54c 127#endif
26dee9c4
UD
128 movl %eax, %edx
129 andl $0x7fff, %eax
130
1312: andl $0x8000, %edx
132 subl $16382, %eax
133 orl $0x3ffe, %edx
134 addl %eax, %ecx
135#ifdef PIC
136 movl %edx, 16(%esp)
137
138 fldt 8(%esp) /* xm */
139#else
140 movl %edx, 12(%esp)
141
142 fldt 4(%esp) /* xm */
143#endif
144 fabs
145
0413b54c 146 /* The following code has two tracks:
26dee9c4
UD
147 a) compute the normalized cbrt value
148 b) compute xe/3 and xe%3
149 The right track computes the value for b) and this is done
0413b54c 150 in an optimized way by avoiding division.
26dee9c4 151
0413b54c
UD
152 But why two tracks at all? Very easy: efficiency. Some FP
153 instruction can overlap with a certain amount of integer (and
154 FP) instructions. So we get (except for the imull) all
155 instructions for free. */
26dee9c4 156
0413b54c
UD
157 fldt MO(f8) /* f8 : xm */
158 fmul %st(1) /* f8*xm : xm */
159
160 fldt MO(f7)
161 faddp /* f7+f8*xm : xm */
162 fmul %st(1) /* (f7+f8*xm)*xm : xm */
26dee9c4 163 movl $1431655766, %eax
0413b54c
UD
164 fldt MO(f6)
165 faddp /* f6+(f7+f8*xm)*xm : xm */
26dee9c4 166 imull %ecx
0413b54c 167 fmul %st(1) /* (f6+(f7+f8*xm)*xm)*xm : xm */
26dee9c4 168 movl %ecx, %eax
0413b54c
UD
169 fldt MO(f5)
170 faddp /* f5+(f6+(f7+f8*xm)*xm)*xm : xm */
26dee9c4 171 sarl $31, %eax
0413b54c 172 fmul %st(1) /* (f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */
26dee9c4 173 subl %eax, %edx
0413b54c
UD
174 fldt MO(f4)
175 faddp /* f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */
176 fmul %st(1) /* (f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */
177 fldt MO(f3)
178 faddp /* f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */
179 fmul %st(1) /* (f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */
180 fldt MO(f2)
181 faddp /* f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */
182 fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */
183 fldt MO(f1)
184 faddp /* u:=f1+(f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */
26dee9c4
UD
185
186 fld %st /* u : u : xm */
187 fmul %st(1) /* u*u : u : xm */
188 fld %st(2) /* xm : u*u : u : xm */
189 fadd %st /* 2*xm : u*u : u : xm */
190 fxch %st(1) /* u*u : 2*xm : u : xm */
191 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
192 movl %edx, %eax
193 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
194 leal (%edx,%edx,2),%edx
195 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
196 subl %edx, %ecx
197 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
0413b54c 198 shll $4, %ecx
26dee9c4
UD
199 fmulp /* u*(t2+2*xm) : 2*t2+xm */
200 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
0413b54c
UD
201 fldt MOX(32+factor,%ecx)
202 fmulp /* u*(t2+2*xm)/(2*t2+xm)*FACT */
26dee9c4 203 pushl %eax
fee732e5 204 cfi_adjust_cfa_offset (4)
26dee9c4
UD
205 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
206 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
26dee9c4 207 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
0413b54c 208 popl %edx
fee732e5 209 cfi_adjust_cfa_offset (-4)
26dee9c4 210#ifdef PIC
0413b54c 211 movl 16(%esp), %eax
26dee9c4 212 popl %ebx
fee732e5
UD
213 cfi_adjust_cfa_offset (-4)
214 cfi_restore (ebx)
0413b54c
UD
215#else
216 movl 12(%esp), %eax
26dee9c4 217#endif
0413b54c
UD
218 testl $0x8000, %eax
219 fstp %st(1)
26dee9c4
UD
220 jz 4f
221 fchs
2224: ret
223
224 /* Return the argument. */
2251: fldt 4(%esp)
226 ret
227END(__cbrtl)
228weak_alias (__cbrtl, cbrtl)