]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/fpu/s_cbrtl.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / i386 / fpu / s_cbrtl.S
1 /* Compute cubic root of long double value.
2 Copyright (C) 1997-2015 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
5 Ulrich Drepper <drepper@cygnus.com>, 1997.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, see
19 <http://www.gnu.org/licenses/>. */
20
21 #include <machine/asm.h>
22
23 .section .rodata
24
25 .align ALIGNARG(4)
26 .type f8,@object
27 f8: .tfloat 0.161617097923756032
28 ASM_SIZE_DIRECTIVE(f8)
29 .align ALIGNARG(4)
30 .type f7,@object
31 f7: .tfloat -0.988553671195413709
32 ASM_SIZE_DIRECTIVE(f7)
33 .align ALIGNARG(4)
34 .type f6,@object
35 f6: .tfloat 2.65298938441952296
36 ASM_SIZE_DIRECTIVE(f6)
37 .align ALIGNARG(4)
38 .type f5,@object
39 f5: .tfloat -4.11151425200350531
40 ASM_SIZE_DIRECTIVE(f5)
41 .align ALIGNARG(4)
42 .type f4,@object
43 f4: .tfloat 4.09559907378707839
44 ASM_SIZE_DIRECTIVE(f4)
45 .align ALIGNARG(4)
46 .type f3,@object
47 f3: .tfloat -2.82414939754975962
48 ASM_SIZE_DIRECTIVE(f3)
49 .align ALIGNARG(4)
50 .type f2,@object
51 f2: .tfloat 1.67595307700780102
52 ASM_SIZE_DIRECTIVE(f2)
53 .align ALIGNARG(4)
54 .type f1,@object
55 f1: .tfloat 0.338058687610520237
56 ASM_SIZE_DIRECTIVE(f1)
57
58 #define CBRT2 1.2599210498948731648
59 #define ONE_CBRT2 0.793700525984099737355196796584
60 #define SQR_CBRT2 1.5874010519681994748
61 #define ONE_SQR_CBRT2 0.629960524947436582364439673883
62
63 /* We make the entries in the following table all 16 bytes
64 wide to avoid having to implement a multiplication by 10. */
65 .type factor,@object
66 .align ALIGNARG(4)
67 factor: .tfloat ONE_SQR_CBRT2
68 .byte 0, 0, 0, 0, 0, 0
69 .tfloat ONE_CBRT2
70 .byte 0, 0, 0, 0, 0, 0
71 .tfloat 1.0
72 .byte 0, 0, 0, 0, 0, 0
73 .tfloat CBRT2
74 .byte 0, 0, 0, 0, 0, 0
75 .tfloat SQR_CBRT2
76 ASM_SIZE_DIRECTIVE(factor)
77
78 .type two64,@object
79 .align ALIGNARG(4)
80 two64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
81 ASM_SIZE_DIRECTIVE(two64)
82
83 #ifdef PIC
84 #define MO(op) op##@GOTOFF(%ebx)
85 #define MOX(op,x) op##@GOTOFF(%ebx,x,1)
86 #else
87 #define MO(op) op
88 #define MOX(op,x) op(x)
89 #endif
90
91 .text
92 ENTRY(__cbrtl)
93 movl 4(%esp), %ecx
94 movl 12(%esp), %eax
95 orl 8(%esp), %ecx
96 movl %eax, %edx
97 andl $0x7fff, %eax
98 orl %eax, %ecx
99 jz 1f
100 xorl %ecx, %ecx
101 cmpl $0x7fff, %eax
102 je 1f
103
104 #ifdef PIC
105 pushl %ebx
106 cfi_adjust_cfa_offset (4)
107 cfi_rel_offset (ebx, 0)
108 LOAD_PIC_REG (bx)
109 #endif
110
111 cmpl $0, %eax
112 jne 2f
113
114 #ifdef PIC
115 fldt 8(%esp)
116 #else
117 fldt 4(%esp)
118 #endif
119 fmull MO(two64)
120 movl $-64, %ecx
121 #ifdef PIC
122 fstpt 8(%esp)
123 movl 16(%esp), %eax
124 #else
125 fstpt 4(%esp)
126 movl 12(%esp), %eax
127 #endif
128 movl %eax, %edx
129 andl $0x7fff, %eax
130
131 2: andl $0x8000, %edx
132 subl $16382, %eax
133 orl $0x3ffe, %edx
134 addl %eax, %ecx
135 #ifdef PIC
136 movl %edx, 16(%esp)
137
138 fldt 8(%esp) /* xm */
139 #else
140 movl %edx, 12(%esp)
141
142 fldt 4(%esp) /* xm */
143 #endif
144 fabs
145
146 /* The following code has two tracks:
147 a) compute the normalized cbrt value
148 b) compute xe/3 and xe%3
149 The right track computes the value for b) and this is done
150 in an optimized way by avoiding division.
151
152 But why two tracks at all? Very easy: efficiency. Some FP
153 instruction can overlap with a certain amount of integer (and
154 FP) instructions. So we get (except for the imull) all
155 instructions for free. */
156
157 fldt MO(f8) /* f8 : xm */
158 fmul %st(1) /* f8*xm : xm */
159
160 fldt MO(f7)
161 faddp /* f7+f8*xm : xm */
162 fmul %st(1) /* (f7+f8*xm)*xm : xm */
163 movl $1431655766, %eax
164 fldt MO(f6)
165 faddp /* f6+(f7+f8*xm)*xm : xm */
166 imull %ecx
167 fmul %st(1) /* (f6+(f7+f8*xm)*xm)*xm : xm */
168 movl %ecx, %eax
169 fldt MO(f5)
170 faddp /* f5+(f6+(f7+f8*xm)*xm)*xm : xm */
171 sarl $31, %eax
172 fmul %st(1) /* (f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */
173 subl %eax, %edx
174 fldt MO(f4)
175 faddp /* f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */
176 fmul %st(1) /* (f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */
177 fldt MO(f3)
178 faddp /* f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */
179 fmul %st(1) /* (f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */
180 fldt MO(f2)
181 faddp /* f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */
182 fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */
183 fldt MO(f1)
184 faddp /* u:=f1+(f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */
185
186 fld %st /* u : u : xm */
187 fmul %st(1) /* u*u : u : xm */
188 fld %st(2) /* xm : u*u : u : xm */
189 fadd %st /* 2*xm : u*u : u : xm */
190 fxch %st(1) /* u*u : 2*xm : u : xm */
191 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
192 movl %edx, %eax
193 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
194 leal (%edx,%edx,2),%edx
195 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
196 subl %edx, %ecx
197 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
198 shll $4, %ecx
199 fmulp /* u*(t2+2*xm) : 2*t2+xm */
200 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
201 fldt MOX(32+factor,%ecx)
202 fmulp /* u*(t2+2*xm)/(2*t2+xm)*FACT */
203 pushl %eax
204 cfi_adjust_cfa_offset (4)
205 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
206 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
207 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
208 popl %edx
209 cfi_adjust_cfa_offset (-4)
210 #ifdef PIC
211 movl 16(%esp), %eax
212 popl %ebx
213 cfi_adjust_cfa_offset (-4)
214 cfi_restore (ebx)
215 #else
216 movl 12(%esp), %eax
217 #endif
218 testl $0x8000, %eax
219 fstp %st(1)
220 jz 4f
221 fchs
222 4: ret
223
224 /* Return the argument. */
225 1: fldt 4(%esp)
226 ret
227 END(__cbrtl)
228 weak_alias (__cbrtl, cbrtl)