]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/fpu/s_cbrtl.S
(CFLAGS-tst-align.c): Add -mpreferred-stack-boundary=4.
[thirdparty/glibc.git] / sysdeps / i386 / fpu / s_cbrtl.S
1 /* Compute cubic root of long double value.
2 Copyright (C) 1997 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
5 Ulrich Drepper <drepper@cygnus.com>, 1997.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, write to the Free
19 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307 USA. */
21
22 #include <machine/asm.h>
23
24 #ifdef __ELF__
25 .section .rodata
26 #else
27 .text
28 #endif
29
30 .align ALIGNARG(4)
31 ASM_TYPE_DIRECTIVE(f8,@object)
32 f8: .tfloat 0.161617097923756032
33 ASM_SIZE_DIRECTIVE(f8)
34 .align ALIGNARG(4)
35 ASM_TYPE_DIRECTIVE(f7,@object)
36 f7: .tfloat -0.988553671195413709
37 ASM_SIZE_DIRECTIVE(f7)
38 .align ALIGNARG(4)
39 ASM_TYPE_DIRECTIVE(f6,@object)
40 f6: .tfloat 2.65298938441952296
41 ASM_SIZE_DIRECTIVE(f6)
42 .align ALIGNARG(4)
43 ASM_TYPE_DIRECTIVE(f5,@object)
44 f5: .tfloat -4.11151425200350531
45 ASM_SIZE_DIRECTIVE(f5)
46 .align ALIGNARG(4)
47 ASM_TYPE_DIRECTIVE(f4,@object)
48 f4: .tfloat 4.09559907378707839
49 ASM_SIZE_DIRECTIVE(f4)
50 .align ALIGNARG(4)
51 ASM_TYPE_DIRECTIVE(f3,@object)
52 f3: .tfloat -2.82414939754975962
53 ASM_SIZE_DIRECTIVE(f3)
54 .align ALIGNARG(4)
55 ASM_TYPE_DIRECTIVE(f2,@object)
56 f2: .tfloat 1.67595307700780102
57 ASM_SIZE_DIRECTIVE(f2)
58 .align ALIGNARG(4)
59 ASM_TYPE_DIRECTIVE(f1,@object)
60 f1: .tfloat 0.338058687610520237
61 ASM_SIZE_DIRECTIVE(f1)
62
63 #define CBRT2 1.2599210498948731648
64 #define ONE_CBRT2 0.793700525984099737355196796584
65 #define SQR_CBRT2 1.5874010519681994748
66 #define ONE_SQR_CBRT2 0.629960524947436582364439673883
67
68 /* We make the entries in the following table all 16 bytes
69 wide to avoid having to implement a multiplication by 10. */
70 ASM_TYPE_DIRECTIVE(factor,@object)
71 .align ALIGNARG(4)
72 factor: .tfloat ONE_SQR_CBRT2
73 .byte 0, 0, 0, 0, 0, 0
74 .tfloat ONE_CBRT2
75 .byte 0, 0, 0, 0, 0, 0
76 .tfloat 1.0
77 .byte 0, 0, 0, 0, 0, 0
78 .tfloat CBRT2
79 .byte 0, 0, 0, 0, 0, 0
80 .tfloat SQR_CBRT2
81 ASM_SIZE_DIRECTIVE(factor)
82
83 ASM_TYPE_DIRECTIVE(two64,@object)
84 .align ALIGNARG(4)
85 two64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
86 ASM_SIZE_DIRECTIVE(two64)
87
88 #ifdef PIC
89 #define MO(op) op##@GOTOFF(%ebx)
90 #define MOX(op,x) op##@GOTOFF(%ebx,x,1)
91 #else
92 #define MO(op) op
93 #define MOX(op,x) op(x)
94 #endif
95
96 .text
97 ENTRY(__cbrtl)
98 movl 4(%esp), %ecx
99 movl 12(%esp), %eax
100 orl 8(%esp), %ecx
101 movl %eax, %edx
102 andl $0x7fff, %eax
103 orl %eax, %ecx
104 jz 1f
105 xorl %ecx, %ecx
106 cmpl $0x7fff, %eax
107 je 1f
108
109 #ifdef PIC
110 pushl %ebx
111 call 3f
112 3: popl %ebx
113 addl $_GLOBAL_OFFSET_TABLE_+[.-3b], %ebx
114 #endif
115
116 cmpl $0, %eax
117 jne 2f
118
119 #ifdef PIC
120 fldt 8(%esp)
121 #else
122 fldt 4(%esp)
123 #endif
124 fmull MO(two64)
125 movl $-64, %ecx
126 #ifdef PIC
127 fstpt 8(%esp)
128 movl 16(%esp), %eax
129 #else
130 fstpt 4(%esp)
131 movl 12(%esp), %eax
132 #endif
133 movl %eax, %edx
134 andl $0x7fff, %eax
135
136 2: andl $0x8000, %edx
137 subl $16382, %eax
138 orl $0x3ffe, %edx
139 addl %eax, %ecx
140 #ifdef PIC
141 movl %edx, 16(%esp)
142
143 fldt 8(%esp) /* xm */
144 #else
145 movl %edx, 12(%esp)
146
147 fldt 4(%esp) /* xm */
148 #endif
149 fabs
150
151 /* The following code has two tracks:
152 a) compute the normalized cbrt value
153 b) compute xe/3 and xe%3
154 The right track computes the value for b) and this is done
155 in an optimized way by avoiding division.
156
157 But why two tracks at all? Very easy: efficiency. Some FP
158 instruction can overlap with a certain amount of integer (and
159 FP) instructions. So we get (except for the imull) all
160 instructions for free. */
161
162 fldt MO(f8) /* f8 : xm */
163 fmul %st(1) /* f8*xm : xm */
164
165 fldt MO(f7)
166 faddp /* f7+f8*xm : xm */
167 fmul %st(1) /* (f7+f8*xm)*xm : xm */
168 movl $1431655766, %eax
169 fldt MO(f6)
170 faddp /* f6+(f7+f8*xm)*xm : xm */
171 imull %ecx
172 fmul %st(1) /* (f6+(f7+f8*xm)*xm)*xm : xm */
173 movl %ecx, %eax
174 fldt MO(f5)
175 faddp /* f5+(f6+(f7+f8*xm)*xm)*xm : xm */
176 sarl $31, %eax
177 fmul %st(1) /* (f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */
178 subl %eax, %edx
179 fldt MO(f4)
180 faddp /* f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */
181 fmul %st(1) /* (f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */
182 fldt MO(f3)
183 faddp /* f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */
184 fmul %st(1) /* (f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */
185 fldt MO(f2)
186 faddp /* f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */
187 fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */
188 fldt MO(f1)
189 faddp /* u:=f1+(f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */
190
191 fld %st /* u : u : xm */
192 fmul %st(1) /* u*u : u : xm */
193 fld %st(2) /* xm : u*u : u : xm */
194 fadd %st /* 2*xm : u*u : u : xm */
195 fxch %st(1) /* u*u : 2*xm : u : xm */
196 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
197 movl %edx, %eax
198 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
199 leal (%edx,%edx,2),%edx
200 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
201 subl %edx, %ecx
202 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
203 shll $4, %ecx
204 fmulp /* u*(t2+2*xm) : 2*t2+xm */
205 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
206 fldt MOX(32+factor,%ecx)
207 fmulp /* u*(t2+2*xm)/(2*t2+xm)*FACT */
208 pushl %eax
209 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
210 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
211 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
212 popl %edx
213 #ifdef PIC
214 movl 16(%esp), %eax
215 popl %ebx
216 #else
217 movl 12(%esp), %eax
218 #endif
219 testl $0x8000, %eax
220 fstp %st(1)
221 jz 4f
222 fchs
223 4: ret
224
225 /* Return the argument. */
226 1: fldt 4(%esp)
227 ret
228 END(__cbrtl)
229 weak_alias (__cbrtl, cbrtl)