]>
Commit | Line | Data |
---|---|---|
26dee9c4 | 1 | /* Compute cubic root of long double value. |
b168057a | 2 | Copyright (C) 1997-2015 Free Software Foundation, Inc. |
26dee9c4 UD |
3 | This file is part of the GNU C Library. |
4 | Contributed by Dirk Alboth <dirka@uni-paderborn.de> and | |
5 | Ulrich Drepper <drepper@cygnus.com>, 1997. | |
6 | ||
7 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
8 | modify it under the terms of the GNU Lesser General Public |
9 | License as published by the Free Software Foundation; either | |
10 | version 2.1 of the License, or (at your option) any later version. | |
26dee9c4 UD |
11 | |
12 | The GNU C Library is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 15 | Lesser General Public License for more details. |
26dee9c4 | 16 | |
41bdb6e2 | 17 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 PE |
18 | License along with the GNU C Library; if not, see |
19 | <http://www.gnu.org/licenses/>. */ | |
26dee9c4 UD |
20 | |
21 | #include <machine/asm.h> | |
22 | ||
26dee9c4 | 23 | .section .rodata |
26dee9c4 UD |
24 | |
25 | .align ALIGNARG(4) | |
b67e9372 | 26 | .type f8,@object |
0413b54c | 27 | f8: .tfloat 0.161617097923756032 |
26dee9c4 | 28 | ASM_SIZE_DIRECTIVE(f8) |
0413b54c | 29 | .align ALIGNARG(4) |
b67e9372 | 30 | .type f7,@object |
0413b54c UD |
31 | f7: .tfloat -0.988553671195413709 |
32 | ASM_SIZE_DIRECTIVE(f7) | |
33 | .align ALIGNARG(4) | |
b67e9372 | 34 | .type f6,@object |
0413b54c UD |
35 | f6: .tfloat 2.65298938441952296 |
36 | ASM_SIZE_DIRECTIVE(f6) | |
37 | .align ALIGNARG(4) | |
b67e9372 | 38 | .type f5,@object |
0413b54c UD |
39 | f5: .tfloat -4.11151425200350531 |
40 | ASM_SIZE_DIRECTIVE(f5) | |
41 | .align ALIGNARG(4) | |
b67e9372 | 42 | .type f4,@object |
0413b54c UD |
43 | f4: .tfloat 4.09559907378707839 |
44 | ASM_SIZE_DIRECTIVE(f4) | |
45 | .align ALIGNARG(4) | |
b67e9372 | 46 | .type f3,@object |
0413b54c UD |
47 | f3: .tfloat -2.82414939754975962 |
48 | ASM_SIZE_DIRECTIVE(f3) | |
49 | .align ALIGNARG(4) | |
b67e9372 | 50 | .type f2,@object |
0413b54c UD |
51 | f2: .tfloat 1.67595307700780102 |
52 | ASM_SIZE_DIRECTIVE(f2) | |
53 | .align ALIGNARG(4) | |
b67e9372 | 54 | .type f1,@object |
0413b54c UD |
55 | f1: .tfloat 0.338058687610520237 |
56 | ASM_SIZE_DIRECTIVE(f1) | |
26dee9c4 | 57 | |
0413b54c UD |
58 | #define CBRT2 1.2599210498948731648 |
59 | #define ONE_CBRT2 0.793700525984099737355196796584 | |
60 | #define SQR_CBRT2 1.5874010519681994748 | |
61 | #define ONE_SQR_CBRT2 0.629960524947436582364439673883 | |
26dee9c4 | 62 | |
0413b54c UD |
63 | /* We make the entries in the following table all 16 bytes |
64 | wide to avoid having to implement a multiplication by 10. */ | |
b67e9372 | 65 | .type factor,@object |
0413b54c UD |
66 | .align ALIGNARG(4) |
67 | factor: .tfloat ONE_SQR_CBRT2 | |
68 | .byte 0, 0, 0, 0, 0, 0 | |
69 | .tfloat ONE_CBRT2 | |
70 | .byte 0, 0, 0, 0, 0, 0 | |
71 | .tfloat 1.0 | |
72 | .byte 0, 0, 0, 0, 0, 0 | |
73 | .tfloat CBRT2 | |
74 | .byte 0, 0, 0, 0, 0, 0 | |
75 | .tfloat SQR_CBRT2 | |
26dee9c4 UD |
76 | ASM_SIZE_DIRECTIVE(factor) |
77 | ||
b67e9372 | 78 | .type two64,@object |
0413b54c | 79 | .align ALIGNARG(4) |
26dee9c4 UD |
80 | two64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43 |
81 | ASM_SIZE_DIRECTIVE(two64) | |
82 | ||
83 | #ifdef PIC | |
84 | #define MO(op) op##@GOTOFF(%ebx) | |
0413b54c | 85 | #define MOX(op,x) op##@GOTOFF(%ebx,x,1) |
26dee9c4 UD |
86 | #else |
87 | #define MO(op) op | |
0413b54c | 88 | #define MOX(op,x) op(x) |
26dee9c4 UD |
89 | #endif |
90 | ||
91 | .text | |
92 | ENTRY(__cbrtl) | |
93 | movl 4(%esp), %ecx | |
94 | movl 12(%esp), %eax | |
95 | orl 8(%esp), %ecx | |
96 | movl %eax, %edx | |
97 | andl $0x7fff, %eax | |
98 | orl %eax, %ecx | |
99 | jz 1f | |
100 | xorl %ecx, %ecx | |
101 | cmpl $0x7fff, %eax | |
102 | je 1f | |
103 | ||
104 | #ifdef PIC | |
105 | pushl %ebx | |
fee732e5 UD |
106 | cfi_adjust_cfa_offset (4) |
107 | cfi_rel_offset (ebx, 0) | |
108 | LOAD_PIC_REG (bx) | |
26dee9c4 UD |
109 | #endif |
110 | ||
111 | cmpl $0, %eax | |
0413b54c | 112 | jne 2f |
26dee9c4 UD |
113 | |
114 | #ifdef PIC | |
115 | fldt 8(%esp) | |
116 | #else | |
117 | fldt 4(%esp) | |
118 | #endif | |
119 | fmull MO(two64) | |
120 | movl $-64, %ecx | |
0413b54c UD |
121 | #ifdef PIC |
122 | fstpt 8(%esp) | |
123 | movl 16(%esp), %eax | |
124 | #else | |
26dee9c4 UD |
125 | fstpt 4(%esp) |
126 | movl 12(%esp), %eax | |
0413b54c | 127 | #endif |
26dee9c4 UD |
128 | movl %eax, %edx |
129 | andl $0x7fff, %eax | |
130 | ||
131 | 2: andl $0x8000, %edx | |
132 | subl $16382, %eax | |
133 | orl $0x3ffe, %edx | |
134 | addl %eax, %ecx | |
135 | #ifdef PIC | |
136 | movl %edx, 16(%esp) | |
137 | ||
138 | fldt 8(%esp) /* xm */ | |
139 | #else | |
140 | movl %edx, 12(%esp) | |
141 | ||
142 | fldt 4(%esp) /* xm */ | |
143 | #endif | |
144 | fabs | |
145 | ||
0413b54c | 146 | /* The following code has two tracks: |
26dee9c4 UD |
147 | a) compute the normalized cbrt value |
148 | b) compute xe/3 and xe%3 | |
149 | The right track computes the value for b) and this is done | |
0413b54c | 150 | in an optimized way by avoiding division. |
26dee9c4 | 151 | |
0413b54c UD |
152 | But why two tracks at all? Very easy: efficiency. Some FP |
153 | instruction can overlap with a certain amount of integer (and | |
154 | FP) instructions. So we get (except for the imull) all | |
155 | instructions for free. */ | |
26dee9c4 | 156 | |
0413b54c UD |
157 | fldt MO(f8) /* f8 : xm */ |
158 | fmul %st(1) /* f8*xm : xm */ | |
159 | ||
160 | fldt MO(f7) | |
161 | faddp /* f7+f8*xm : xm */ | |
162 | fmul %st(1) /* (f7+f8*xm)*xm : xm */ | |
26dee9c4 | 163 | movl $1431655766, %eax |
0413b54c UD |
164 | fldt MO(f6) |
165 | faddp /* f6+(f7+f8*xm)*xm : xm */ | |
26dee9c4 | 166 | imull %ecx |
0413b54c | 167 | fmul %st(1) /* (f6+(f7+f8*xm)*xm)*xm : xm */ |
26dee9c4 | 168 | movl %ecx, %eax |
0413b54c UD |
169 | fldt MO(f5) |
170 | faddp /* f5+(f6+(f7+f8*xm)*xm)*xm : xm */ | |
26dee9c4 | 171 | sarl $31, %eax |
0413b54c | 172 | fmul %st(1) /* (f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */ |
26dee9c4 | 173 | subl %eax, %edx |
0413b54c UD |
174 | fldt MO(f4) |
175 | faddp /* f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */ | |
176 | fmul %st(1) /* (f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */ | |
177 | fldt MO(f3) | |
178 | faddp /* f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */ | |
179 | fmul %st(1) /* (f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */ | |
180 | fldt MO(f2) | |
181 | faddp /* f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */ | |
182 | fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */ | |
183 | fldt MO(f1) | |
184 | faddp /* u:=f1+(f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */ | |
26dee9c4 UD |
185 | |
186 | fld %st /* u : u : xm */ | |
187 | fmul %st(1) /* u*u : u : xm */ | |
188 | fld %st(2) /* xm : u*u : u : xm */ | |
189 | fadd %st /* 2*xm : u*u : u : xm */ | |
190 | fxch %st(1) /* u*u : 2*xm : u : xm */ | |
191 | fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */ | |
192 | movl %edx, %eax | |
193 | fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */ | |
194 | leal (%edx,%edx,2),%edx | |
195 | fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */ | |
196 | subl %edx, %ecx | |
197 | faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */ | |
0413b54c | 198 | shll $4, %ecx |
26dee9c4 UD |
199 | fmulp /* u*(t2+2*xm) : 2*t2+xm */ |
200 | fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */ | |
0413b54c UD |
201 | fldt MOX(32+factor,%ecx) |
202 | fmulp /* u*(t2+2*xm)/(2*t2+xm)*FACT */ | |
26dee9c4 | 203 | pushl %eax |
fee732e5 | 204 | cfi_adjust_cfa_offset (4) |
26dee9c4 UD |
205 | fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */ |
206 | fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */ | |
26dee9c4 | 207 | fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */ |
0413b54c | 208 | popl %edx |
fee732e5 | 209 | cfi_adjust_cfa_offset (-4) |
26dee9c4 | 210 | #ifdef PIC |
0413b54c | 211 | movl 16(%esp), %eax |
26dee9c4 | 212 | popl %ebx |
fee732e5 UD |
213 | cfi_adjust_cfa_offset (-4) |
214 | cfi_restore (ebx) | |
0413b54c UD |
215 | #else |
216 | movl 12(%esp), %eax | |
26dee9c4 | 217 | #endif |
0413b54c UD |
218 | testl $0x8000, %eax |
219 | fstp %st(1) | |
26dee9c4 UD |
220 | jz 4f |
221 | fchs | |
222 | 4: ret | |
223 | ||
224 | /* Return the argument. */ | |
225 | 1: fldt 4(%esp) | |
226 | ret | |
227 | END(__cbrtl) | |
228 | weak_alias (__cbrtl, cbrtl) |