]>
Commit | Line | Data |
---|---|---|
26dee9c4 | 1 | /* Compute cubic root of double value. |
dff8da6b | 2 | Copyright (C) 1997-2024 Free Software Foundation, Inc. |
26dee9c4 | 3 | This file is part of the GNU C Library. |
26dee9c4 UD |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
26dee9c4 UD |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 13 | Lesser General Public License for more details. |
26dee9c4 | 14 | |
41bdb6e2 | 15 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 | 16 | License along with the GNU C Library; if not, see |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
26dee9c4 UD |
18 | |
19 | #include <machine/asm.h> | |
bc4e8f9b | 20 | #include <libm-alias-double.h> |
26dee9c4 | 21 | |
26dee9c4 | 22 | .section .rodata |
26dee9c4 UD |
23 | |
24 | .align ALIGNARG(4) | |
b67e9372 | 25 | .type f7,@object |
26dee9c4 UD |
26 | f7: .double -0.145263899385486377 |
27 | ASM_SIZE_DIRECTIVE(f7) | |
b67e9372 | 28 | .type f6,@object |
0413b54c UD |
29 | f6: .double 0.784932344976639262 |
30 | ASM_SIZE_DIRECTIVE(f6) | |
b67e9372 | 31 | .type f5,@object |
0413b54c UD |
32 | f5: .double -1.83469277483613086 |
33 | ASM_SIZE_DIRECTIVE(f5) | |
b67e9372 | 34 | .type f4,@object |
0413b54c UD |
35 | f4: .double 2.44693122563534430 |
36 | ASM_SIZE_DIRECTIVE(f4) | |
b67e9372 | 37 | .type f3,@object |
0413b54c UD |
38 | f3: .double -2.11499494167371287 |
39 | ASM_SIZE_DIRECTIVE(f3) | |
b67e9372 | 40 | .type f2,@object |
0413b54c UD |
41 | f2: .double 1.50819193781584896 |
42 | ASM_SIZE_DIRECTIVE(f2) | |
b67e9372 | 43 | .type f1,@object |
0413b54c UD |
44 | f1: .double 0.354895765043919860 |
45 | ASM_SIZE_DIRECTIVE(f1) | |
26dee9c4 | 46 | |
0413b54c UD |
47 | #define CBRT2 1.2599210498948731648 |
48 | #define ONE_CBRT2 0.793700525984099737355196796584 | |
49 | #define SQR_CBRT2 1.5874010519681994748 | |
50 | #define ONE_SQR_CBRT2 0.629960524947436582364439673883 | |
26dee9c4 | 51 | |
b67e9372 | 52 | .type factor,@object |
0413b54c UD |
53 | factor: .double ONE_SQR_CBRT2 |
54 | .double ONE_CBRT2 | |
26dee9c4 UD |
55 | .double 1.0 |
56 | .double CBRT2 | |
57 | .double SQR_CBRT2 | |
58 | ASM_SIZE_DIRECTIVE(factor) | |
59 | ||
b67e9372 | 60 | .type two54,@object |
26dee9c4 UD |
61 | two54: .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43 |
62 | ASM_SIZE_DIRECTIVE(two54) | |
63 | ||
64 | #ifdef PIC | |
65 | #define MO(op) op##@GOTOFF(%ebx) | |
0413b54c | 66 | #define MOX(op,x) op##@GOTOFF(%ebx,x,1) |
26dee9c4 UD |
67 | #else |
68 | #define MO(op) op | |
0413b54c | 69 | #define MOX(op,x) op(x) |
26dee9c4 UD |
70 | #endif |
71 | ||
72 | .text | |
73 | ENTRY(__cbrt) | |
74 | movl 4(%esp), %ecx | |
75 | movl 8(%esp), %eax | |
76 | movl %eax, %edx | |
77 | andl $0x7fffffff, %eax | |
78 | orl %eax, %ecx | |
79 | jz 1f | |
80 | xorl %ecx, %ecx | |
81 | cmpl $0x7ff00000, %eax | |
82 | jae 1f | |
83 | ||
84 | #ifdef PIC | |
85 | pushl %ebx | |
fee732e5 UD |
86 | cfi_adjust_cfa_offset (4) |
87 | cfi_rel_offset (ebx, 0) | |
88 | LOAD_PIC_REG (bx) | |
26dee9c4 UD |
89 | #endif |
90 | ||
91 | cmpl $0x00100000, %eax | |
92 | jae 2f | |
93 | ||
94 | #ifdef PIC | |
95 | fldl 8(%esp) | |
96 | #else | |
97 | fldl 4(%esp) | |
98 | #endif | |
99 | fmull MO(two54) | |
100 | movl $-54, %ecx | |
0413b54c UD |
101 | #ifdef PIC |
102 | fstpl 8(%esp) | |
103 | movl 12(%esp), %eax | |
104 | #else | |
26dee9c4 UD |
105 | fstpl 4(%esp) |
106 | movl 8(%esp), %eax | |
0413b54c | 107 | #endif |
26dee9c4 UD |
108 | movl %eax, %edx |
109 | andl $0x7fffffff, %eax | |
110 | ||
111 | 2: shrl $20, %eax | |
112 | andl $0x800fffff, %edx | |
113 | subl $1022, %eax | |
114 | orl $0x3fe00000, %edx | |
115 | addl %eax, %ecx | |
116 | #ifdef PIC | |
117 | movl %edx, 12(%esp) | |
118 | ||
119 | fldl 8(%esp) /* xm */ | |
120 | #else | |
121 | movl %edx, 8(%esp) | |
122 | ||
123 | fldl 4(%esp) /* xm */ | |
124 | #endif | |
125 | fabs | |
126 | ||
0413b54c | 127 | /* The following code has two tracks: |
26dee9c4 UD |
128 | a) compute the normalized cbrt value |
129 | b) compute xe/3 and xe%3 | |
130 | The right track computes the value for b) and this is done | |
0413b54c UD |
131 | in an optimized way by avoiding division. |
132 | ||
133 | But why two tracks at all? Very easy: efficiency. Some FP | |
134 | instruction can overlap with a certain amount of integer (and | |
135 | FP) instructions. So we get (except for the imull) all | |
136 | instructions for free. */ | |
26dee9c4 UD |
137 | |
138 | fld %st(0) /* xm : xm */ | |
139 | ||
140 | fmull MO(f7) /* f7*xm : xm */ | |
141 | movl $1431655766, %eax | |
142 | faddl MO(f6) /* f6+f7*xm : xm */ | |
143 | imull %ecx | |
144 | fmul %st(1) /* (f6+f7*xm)*xm : xm */ | |
145 | movl %ecx, %eax | |
146 | faddl MO(f5) /* f5+(f6+f7*xm)*xm : xm */ | |
147 | sarl $31, %eax | |
148 | fmul %st(1) /* (f5+(f6+f7*xm)*xm)*xm : xm */ | |
149 | subl %eax, %edx | |
150 | faddl MO(f4) /* f4+(f5+(f6+f7*xm)*xm)*xm : xm */ | |
151 | fmul %st(1) /* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */ | |
152 | faddl MO(f3) /* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */ | |
153 | fmul %st(1) /* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */ | |
154 | faddl MO(f2) /* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */ | |
155 | fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */ | |
156 | faddl MO(f1) /* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */ | |
157 | ||
158 | fld %st /* u : u : xm */ | |
159 | fmul %st(1) /* u*u : u : xm */ | |
160 | fld %st(2) /* xm : u*u : u : xm */ | |
161 | fadd %st /* 2*xm : u*u : u : xm */ | |
162 | fxch %st(1) /* u*u : 2*xm : u : xm */ | |
163 | fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */ | |
164 | movl %edx, %eax | |
165 | fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */ | |
166 | leal (%edx,%edx,2),%edx | |
167 | fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */ | |
168 | subl %edx, %ecx | |
169 | faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */ | |
0413b54c | 170 | shll $3, %ecx |
26dee9c4 UD |
171 | fmulp /* u*(t2+2*xm) : 2*t2+xm */ |
172 | fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */ | |
0413b54c | 173 | fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */ |
26dee9c4 | 174 | pushl %eax |
fee732e5 | 175 | cfi_adjust_cfa_offset (4) |
26dee9c4 UD |
176 | fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */ |
177 | fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */ | |
26dee9c4 | 178 | fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */ |
0413b54c | 179 | popl %edx |
fee732e5 | 180 | cfi_adjust_cfa_offset (-4) |
26dee9c4 | 181 | #ifdef PIC |
0413b54c | 182 | movl 12(%esp), %eax |
26dee9c4 | 183 | popl %ebx |
fee732e5 UD |
184 | cfi_adjust_cfa_offset (-4) |
185 | cfi_restore (ebx) | |
0413b54c UD |
186 | #else |
187 | movl 8(%esp), %eax | |
26dee9c4 | 188 | #endif |
0413b54c UD |
189 | testl %eax, %eax |
190 | fstp %st(1) | |
191 | jns 4f | |
26dee9c4 UD |
192 | fchs |
193 | 4: ret | |
194 | ||
195 | /* Return the argument. */ | |
196 | 1: fldl 4(%esp) | |
197 | ret | |
198 | END(__cbrt) | |
bc4e8f9b | 199 | libm_alias_double (__cbrt, cbrt) |