]>
Commit | Line | Data |
---|---|---|
26dee9c4 | 1 | /* Compute cubic root of float value. |
04277e02 | 2 | Copyright (C) 1997-2019 Free Software Foundation, Inc. |
26dee9c4 UD |
3 | This file is part of the GNU C Library. |
4 | Contributed by Dirk Alboth <dirka@uni-paderborn.de> and | |
5 | Ulrich Drepper <drepper@cygnus.com>, 1997. | |
6 | ||
7 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
8 | modify it under the terms of the GNU Lesser General Public |
9 | License as published by the Free Software Foundation; either | |
10 | version 2.1 of the License, or (at your option) any later version. | |
26dee9c4 UD |
11 | |
12 | The GNU C Library is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 15 | Lesser General Public License for more details. |
26dee9c4 | 16 | |
41bdb6e2 | 17 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 | 18 | License along with the GNU C Library; if not, see |
5a82c748 | 19 | <https://www.gnu.org/licenses/>. */ |
26dee9c4 UD |
20 | |
21 | #include <machine/asm.h> | |
e4602cba | 22 | #include <libm-alias-float.h> |
26dee9c4 | 23 | |
26dee9c4 | 24 | .section .rodata |
26dee9c4 UD |
25 | |
26 | .align ALIGNARG(4) | |
b67e9372 | 27 | .type f3,@object |
26dee9c4 UD |
28 | f3: .double 0.191502161678719066 |
29 | ASM_SIZE_DIRECTIVE(f3) | |
b67e9372 | 30 | .type f2,@object |
0413b54c UD |
31 | f2: .double 0.697570460207922770 |
32 | ASM_SIZE_DIRECTIVE(f2) | |
b67e9372 | 33 | .type f1,@object |
0413b54c UD |
34 | f1: .double 0.492659620528969547 |
35 | ASM_SIZE_DIRECTIVE(f1) | |
26dee9c4 | 36 | |
0413b54c UD |
37 | #define CBRT2 1.2599210498948731648 |
38 | #define ONE_CBRT2 0.793700525984099737355196796584 | |
39 | #define SQR_CBRT2 1.5874010519681994748 | |
40 | #define ONE_SQR_CBRT2 0.629960524947436582364439673883 | |
26dee9c4 | 41 | |
b67e9372 | 42 | .type factor,@object |
0413b54c UD |
43 | .align ALIGNARG(4) |
44 | factor: .double ONE_SQR_CBRT2 | |
45 | .double ONE_CBRT2 | |
26dee9c4 UD |
46 | .double 1.0 |
47 | .double CBRT2 | |
48 | .double SQR_CBRT2 | |
49 | ASM_SIZE_DIRECTIVE(factor) | |
50 | ||
b67e9372 | 51 | .type two25,@object |
26dee9c4 UD |
52 | two25: .byte 0, 0, 0, 0x4c |
53 | ASM_SIZE_DIRECTIVE(two25) | |
54 | ||
55 | #ifdef PIC | |
56 | #define MO(op) op##@GOTOFF(%ebx) | |
0413b54c | 57 | #define MOX(op,x) op##@GOTOFF(%ebx,x,1) |
26dee9c4 UD |
58 | #else |
59 | #define MO(op) op | |
0413b54c | 60 | #define MOX(op,x) op(x) |
26dee9c4 UD |
61 | #endif |
62 | ||
63 | .text | |
64 | ENTRY(__cbrtf) | |
65 | movl 4(%esp), %eax | |
66 | xorl %ecx, %ecx | |
67 | movl %eax, %edx | |
68 | andl $0x7fffffff, %eax | |
69 | jz 1f | |
70 | cmpl $0x7f800000, %eax | |
71 | jae 1f | |
72 | ||
73 | #ifdef PIC | |
74 | pushl %ebx | |
fee732e5 UD |
75 | cfi_adjust_cfa_offset (4) |
76 | cfi_rel_offset (ebx, 0) | |
77 | LOAD_PIC_REG (bx) | |
26dee9c4 UD |
78 | #endif |
79 | ||
80 | cmpl $0x00800000, %eax | |
81 | jae 2f | |
82 | ||
83 | #ifdef PIC | |
84 | flds 8(%esp) | |
85 | #else | |
86 | flds 4(%esp) | |
87 | #endif | |
88 | fmuls MO(two25) | |
89 | movl $-25, %ecx | |
90 | #ifdef PIC | |
91 | fstps 8(%esp) | |
92 | movl 8(%esp), %eax | |
93 | #else | |
94 | fstps 4(%esp) | |
95 | movl 4(%esp), %eax | |
96 | #endif | |
97 | movl %eax, %edx | |
98 | andl $0x7fffffff, %eax | |
99 | ||
100 | 2: shrl $23, %eax | |
101 | andl $0x807fffff, %edx | |
102 | subl $126, %eax | |
103 | orl $0x3f000000, %edx | |
104 | addl %eax, %ecx | |
105 | #ifdef PIC | |
106 | movl %edx, 8(%esp) | |
107 | ||
108 | flds 8(%esp) /* xm */ | |
109 | #else | |
110 | movl %edx, 4(%esp) | |
111 | ||
112 | flds 4(%esp) /* xm */ | |
113 | #endif | |
114 | fabs | |
115 | ||
0413b54c | 116 | /* The following code has two tracks: |
26dee9c4 UD |
117 | a) compute the normalized cbrt value |
118 | b) compute xe/3 and xe%3 | |
119 | The right track computes the value for b) and this is done | |
0413b54c UD |
120 | in an optimized way by avoiding division. |
121 | ||
122 | But why two tracks at all? Very easy: efficiency. Some FP | |
123 | instruction can overlap with a certain amount of integer (and | |
124 | FP) instructions. So we get (except for the imull) all | |
125 | instructions for free. */ | |
26dee9c4 UD |
126 | |
127 | fld %st(0) /* xm : xm */ | |
128 | fmull MO(f3) /* f3*xm : xm */ | |
129 | movl $1431655766, %eax | |
130 | fsubrl MO(f2) /* f2-f3*xm : xm */ | |
131 | imull %ecx | |
132 | fmul %st(1) /* (f2-f3*xm)*xm : xm */ | |
133 | movl %ecx, %eax | |
134 | faddl MO(f1) /* u:=f1+(f2-f3*xm)*xm : xm */ | |
135 | sarl $31, %eax | |
136 | fld %st /* u : u : xm */ | |
137 | subl %eax, %edx | |
138 | fmul %st(1) /* u*u : u : xm */ | |
139 | fld %st(2) /* xm : u*u : u : xm */ | |
140 | fadd %st /* 2*xm : u*u : u : xm */ | |
141 | fxch %st(1) /* u*u : 2*xm : u : xm */ | |
142 | fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */ | |
143 | movl %edx, %eax | |
144 | fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */ | |
145 | leal (%edx,%edx,2),%edx | |
146 | fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */ | |
147 | subl %edx, %ecx | |
148 | faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */ | |
0413b54c | 149 | shll $3, %ecx |
26dee9c4 UD |
150 | fmulp /* u*(t2+2*xm) : 2*t2+xm */ |
151 | fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */ | |
0413b54c | 152 | fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */ |
26dee9c4 | 153 | pushl %eax |
fee732e5 | 154 | cfi_adjust_cfa_offset (4) |
26dee9c4 UD |
155 | fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */ |
156 | fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */ | |
26dee9c4 | 157 | fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */ |
0413b54c | 158 | popl %edx |
fee732e5 | 159 | cfi_adjust_cfa_offset (-4) |
26dee9c4 | 160 | #ifdef PIC |
0413b54c | 161 | movl 8(%esp), %eax |
26dee9c4 | 162 | popl %ebx |
fee732e5 UD |
163 | cfi_adjust_cfa_offset (-4) |
164 | cfi_restore (ebx) | |
0413b54c UD |
165 | #else |
166 | movl 4(%esp), %eax | |
26dee9c4 | 167 | #endif |
0413b54c UD |
168 | testl %eax, %eax |
169 | fstp %st(1) | |
170 | jns 4f | |
26dee9c4 UD |
171 | fchs |
172 | 4: ret | |
173 | ||
174 | /* Return the argument. */ | |
175 | 1: flds 4(%esp) | |
176 | ret | |
177 | END(__cbrtf) | |
e4602cba | 178 | libm_alias_float (__cbrt, cbrt) |