]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/libm-i387/s_cbrtf.S
Update.
[thirdparty/glibc.git] / sysdeps / libm-i387 / s_cbrtf.S
1 /* Compute cubic root of float value.
2 Copyright (C) 1997 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
5 Ulrich Drepper <drepper@cygnus.com>, 1997.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Library General Public License for more details.
16
17 You should have received a copy of the GNU Library General Public
18 License along with the GNU C Library; see the file COPYING.LIB. If not,
19 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include <machine/asm.h>
23
24 #ifdef __ELF__
25 .section .rodata
26 #else
27 .text
28 #endif
29
30 .align ALIGNARG(4)
31 ASM_TYPE_DIRECTIVE(f3,@object)
32 f3: .double 0.191502161678719066
33 ASM_SIZE_DIRECTIVE(f3)
34 ASM_TYPE_DIRECTIVE(f2,@object)
35 f2: .double 0.697570460207922770
36 ASM_SIZE_DIRECTIVE(f2)
37 ASM_TYPE_DIRECTIVE(f1,@object)
38 f1: .double 0.492659620528969547
39 ASM_SIZE_DIRECTIVE(f1)
40
41 #define CBRT2 1.2599210498948731648
42 #define ONE_CBRT2 0.793700525984099737355196796584
43 #define SQR_CBRT2 1.5874010519681994748
44 #define ONE_SQR_CBRT2 0.629960524947436582364439673883
45
46 ASM_TYPE_DIRECTIVE(factor,@object)
47 .align ALIGNARG(4)
48 factor: .double ONE_SQR_CBRT2
49 .double ONE_CBRT2
50 .double 1.0
51 .double CBRT2
52 .double SQR_CBRT2
53 ASM_SIZE_DIRECTIVE(factor)
54
55 ASM_TYPE_DIRECTIVE(two25,@object)
56 two25: .byte 0, 0, 0, 0x4c
57 ASM_SIZE_DIRECTIVE(two25)
58
59 #ifdef PIC
60 #define MO(op) op##@GOTOFF(%ebx)
61 #define MOX(op,x) op##@GOTOFF(%ebx,x,1)
62 #else
63 #define MO(op) op
64 #define MOX(op,x) op(x)
65 #endif
66
67 .text
68 ENTRY(__cbrtf)
69 movl 4(%esp), %eax
70 xorl %ecx, %ecx
71 movl %eax, %edx
72 andl $0x7fffffff, %eax
73 jz 1f
74 cmpl $0x7f800000, %eax
75 jae 1f
76
77 #ifdef PIC
78 pushl %ebx
79 call 3f
80 3: popl %ebx
81 addl $_GLOBAL_OFFSET_TABLE_+[.-3b], %ebx
82 #endif
83
84 cmpl $0x00800000, %eax
85 jae 2f
86
87 #ifdef PIC
88 flds 8(%esp)
89 #else
90 flds 4(%esp)
91 #endif
92 fmuls MO(two25)
93 movl $-25, %ecx
94 #ifdef PIC
95 fstps 8(%esp)
96 movl 8(%esp), %eax
97 #else
98 fstps 4(%esp)
99 movl 4(%esp), %eax
100 #endif
101 movl %eax, %edx
102 andl $0x7fffffff, %eax
103
104 2: shrl $23, %eax
105 andl $0x807fffff, %edx
106 subl $126, %eax
107 orl $0x3f000000, %edx
108 addl %eax, %ecx
109 #ifdef PIC
110 movl %edx, 8(%esp)
111
112 flds 8(%esp) /* xm */
113 #else
114 movl %edx, 4(%esp)
115
116 flds 4(%esp) /* xm */
117 #endif
118 fabs
119
120 /* The following code has two tracks:
121 a) compute the normalized cbrt value
122 b) compute xe/3 and xe%3
123 The right track computes the value for b) and this is done
124 in an optimized way by avoiding division.
125
126 But why two tracks at all? Very easy: efficiency. Some FP
127 instruction can overlap with a certain amount of integer (and
128 FP) instructions. So we get (except for the imull) all
129 instructions for free. */
130
131 fld %st(0) /* xm : xm */
132 fmull MO(f3) /* f3*xm : xm */
133 movl $1431655766, %eax
134 fsubrl MO(f2) /* f2-f3*xm : xm */
135 imull %ecx
136 fmul %st(1) /* (f2-f3*xm)*xm : xm */
137 movl %ecx, %eax
138 faddl MO(f1) /* u:=f1+(f2-f3*xm)*xm : xm */
139 sarl $31, %eax
140 fld %st /* u : u : xm */
141 subl %eax, %edx
142 fmul %st(1) /* u*u : u : xm */
143 fld %st(2) /* xm : u*u : u : xm */
144 fadd %st /* 2*xm : u*u : u : xm */
145 fxch %st(1) /* u*u : 2*xm : u : xm */
146 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
147 movl %edx, %eax
148 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
149 leal (%edx,%edx,2),%edx
150 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
151 subl %edx, %ecx
152 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
153 shll $3, %ecx
154 fmulp /* u*(t2+2*xm) : 2*t2+xm */
155 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
156 fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */
157 pushl %eax
158 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
159 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
160 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
161 popl %edx
162 #ifdef PIC
163 movl 8(%esp), %eax
164 popl %ebx
165 #else
166 movl 4(%esp), %eax
167 #endif
168 testl %eax, %eax
169 fstp %st(1)
170 jns 4f
171 fchs
172 4: ret
173
174 /* Return the argument. */
175 1: flds 4(%esp)
176 ret
177 END(__cbrtf)
178 weak_alias (__cbrtf, cbrtf)