]>
Commit | Line | Data |
---|---|---|
d5efd131 MF |
1 | .file "sqrtl.s" |
2 | ||
3 | ||
4 | // Copyright (c) 2000 - 2003, Intel Corporation | |
5 | // All rights reserved. | |
6 | // | |
d5efd131 MF |
7 | // |
8 | // Redistribution and use in source and binary forms, with or without | |
9 | // modification, are permitted provided that the following conditions are | |
10 | // met: | |
11 | // | |
12 | // * Redistributions of source code must retain the above copyright | |
13 | // notice, this list of conditions and the following disclaimer. | |
14 | // | |
15 | // * Redistributions in binary form must reproduce the above copyright | |
16 | // notice, this list of conditions and the following disclaimer in the | |
17 | // documentation and/or other materials provided with the distribution. | |
18 | // | |
19 | // * The name of Intel Corporation may not be used to endorse or promote | |
20 | // products derived from this software without specific prior written | |
21 | // permission. | |
22 | ||
0347518d MF |
23 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
24 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
d5efd131 | 25 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
0347518d | 26 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS |
d5efd131 | 27 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
0347518d MF |
28 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
29 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
30 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
d5efd131 | 31 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING |
0347518d MF |
32 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
33 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
34 | // | |
d5efd131 | 35 | // Intel Corporation is the author of this code, and requests that all |
0347518d | 36 | // problem reports or change requests be submitted to it directly at |
d5efd131 MF |
37 | // http://www.intel.com/software/products/opensource/libraries/num.htm. |
38 | // | |
39 | //******************************************************************** | |
40 | // | |
41 | // History: | |
42 | // 02/02/00 (hand-optimized) | |
43 | // 04/04/00 Unwind support added | |
44 | // 08/15/00 Bundle added after call to __libm_error_support to properly | |
45 | // set [the previously overwritten] GR_Parameter_RESULT. | |
46 | // 05/20/02 Cleaned up namespace and sf0 syntax | |
47 | // 02/10/03 Reordered header: .section, .global, .proc, .align | |
48 | // | |
49 | //******************************************************************** | |
50 | // | |
51 | // Function: Combined sqrtl(x), where | |
52 | // _ | |
53 | // sqrtl(x) = |x, for double-extended precision x values | |
54 | // | |
55 | //******************************************************************** | |
56 | // | |
57 | // Resources Used: | |
58 | // | |
59 | // Floating-Point Registers: f8 (Input and Return Value) | |
60 | // f7 -f14 | |
61 | // | |
62 | // General Purpose Registers: | |
63 | // r32-r36 (Locals) | |
64 | // r37-r40 (Used to pass arguments to error handling routine) | |
65 | // | |
66 | // Predicate Registers: p6, p7, p8 | |
67 | // | |
68 | //******************************************************************** | |
69 | // | |
70 | // IEEE Special Conditions: | |
71 | // | |
72 | // All faults and exceptions should be raised correctly. | |
73 | // sqrtl(QNaN) = QNaN | |
74 | // sqrtl(SNaN) = QNaN | |
75 | // sqrtl(+/-0) = +/-0 | |
76 | // sqrtl(negative) = QNaN and error handling is called | |
77 | // | |
78 | //******************************************************************** | |
79 | // | |
80 | // Implementation: | |
81 | // | |
82 | // Modified Newton-Raphson Algorithm | |
83 | // | |
84 | //******************************************************************** | |
85 | ||
86 | GR_SAVE_PFS = r33 | |
87 | GR_SAVE_B0 = r34 | |
88 | GR_SAVE_GP = r35 | |
89 | GR_Parameter_X = r37 | |
90 | GR_Parameter_Y = r38 | |
91 | GR_Parameter_RESULT = r39 | |
92 | GR_Parameter_TAG = r40 | |
93 | ||
94 | FR_X = f15 | |
95 | FR_Y = f0 | |
96 | FR_RESULT = f8 | |
97 | ||
98 | .section .text | |
99 | GLOBAL_IEEE754_ENTRY(sqrtl) | |
100 | { .mlx | |
101 | alloc r32= ar.pfs,0,5,4,0 | |
102 | // exponent of +1/2 in r2 | |
103 | movl r2 = 0x0fffe;; | |
104 | } { .mfi | |
105 | // +1/2 in f10 | |
106 | setf.exp f12 = r2 | |
107 | // Step (1) | |
108 | // y0 = 1/sqrt(a) in f7 | |
109 | frsqrta.s0 f7,p6=f8 | |
110 | nop.i 0;; | |
111 | } { .mfi | |
112 | nop.m 0 | |
113 | // Step (2) | |
114 | // H0 = +1/2 * y0 in f9 | |
115 | (p6) fma.s1 f9=f12,f7,f0 | |
116 | nop.i 0 | |
117 | } { .mfi | |
118 | nop.m 0 | |
119 | // Step (3) | |
120 | // S0 = a * y0 in f7 | |
121 | (p6) fma.s1 f7=f8,f7,f0 | |
122 | nop.i 0;; | |
123 | } { .mfi | |
124 | nop.m 0 | |
0347518d MF |
125 | // Make copy input x |
126 | mov f13=f8 | |
d5efd131 MF |
127 | nop.i 0 |
128 | } { .mfi | |
129 | nop.m 0 | |
130 | fclass.m.unc p7,p8 = f8,0x3A | |
131 | nop.i 0;; | |
132 | } { .mfi | |
133 | nop.m 0 | |
134 | // Step (4) | |
135 | // d0 = 1/2 - S0 * H0 in f10 | |
136 | (p6) fnma.s1 f10=f7,f9,f12 | |
137 | nop.i 0;; | |
0347518d | 138 | } |
d5efd131 MF |
139 | { .mfi |
140 | nop.m 0 | |
141 | mov f15=f8 | |
142 | nop.i 0;; | |
143 | } { .mfi | |
144 | nop.m 0 | |
145 | // Step (5) | |
146 | // H1 = H0 + d0 * H0 in f9 | |
147 | (p6) fma.s1 f9=f10,f9,f9 | |
148 | nop.i 0 | |
149 | } { .mfi | |
150 | nop.m 0 | |
151 | // Step (6) | |
152 | // S1 = S0 + d0 * S0 in f7 | |
153 | (p6) fma.s1 f7=f10,f7,f7 | |
154 | nop.i 0;; | |
155 | } { .mfi | |
156 | nop.m 0 | |
157 | // Step (7) | |
158 | // d1 = 1/2 - S1 * H1 in f10 | |
159 | (p6) fnma.s1 f10=f7,f9,f12 | |
160 | nop.i 0;; | |
161 | } { .mfi | |
162 | nop.m 0 | |
163 | // Step (8) | |
164 | // H2 = H1 + d1 * H1 in f9 | |
165 | (p6) fma.s1 f9=f10,f9,f9 | |
166 | nop.i 0 | |
167 | } { .mfi | |
168 | nop.m 0 | |
169 | // Step (9) | |
170 | // S2 = S1 + d1 * S1 in f7 | |
171 | (p6) fma.s1 f7=f10,f7,f7 | |
172 | nop.i 0;; | |
173 | } { .mfi | |
174 | nop.m 0 | |
175 | // Step (10) | |
176 | // d2 = 1/2 - S2 * H2 in f10 | |
177 | (p6) fnma.s1 f10=f7,f9,f12 | |
178 | nop.i 0 | |
179 | } { .mfi | |
180 | nop.m 0 | |
181 | // Step (11) | |
182 | // e2 = a - S2 * S2 in f12 | |
183 | (p6) fnma.s1 f12=f7,f7,f8 | |
184 | nop.i 0;; | |
185 | } { .mfi | |
186 | nop.m 0 | |
187 | // Step (12) | |
188 | // S3 = S2 + d2 * S2 in f7 | |
189 | (p6) fma.s1 f7=f12,f9,f7 | |
190 | nop.i 0 | |
191 | } { .mfi | |
192 | nop.m 0 | |
193 | // Step (13) | |
194 | // H3 = H2 + d2 * H2 in f9 | |
195 | (p6) fma.s1 f9=f10,f9,f9 | |
196 | nop.i 0;; | |
197 | } { .mfi | |
198 | nop.m 0 | |
199 | // Step (14) | |
200 | // e3 = a - S3 * S3 in f12 | |
201 | (p6) fnma.s1 f12=f7,f7,f8 | |
202 | nop.i 0;; | |
203 | } { .mfb | |
204 | nop.m 0 | |
205 | // Step (15) | |
206 | // S = S3 + e3 * H3 in f7 | |
207 | (p6) fma.s0 f8=f12,f9,f7 | |
208 | (p6) br.ret.sptk b0 ;; | |
209 | } | |
210 | { .mfb | |
211 | mov GR_Parameter_TAG = 48 | |
212 | mov f8 = f7 | |
213 | (p8) br.ret.sptk b0 ;; | |
214 | } | |
215 | // | |
216 | // This branch includes all those special values that are not negative, | |
217 | // with the result equal to frcpa(x) | |
218 | // | |
219 | ||
220 | ||
221 | // END DOUBLE EXTENDED PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM | |
222 | GLOBAL_IEEE754_END(sqrtl) | |
3a327316 | 223 | libm_alias_ldouble_other (__sqrt, sqrt) |
d5efd131 MF |
224 | |
225 | LOCAL_LIBM_ENTRY(__libm_error_region) | |
226 | .prologue | |
227 | { .mfi | |
228 | add GR_Parameter_Y=-32,sp // Parameter 2 value | |
229 | nop.f 0 | |
230 | .save ar.pfs,GR_SAVE_PFS | |
231 | mov GR_SAVE_PFS=ar.pfs // Save ar.pfs | |
232 | } | |
233 | { .mfi | |
234 | .fframe 64 | |
235 | add sp=-64,sp // Create new stack | |
236 | nop.f 0 | |
237 | mov GR_SAVE_GP=gp // Save gp | |
238 | };; | |
239 | { .mmi | |
240 | stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack | |
241 | add GR_Parameter_X = 16,sp // Parameter 1 address | |
242 | .save b0, GR_SAVE_B0 | |
243 | mov GR_SAVE_B0=b0 // Save b0 | |
244 | };; | |
245 | .body | |
246 | { .mib | |
247 | stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack | |
248 | add GR_Parameter_RESULT = 0,GR_Parameter_Y | |
249 | nop.b 0 // Parameter 3 address | |
250 | } | |
251 | { .mib | |
252 | stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack | |
253 | add GR_Parameter_Y = -16,GR_Parameter_Y | |
254 | br.call.sptk b0=__libm_error_support# // Call error handling function | |
255 | };; | |
256 | { .mmi | |
257 | nop.m 0 | |
258 | nop.m 0 | |
259 | add GR_Parameter_RESULT = 48,sp | |
260 | };; | |
261 | { .mmi | |
262 | ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack | |
263 | .restore sp | |
264 | add sp = 64,sp // Restore stack pointer | |
265 | mov b0 = GR_SAVE_B0 // Restore return address | |
266 | };; | |
267 | { .mib | |
268 | mov gp = GR_SAVE_GP // Restore gp | |
269 | mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs | |
270 | br.ret.sptk b0 // Return | |
271 | };; | |
272 | ||
273 | LOCAL_LIBM_END(__libm_error_region#) | |
274 | .type __libm_error_support#,@function | |
275 | .global __libm_error_support# |