]>
Commit | Line | Data |
---|---|---|
d5efd131 MF |
1 | .file "libm_ldexpl.s" |
2 | ||
3 | ||
4 | // Copyright (c) 2000 - 2003, Intel Corporation | |
5 | // All rights reserved. | |
6 | // | |
d5efd131 MF |
7 | // |
8 | // Redistribution and use in source and binary forms, with or without | |
9 | // modification, are permitted provided that the following conditions are | |
10 | // met: | |
11 | // | |
12 | // * Redistributions of source code must retain the above copyright | |
13 | // notice, this list of conditions and the following disclaimer. | |
14 | // | |
15 | // * Redistributions in binary form must reproduce the above copyright | |
16 | // notice, this list of conditions and the following disclaimer in the | |
17 | // documentation and/or other materials provided with the distribution. | |
18 | // | |
19 | // * The name of Intel Corporation may not be used to endorse or promote | |
20 | // products derived from this software without specific prior written | |
21 | // permission. | |
22 | ||
23 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
24 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
25 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
26 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS | |
27 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
28 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
29 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
30 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
31 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING | |
32 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
33 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
34 | // | |
35 | // Intel Corporation is the author of this code, and requests that all | |
36 | // problem reports or change requests be submitted to it directly at | |
37 | // http://www.intel.com/software/products/opensource/libraries/num.htm. | |
38 | // | |
39 | // History | |
40 | //============================================================== | |
41 | // 02/02/00 Initial version | |
42 | // 01/26/01 ldexpl completely reworked and now standalone version | |
43 | // 01/04/02 Added handling for int 32 or 64 bits | |
44 | // 05/20/02 Cleaned up namespace and sf0 syntax | |
45 | // 02/10/03 Reordered header: .section, .global, .proc, .align | |
46 | // 08/04/03 Improved performance | |
47 | // | |
48 | // API | |
49 | //============================================================== | |
50 | // long double __libm_ldexpl (long double x, int n, int int_type) | |
51 | // input floating point f8 and int n (r34), int int_type (r35) | |
52 | // output floating point f8 | |
53 | // | |
54 | // int_type = 0 if int is 32 bits | |
55 | // int_type = 1 if int is 64 bits | |
56 | // | |
57 | // Returns x* 2**n using an fma and detects overflow | |
58 | // and underflow. | |
59 | // | |
60 | // | |
61 | // Strategy: | |
62 | // Compute biased exponent of result exp_Result = N + exp_X | |
63 | // Break into ranges: | |
64 | // exp_Result > 0x13ffe -> Certain overflow | |
65 | // exp_Result = 0x13ffe -> Possible overflow | |
66 | // 0x0c001 <= exp_Result < 0x13ffe -> No over/underflow (main path) | |
67 | // 0x0c001 - 63 <= exp_Result < 0x0c001 -> Possible underflow | |
68 | // exp_Result < 0x0c001 - 63 -> Certain underflow | |
69 | ||
70 | FR_Big = f6 | |
71 | FR_NBig = f7 | |
72 | FR_Floating_X = f8 | |
73 | FR_Result = f8 | |
74 | FR_Result2 = f9 | |
75 | FR_Result3 = f10 | |
76 | FR_Norm_X = f11 | |
77 | FR_Two_N = f12 | |
78 | ||
79 | GR_neg_ov_limit= r14 | |
80 | GR_N_Biased = r15 | |
81 | GR_Big = r16 | |
82 | GR_NBig = r17 | |
83 | GR_exp_Result = r18 | |
84 | GR_pos_ov_limit= r19 | |
85 | GR_Bias = r20 | |
86 | GR_N_as_int = r21 | |
87 | GR_signexp_X = r22 | |
88 | GR_exp_X = r23 | |
89 | GR_exp_mask = r24 | |
90 | GR_max_exp = r25 | |
91 | GR_min_exp = r26 | |
92 | GR_min_den_exp = r27 | |
93 | ||
94 | GR_SAVE_B0 = r32 | |
95 | GR_SAVE_GP = r33 | |
96 | GR_SAVE_PFS = r34 | |
97 | GR_Parameter_X = r35 | |
98 | GR_Parameter_Y = r36 | |
99 | GR_Parameter_RESULT = r37 | |
100 | GR_Tag = r38 | |
101 | ||
102 | .section .text | |
103 | GLOBAL_LIBM_ENTRY(__libm_ldexpl) | |
104 | ||
105 | // | |
106 | // Is x NAN, INF, ZERO, +-? | |
107 | // Build the exponent Bias | |
108 | // | |
109 | { .mfi | |
110 | getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x | |
111 | fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero | |
112 | mov GR_Bias = 0x0ffff | |
113 | } | |
114 | // | |
115 | // Normalize x | |
116 | // Is integer type 32 bits? | |
117 | // | |
118 | { .mfi | |
119 | mov GR_Big = 35000 // If N this big then certain overflow | |
120 | fnorm.s1 FR_Norm_X = FR_Floating_X | |
121 | cmp.eq p8,p9 = r35,r0 | |
122 | } | |
123 | ;; | |
124 | ||
125 | // Sign extend N if int is 32 bits | |
126 | { .mfi | |
127 | (p9) mov GR_N_as_int = r34 // Copy N if int is 64 bits | |
128 | fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm | |
129 | (p8) sxt4 GR_N_as_int = r34 // Sign extend N if int is 32 bits | |
130 | } | |
131 | { .mfi | |
132 | mov GR_NBig = -35000 // If N this small then certain underflow | |
133 | nop.f 0 | |
134 | mov GR_max_exp = 0x13ffe // Exponent of maximum long double | |
135 | } | |
136 | ;; | |
137 | ||
138 | // Create biased exponent for 2**N | |
139 | { .mfi | |
140 | add GR_N_Biased = GR_Bias,GR_N_as_int | |
141 | nop.f 0 | |
142 | cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow? | |
143 | } | |
144 | { .mib | |
145 | cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow? | |
146 | mov GR_min_exp = 0x0c001 // Exponent of minimum long double | |
147 | (p9) br.cond.spnt LDEXPL_UNORM // Branch if x=unorm | |
148 | } | |
149 | ;; | |
150 | ||
151 | LDEXPL_COMMON: | |
152 | // Main path continues. Also return here from x=unorm path. | |
153 | // Create 2**N | |
154 | .pred.rel "mutex",p7,p8 | |
155 | { .mfi | |
156 | setf.exp FR_Two_N = GR_N_Biased | |
157 | nop.f 0 | |
158 | (p7) mov GR_N_as_int = GR_Big // Limit max N | |
159 | } | |
160 | { .mfi | |
161 | (p8) mov GR_N_as_int = GR_NBig // Limit min N | |
162 | nop.f 0 | |
163 | (p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big | |
164 | } | |
165 | ;; | |
166 | ||
167 | // | |
168 | // Create biased exponent for 2**N for N big | |
169 | // Is N zero? | |
170 | // | |
171 | { .mfi | |
172 | (p7) add GR_N_Biased = GR_Bias,GR_N_as_int | |
173 | nop.f 0 | |
174 | cmp.eq.or p6,p0 = r34,r0 | |
175 | } | |
176 | { .mfi | |
177 | mov GR_pos_ov_limit = 0x13fff // Exponent for positive overflow | |
178 | nop.f 0 | |
179 | mov GR_exp_mask = 0x1ffff // Exponent mask | |
180 | } | |
181 | ;; | |
182 | ||
183 | // | |
184 | // Create 2**N for N big | |
185 | // Return x when N = 0 or X = Nan, Inf, Zero | |
186 | // | |
187 | { .mfi | |
188 | (p7) setf.exp FR_Two_N = GR_N_Biased | |
189 | nop.f 0 | |
190 | mov GR_min_den_exp = 0x0c001 - 63 // Exp of min denorm long dble | |
191 | } | |
192 | { .mfb | |
193 | and GR_exp_X = GR_exp_mask, GR_signexp_X | |
194 | (p6) fma.s0 FR_Result = FR_Floating_X, f1, f0 | |
195 | (p6) br.ret.spnt b0 | |
196 | } | |
197 | ;; | |
198 | ||
199 | // | |
200 | // Raise Denormal operand flag with compare | |
201 | // Compute biased result exponent | |
202 | // | |
203 | { .mfi | |
204 | add GR_exp_Result = GR_exp_X, GR_N_as_int | |
205 | fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 | |
206 | mov GR_neg_ov_limit = 0x33fff // Exponent for negative overflow | |
207 | } | |
208 | ;; | |
209 | ||
210 | // | |
211 | // Do final operation | |
212 | // | |
213 | { .mfi | |
214 | cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow | |
215 | fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 | |
216 | cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow | |
217 | } | |
218 | { .mfb | |
219 | nop.m 0 | |
220 | nop.f 0 | |
221 | (p9) br.cond.spnt LDEXPL_UNDERFLOW // Branch if certain underflow | |
222 | } | |
223 | ;; | |
224 | ||
225 | { .mib | |
226 | (p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow | |
227 | (p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow | |
228 | (p7) br.ret.sptk b0 // Return from main path | |
229 | } | |
230 | ;; | |
231 | ||
232 | { .bbb | |
233 | (p6) br.cond.spnt LDEXPL_OVERFLOW // Branch if certain overflow | |
234 | (p8) br.cond.spnt LDEXPL_POSSIBLE_OVERFLOW // Branch if possible overflow | |
235 | (p9) br.cond.spnt LDEXPL_POSSIBLE_UNDERFLOW // Branch if possible underflow | |
236 | } | |
237 | ;; | |
238 | ||
239 | // Here if possible underflow. | |
240 | // Resulting exponent: 0x0c001-63 <= exp_Result < 0x0c001 | |
241 | LDEXPL_POSSIBLE_UNDERFLOW: | |
242 | // | |
243 | // Here if possible overflow. | |
244 | // Resulting exponent: 0x13ffe = exp_Result | |
245 | LDEXPL_POSSIBLE_OVERFLOW: | |
246 | ||
247 | // Set up necessary status fields | |
248 | // | |
249 | // S0 user supplied status | |
250 | // S2 user supplied status + WRE + TD (Overflows) | |
251 | // S3 user supplied status + FZ + TD (Underflows) | |
252 | // | |
253 | { .mfi | |
254 | nop.m 0 | |
255 | fsetc.s3 0x7F,0x41 | |
256 | nop.i 0 | |
257 | } | |
258 | { .mfi | |
259 | nop.m 0 | |
260 | fsetc.s2 0x7F,0x42 | |
261 | nop.i 0 | |
262 | } | |
263 | ;; | |
264 | ||
265 | // | |
266 | // Do final operation with s2 and s3 | |
267 | // | |
268 | { .mfi | |
269 | setf.exp FR_NBig = GR_neg_ov_limit | |
270 | fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 | |
271 | nop.i 0 | |
272 | } | |
273 | { .mfi | |
274 | setf.exp FR_Big = GR_pos_ov_limit | |
275 | fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 | |
276 | nop.i 0 | |
277 | } | |
278 | ;; | |
279 | ||
280 | // Check for overflow or underflow. | |
281 | // Restore s3 | |
282 | // Restore s2 | |
283 | // | |
284 | { .mfi | |
285 | nop.m 0 | |
286 | fsetc.s3 0x7F,0x40 | |
287 | nop.i 0 | |
288 | } | |
289 | { .mfi | |
290 | nop.m 0 | |
291 | fsetc.s2 0x7F,0x40 | |
292 | nop.i 0 | |
293 | } | |
294 | ;; | |
295 | ||
296 | // | |
297 | // Is the result zero? | |
298 | // | |
299 | { .mfi | |
300 | nop.m 0 | |
301 | fclass.m p6, p0 = FR_Result3, 0x007 | |
302 | nop.i 0 | |
303 | } | |
304 | { .mfi | |
305 | nop.m 0 | |
306 | fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big | |
307 | nop.i 0 | |
308 | } | |
309 | ;; | |
310 | ||
311 | // | |
312 | // Detect masked underflow - Tiny + Inexact Only | |
313 | // | |
314 | { .mfi | |
315 | nop.m 0 | |
316 | (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 | |
317 | nop.i 0 | |
318 | } | |
319 | ;; | |
320 | ||
321 | // | |
322 | // Is result bigger the allowed range? | |
323 | // Branch out for underflow | |
324 | // | |
325 | { .mfb | |
326 | nop.m 0 | |
327 | (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig | |
328 | (p6) br.cond.spnt LDEXPL_UNDERFLOW | |
329 | } | |
330 | ;; | |
331 | ||
332 | // | |
333 | // Branch out for overflow | |
334 | // | |
335 | { .bbb | |
336 | (p7) br.cond.spnt LDEXPL_OVERFLOW | |
337 | (p9) br.cond.spnt LDEXPL_OVERFLOW | |
338 | br.ret.sptk b0 // Return from main path. | |
339 | } | |
340 | ;; | |
341 | ||
342 | // Here if result overflows | |
343 | LDEXPL_OVERFLOW: | |
344 | { .mib | |
345 | alloc r32=ar.pfs,3,0,4,0 | |
346 | addl GR_Tag = 144, r0 // Set error tag for overflow | |
347 | br.cond.sptk __libm_error_region // Call error support for overflow | |
348 | } | |
349 | ;; | |
350 | ||
351 | // Here if result underflows | |
352 | LDEXPL_UNDERFLOW: | |
353 | { .mib | |
354 | alloc r32=ar.pfs,3,0,4,0 | |
355 | addl GR_Tag = 145, r0 // Set error tag for underflow | |
356 | br.cond.sptk __libm_error_region // Call error support for underflow | |
357 | } | |
358 | ;; | |
359 | ||
360 | // Here if x=unorm | |
361 | LDEXPL_UNORM: | |
362 | { .mib | |
363 | getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x | |
364 | nop.i 0 | |
365 | br.cond.sptk LDEXPL_COMMON // Return to main path | |
366 | } | |
367 | ;; | |
368 | ||
369 | ||
370 | GLOBAL_LIBM_END(__libm_ldexpl) | |
371 | LOCAL_LIBM_ENTRY(__libm_error_region) | |
372 | ||
373 | // | |
374 | // Get stack address of N | |
375 | // | |
376 | .prologue | |
377 | { .mfi | |
378 | add GR_Parameter_Y=-32,sp | |
379 | nop.f 0 | |
380 | .save ar.pfs,GR_SAVE_PFS | |
381 | mov GR_SAVE_PFS=ar.pfs | |
382 | } | |
383 | // | |
384 | // Adjust sp | |
385 | // | |
386 | { .mfi | |
387 | .fframe 64 | |
388 | add sp=-64,sp | |
389 | nop.f 0 | |
390 | mov GR_SAVE_GP=gp | |
391 | };; | |
392 | ||
393 | // | |
394 | // Store N on stack in correct position | |
395 | // Locate the address of x on stack | |
396 | // | |
397 | { .mmi | |
398 | st8 [GR_Parameter_Y] = GR_N_as_int,16 | |
399 | add GR_Parameter_X = 16,sp | |
400 | .save b0, GR_SAVE_B0 | |
401 | mov GR_SAVE_B0=b0 | |
402 | };; | |
403 | ||
404 | // | |
405 | // Store x on the stack. | |
406 | // Get address for result on stack. | |
407 | // | |
408 | .body | |
409 | { .mib | |
410 | stfe [GR_Parameter_X] = FR_Norm_X | |
411 | add GR_Parameter_RESULT = 0,GR_Parameter_Y | |
412 | nop.b 0 | |
413 | } | |
414 | { .mib | |
415 | stfe [GR_Parameter_Y] = FR_Result | |
416 | add GR_Parameter_Y = -16,GR_Parameter_Y | |
417 | br.call.sptk b0=__libm_error_support# | |
418 | };; | |
419 | ||
420 | // | |
421 | // Get location of result on stack | |
422 | // | |
423 | { .mmi | |
424 | add GR_Parameter_RESULT = 48,sp | |
425 | nop.m 0 | |
426 | nop.i 0 | |
427 | };; | |
428 | ||
429 | // | |
430 | // Get the new result | |
431 | // | |
432 | { .mmi | |
433 | ldfe FR_Result = [GR_Parameter_RESULT] | |
434 | .restore sp | |
435 | add sp = 64,sp | |
436 | mov b0 = GR_SAVE_B0 | |
437 | };; | |
438 | ||
439 | // | |
440 | // Restore gp, ar.pfs and return | |
441 | // | |
442 | { .mib | |
443 | mov gp = GR_SAVE_GP | |
444 | mov ar.pfs = GR_SAVE_PFS | |
445 | br.ret.sptk b0 | |
446 | };; | |
447 | ||
448 | LOCAL_LIBM_END(__libm_error_region) | |
449 | ||
450 | .type __libm_error_support#,@function | |
451 | .global __libm_error_support# |