]>
Commit | Line | Data |
---|---|---|
0ecb606c JJ |
1 | .file "nextafter.s" |
2 | ||
3 | ||
4 | // Copyright (c) 2000 - 2004, Intel Corporation | |
5 | // All rights reserved. | |
6 | // | |
7 | // Contributed 2000 by the Intel Numerics Group, Intel Corporation | |
8 | // | |
9 | // Redistribution and use in source and binary forms, with or without | |
10 | // modification, are permitted provided that the following conditions are | |
11 | // met: | |
12 | // | |
13 | // * Redistributions of source code must retain the above copyright | |
14 | // notice, this list of conditions and the following disclaimer. | |
15 | // | |
16 | // * Redistributions in binary form must reproduce the above copyright | |
17 | // notice, this list of conditions and the following disclaimer in the | |
18 | // documentation and/or other materials provided with the distribution. | |
19 | // | |
20 | // * The name of Intel Corporation may not be used to endorse or promote | |
21 | // products derived from this software without specific prior written | |
22 | // permission. | |
23 | ||
24 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
25 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
26 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
27 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS | |
28 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
29 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
30 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
31 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
32 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING | |
33 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
34 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
35 | // | |
36 | // Intel Corporation is the author of this code, and requests that all | |
37 | // problem reports or change requests be submitted to it directly at | |
38 | // http://www.intel.com/software/products/opensource/libraries/num.htm. | |
39 | // | |
40 | // History | |
41 | //============================================================== | |
42 | // 02/02/00 Initial version | |
43 | // 03/03/00 Modified to conform to C9X, and improve speed of main path | |
44 | // 03/14/00 Fixed case where x is a power of 2, and x > y, improved speed | |
45 | // 04/04/00 Unwind support added | |
46 | // 05/12/00 Fixed erroneous denormal flag setting for exponent change cases 1,3 | |
47 | // 08/15/00 Bundle added after call to __libm_error_support to properly | |
48 | // set [the previously overwritten] GR_Parameter_RESULT. | |
49 | // 09/09/00 Updated fcmp so that qnans do not raise invalid | |
50 | // 12/15/00 Corrected behavior when both args are zero to conform to C99, and | |
51 | // fixed flag settings for several cases | |
52 | // 05/20/02 Cleaned up namespace and sf0 syntax | |
53 | // 02/10/03 Reordered header: .section, .global, .proc, .align | |
54 | // 12/14/04 Added error handling on underflow. | |
55 | // | |
56 | // API | |
57 | //============================================================== | |
58 | // double nextafter( double x, double y ); | |
59 | // input floating point f8, f9 | |
60 | // output floating point f8 | |
61 | // | |
62 | // Registers used | |
63 | //============================================================== | |
64 | GR_max_pexp = r14 | |
65 | GR_min_pexp = r15 | |
66 | GR_exp = r16 | |
67 | GR_sig = r17 | |
68 | GR_lnorm_sig = r18 | |
69 | GR_sign_mask = r19 | |
70 | GR_exp_mask = r20 | |
71 | GR_sden_sig = r21 | |
72 | GR_new_sig = r22 | |
73 | GR_new_exp = r23 | |
74 | GR_lden_sig = r24 | |
75 | GR_snorm_sig = r25 | |
76 | GR_exp1 = r26 | |
77 | GR_x_exp = r27 | |
78 | GR_min_den_rexp = r28 | |
79 | // r36-39 parameters for libm_error_support | |
80 | ||
81 | GR_SAVE_B0 = r34 | |
82 | GR_SAVE_GP = r35 | |
83 | GR_SAVE_PFS = r32 | |
84 | ||
85 | GR_Parameter_X = r36 | |
86 | GR_Parameter_Y = r37 | |
87 | GR_Parameter_RESULT = r38 | |
88 | GR_Parameter_TAG = r39 | |
89 | ||
90 | FR_lnorm_sig = f10 | |
91 | FR_lnorm_exp = f11 | |
92 | FR_lnorm = f12 | |
93 | FR_sden_sig = f13 | |
94 | FR_sden_exp = f14 | |
95 | FR_sden = f15 | |
96 | FR_save_f8 = f33 | |
97 | FR_new_exp = f34 | |
98 | FR_new_sig = f35 | |
99 | FR_lden_sig = f36 | |
100 | FR_snorm_sig = f37 | |
101 | FR_exp1 = f38 | |
102 | FR_tmp = f39 | |
103 | ||
104 | // | |
105 | // Overview of operation | |
106 | //============================================================== | |
107 | // nextafter determines the next representable value | |
108 | // after x in the direction of y. | |
109 | ||
110 | ||
111 | .section .text | |
112 | GLOBAL_LIBM_ENTRY(nextafter) | |
113 | ||
114 | // Extract signexp from x | |
115 | // Is x < y ? p10 if yes, p11 if no | |
116 | // Form smallest denormal significand = ulp size | |
117 | { .mfi | |
118 | getf.exp GR_exp = f8 | |
119 | fcmp.lt.s1 p10,p11 = f8, f9 | |
120 | addl GR_sden_sig = 0x800, r0 | |
121 | } | |
122 | // Form largest normal significand 0xfffffffffffff800 | |
123 | // Form smallest normal exponent | |
124 | { .mfi | |
125 | addl GR_lnorm_sig = -0x800,r0 | |
126 | nop.f 999 | |
127 | addl GR_min_pexp = 0x0fc01, r0 ;; | |
128 | } | |
129 | // Extract significand from x | |
130 | // Is x=y? | |
131 | // Form largest normal exponent | |
132 | { .mfi | |
133 | getf.sig GR_sig = f8 | |
134 | fcmp.eq.s0 p6,p0 = f8, f9 | |
135 | addl GR_max_pexp = 0x103fe, r0 | |
136 | } | |
137 | // Move largest normal significand to fp reg for special cases | |
138 | { .mfi | |
139 | setf.sig FR_lnorm_sig = GR_lnorm_sig | |
140 | nop.f 999 | |
141 | addl GR_sign_mask = 0x20000, r0 ;; | |
142 | } | |
143 | ||
144 | // Move smallest denormal significand and signexp to fp regs | |
145 | // Is x=nan? | |
146 | // Set p12 and p13 based on whether significand increases or decreases | |
147 | // It increases (p12 set) if x<y and x>=0 or if x>y and x<0 | |
148 | // It decreases (p13 set) if x<y and x<0 or if x>y and x>=0 | |
149 | { .mfi | |
150 | setf.sig FR_sden_sig = GR_sden_sig | |
151 | fclass.m p8,p0 = f8, 0xc3 | |
152 | (p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask | |
153 | } | |
154 | { .mfi | |
155 | setf.exp FR_sden_exp = GR_min_pexp | |
156 | (p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;; | |
157 | } | |
158 | ||
159 | .pred.rel "mutex",p12,p13 | |
160 | ||
161 | // Form expected new significand, adding or subtracting 1 ulp increment | |
162 | // If x=y set result to y | |
163 | // Form smallest normal significand and largest denormal significand | |
164 | { .mfi | |
165 | (p12) add GR_new_sig = GR_sig, GR_sden_sig | |
166 | (p6) fmerge.s f8=f9,f9 | |
167 | dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000 | |
168 | } | |
169 | { .mlx | |
170 | (p13) sub GR_new_sig = GR_sig, GR_sden_sig | |
171 | movl GR_lden_sig = 0x7ffffffffffff800 ;; | |
172 | } | |
173 | ||
174 | // Move expected result significand and signexp to fp regs | |
175 | // Is y=nan? | |
176 | // Form new exponent in case result exponent needs incrementing or decrementing | |
177 | { .mfi | |
178 | setf.exp FR_new_exp = GR_exp | |
179 | fclass.m p9,p0 = f9, 0xc3 | |
180 | (p12) add GR_exp1 = 1, GR_exp | |
181 | } | |
182 | { .mib | |
183 | setf.sig FR_new_sig = GR_new_sig | |
184 | (p13) add GR_exp1 = -1, GR_exp | |
185 | (p6) br.ret.spnt b0 ;; // Exit if x=y | |
186 | } | |
187 | ||
188 | // Move largest normal signexp to fp reg for special cases | |
189 | // Is x=zero? | |
190 | { .mfi | |
191 | setf.exp FR_lnorm_exp = GR_max_pexp | |
192 | fclass.m p7,p0 = f8, 0x7 | |
193 | nop.i 999 | |
194 | } | |
195 | { .mfb | |
196 | nop.m 999 | |
197 | (p8) fma.s0 f8 = f8,f1,f9 | |
198 | (p8) br.ret.spnt b0 ;; // Exit if x=nan | |
199 | } | |
200 | ||
201 | // Move exp+-1 and smallest normal significand to fp regs for special cases | |
202 | // Is x=inf? | |
203 | { .mfi | |
204 | setf.exp FR_exp1 = GR_exp1 | |
205 | fclass.m p6,p0 = f8, 0x23 | |
206 | addl GR_exp_mask = 0x1ffff, r0 | |
207 | } | |
208 | { .mfb | |
209 | setf.sig FR_snorm_sig = GR_snorm_sig | |
210 | (p9) fma.s0 f8 = f8,f1,f9 | |
211 | (p9) br.ret.spnt b0 ;; // Exit if y=nan | |
212 | } | |
213 | ||
214 | // Move largest denormal significand to fp regs for special cases | |
215 | // Save x | |
216 | { .mfb | |
217 | setf.sig FR_lden_sig = GR_lden_sig | |
218 | mov FR_save_f8 = f8 | |
219 | (p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 | |
220 | } | |
221 | ||
222 | // Mask off the sign to get x_exp | |
223 | { .mfb | |
224 | and GR_x_exp = GR_exp_mask, GR_exp | |
225 | nop.f 999 | |
226 | (p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf | |
227 | } | |
228 | ||
229 | // Check 6 special cases when significand rolls over: | |
230 | // 1 sig size incr, x_sig=max_sig, x_exp < max_exp | |
231 | // Set p6, result is sig=min_sig, exp++ | |
232 | // 2 sig size incr, x_sig=max_sig, x_exp >= max_exp | |
233 | // Set p7, result is inf, signal overflow | |
234 | // 3 sig size decr, x_sig=min_sig, x_exp > min_exp | |
235 | // Set p8, result is sig=max_sig, exp-- | |
236 | // 4 sig size decr, x_sig=min_sig, x_exp = min_exp | |
237 | // Set p9, result is sig=max_den_sig, exp same, signal underflow and inexact | |
238 | // 5 sig size decr, x_sig=min_den_sig, x_exp = min_exp | |
239 | // Set p10, result is zero, sign of x, signal underflow and inexact | |
240 | // 6 sig size decr, x_sig=min_sig, x_exp < min_exp | |
241 | // Set p14, result is zero, sign of x, signal underflow and inexact | |
242 | // | |
243 | // Form exponent of smallest double denormal (if normalized register format) | |
244 | { .mmi | |
245 | adds GR_min_den_rexp = -52, GR_min_pexp | |
246 | (p12) cmp.eq.unc p6,p0 = GR_new_sig, r0 | |
247 | (p13) cmp.eq.unc p8,p10 = GR_new_sig, GR_lden_sig ;; | |
248 | } | |
249 | ||
250 | { .mmi | |
251 | (p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp | |
252 | (p8) cmp.gt.unc p8,p9 = GR_x_exp, GR_min_pexp | |
253 | (p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 ;; | |
254 | } | |
255 | ||
256 | // Create small normal in case need to generate underflow flag | |
257 | { .mfi | |
258 | (p10) cmp.le.unc p10,p0 = GR_x_exp, GR_min_pexp | |
259 | fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig | |
260 | (p9) cmp.gt.unc p9,p14 = GR_x_exp, GR_min_den_rexp | |
261 | } | |
262 | // Branch if cases 1, 2, 3 | |
263 | { .bbb | |
264 | (p6) br.cond.spnt NEXT_EXPUP | |
265 | (p7) br.cond.spnt NEXT_OVERFLOW | |
266 | (p8) br.cond.spnt NEXT_EXPDOWN ;; | |
267 | } | |
268 | ||
269 | // Branch if cases 4, 5, 6 | |
270 | { .bbb | |
271 | (p9) br.cond.spnt NEXT_NORM_TO_DENORM | |
272 | (p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO | |
273 | (p14) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;; | |
274 | } | |
275 | ||
276 | // Here if no special cases | |
277 | // Set p6 if result will be a denormal, so can force underflow flag | |
278 | // Case 1: x_exp=min_exp, x_sig=unnormalized | |
279 | // Case 2: x_exp<min_exp | |
280 | { .mfi | |
281 | cmp.lt p6,p7 = GR_x_exp, GR_min_pexp | |
282 | fmerge.se f8 = FR_new_exp, FR_new_sig | |
283 | nop.i 999 ;; | |
284 | } | |
285 | ||
286 | { .mfi | |
287 | nop.m 999 | |
288 | nop.f 999 | |
289 | (p7) tbit.z p6,p0 = GR_new_sig, 63 ;; | |
290 | } | |
291 | ||
292 | NEXT_COMMON_FINISH: | |
293 | // Force underflow and inexact if denormal result | |
294 | { .mfi | |
295 | nop.m 999 | |
296 | (p6) fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0 | |
297 | nop.i 999 | |
298 | } | |
299 | { .mfb | |
300 | nop.m 999 | |
301 | fnorm.d.s0 f8 = f8 // Final normalization to result precision | |
302 | (p6) br.cond.spnt NEXT_UNDERFLOW ;; | |
303 | } | |
304 | ||
305 | { .mfb | |
306 | nop.m 999 | |
307 | nop.f 999 | |
308 | br.ret.sptk b0;; | |
309 | } | |
310 | ||
311 | //Special cases | |
312 | NEXT_EXPUP: | |
313 | { .mfb | |
314 | cmp.lt p6,p7 = GR_x_exp, GR_min_pexp | |
315 | fmerge.se f8 = FR_exp1, FR_snorm_sig | |
316 | br.cond.sptk NEXT_COMMON_FINISH ;; | |
317 | } | |
318 | ||
319 | NEXT_EXPDOWN: | |
320 | { .mfb | |
321 | cmp.lt p6,p7 = GR_x_exp, GR_min_pexp | |
322 | fmerge.se f8 = FR_exp1, FR_lnorm_sig | |
323 | br.cond.sptk NEXT_COMMON_FINISH ;; | |
324 | } | |
325 | ||
326 | NEXT_NORM_TO_DENORM: | |
327 | { .mfi | |
328 | nop.m 999 | |
329 | fmerge.se f8 = FR_new_exp, FR_lden_sig | |
330 | nop.i 999 | |
331 | } | |
332 | // Force underflow and inexact if denormal result | |
333 | { .mfb | |
334 | nop.m 999 | |
335 | fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0 | |
336 | br.cond.sptk NEXT_UNDERFLOW ;; | |
337 | } | |
338 | ||
339 | NEXT_UNDERFLOW_TO_ZERO: | |
340 | { .mfb | |
341 | cmp.eq p6,p0 = r0,r0 | |
342 | fmerge.s f8 = FR_save_f8,f0 | |
343 | br.cond.sptk NEXT_COMMON_FINISH ;; | |
344 | } | |
345 | ||
346 | NEXT_INF: | |
347 | // Here if f8 is +- infinity | |
348 | // INF | |
349 | // if f8 is +inf, no matter what y is return largest double | |
350 | // if f8 is -inf, no matter what y is return -largest double | |
351 | ||
352 | { .mfi | |
353 | nop.m 999 | |
354 | fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig | |
355 | nop.i 999 ;; | |
356 | } | |
357 | ||
358 | { .mfb | |
359 | nop.m 999 | |
360 | fmerge.s f8 = f8,FR_lnorm | |
361 | br.ret.sptk b0 ;; | |
362 | } | |
363 | ||
364 | NEXT_ZERO: | |
365 | ||
366 | // Here if f8 is +- zero | |
367 | // ZERO | |
368 | // if f8 is zero and y is +, return + smallest double denormal | |
369 | // if f8 is zero and y is -, return - smallest double denormal | |
370 | ||
371 | { .mfi | |
372 | nop.m 999 | |
373 | fmerge.se FR_sden = FR_sden_exp,FR_sden_sig | |
374 | nop.i 999 ;; | |
375 | } | |
376 | ||
377 | // Create small normal to generate underflow flag | |
378 | { .mfi | |
379 | nop.m 999 | |
380 | fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig | |
381 | nop.i 999 ;; | |
382 | } | |
383 | ||
384 | // Add correct sign from direction arg | |
385 | { .mfi | |
386 | nop.m 999 | |
387 | fmerge.s f8 = f9,FR_sden | |
388 | nop.i 999 ;; | |
389 | } | |
390 | ||
391 | // Force underflow and inexact flags | |
392 | { .mfb | |
393 | nop.m 999 | |
394 | fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0 | |
395 | br.cond.sptk NEXT_UNDERFLOW ;; | |
396 | } | |
397 | ||
398 | NEXT_UNDERFLOW: | |
399 | // Here if result is a denorm, or input is finite and result is zero | |
400 | // Call error support to report possible range error | |
401 | { .mib | |
402 | alloc r32=ar.pfs,2,2,4,0 | |
403 | mov GR_Parameter_TAG = 268 // Error code | |
404 | br.cond.sptk __libm_error_region // Branch to error call | |
405 | } | |
406 | ;; | |
407 | ||
408 | NEXT_OVERFLOW: | |
409 | // Here if input is finite, but result will be infinite | |
410 | // Use frcpa to generate infinity of correct sign | |
411 | // Call error support to report possible range error | |
412 | { .mfi | |
413 | alloc r32=ar.pfs,2,2,4,0 | |
414 | frcpa.s1 f8,p6 = FR_save_f8, f0 | |
415 | nop.i 999 ;; | |
416 | } | |
417 | ||
418 | // Create largest double | |
419 | { .mfi | |
420 | nop.m 999 | |
421 | fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig | |
422 | nop.i 999 ;; | |
423 | } | |
424 | ||
425 | // Force overflow and inexact flags to be set | |
426 | { .mfb | |
427 | mov GR_Parameter_TAG = 154 // Error code | |
428 | fma.d.s0 FR_tmp = FR_lnorm,FR_lnorm,f0 | |
429 | br.cond.sptk __libm_error_region // Branch to error call | |
430 | } | |
431 | ;; | |
432 | ||
433 | GLOBAL_LIBM_END(nextafter) | |
434 | ||
435 | ||
436 | LOCAL_LIBM_ENTRY(__libm_error_region) | |
437 | .prologue | |
438 | ||
439 | // (1) | |
440 | { .mfi | |
441 | add GR_Parameter_Y=-32,sp // Parameter 2 value | |
442 | nop.f 0 | |
443 | .save ar.pfs,GR_SAVE_PFS | |
444 | mov GR_SAVE_PFS=ar.pfs // Save ar.pfs | |
445 | } | |
446 | { .mfi | |
447 | .fframe 64 | |
448 | add sp=-64,sp // Create new stack | |
449 | nop.f 0 | |
450 | mov GR_SAVE_GP=gp // Save gp | |
451 | };; | |
452 | ||
453 | ||
454 | // (2) | |
455 | { .mmi | |
456 | stfd [GR_Parameter_Y] = f9,16 // STORE Parameter 2 on stack | |
457 | add GR_Parameter_X = 16,sp // Parameter 1 address | |
458 | .save b0, GR_SAVE_B0 | |
459 | mov GR_SAVE_B0=b0 // Save b0 | |
460 | };; | |
461 | ||
462 | .body | |
463 | // (3) | |
464 | { .mib | |
465 | stfd [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack | |
466 | add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address | |
467 | nop.b 0 | |
468 | } | |
469 | { .mib | |
470 | stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack | |
471 | add GR_Parameter_Y = -16,GR_Parameter_Y | |
472 | br.call.sptk b0=__libm_error_support# // Call error handling function | |
473 | };; | |
474 | { .mmi | |
475 | nop.m 0 | |
476 | nop.m 0 | |
477 | add GR_Parameter_RESULT = 48,sp | |
478 | };; | |
479 | ||
480 | // (4) | |
481 | { .mmi | |
482 | ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack | |
483 | .restore sp | |
484 | add sp = 64,sp // Restore stack pointer | |
485 | mov b0 = GR_SAVE_B0 // Restore return address | |
486 | };; | |
487 | { .mib | |
488 | mov gp = GR_SAVE_GP // Restore gp | |
489 | mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs | |
490 | br.ret.sptk b0 // Return | |
491 | };; | |
492 | ||
493 | LOCAL_LIBM_END(__libm_error_region) | |
494 | ||
495 | ||
496 | .type __libm_error_support#,@function | |
497 | .global __libm_error_support# | |
498 |