]>
Commit | Line | Data |
---|---|---|
d5efd131 MF |
1 | .file "expl_m1.s" |
2 | ||
3 | ||
4 | // Copyright (c) 2000 - 2003, Intel Corporation | |
5 | // All rights reserved. | |
6 | // | |
7 | // Contributed 2000 by the Intel Numerics Group, Intel Corporation | |
8 | // | |
9 | // Redistribution and use in source and binary forms, with or without | |
10 | // modification, are permitted provided that the following conditions are | |
11 | // met: | |
12 | // | |
13 | // * Redistributions of source code must retain the above copyright | |
14 | // notice, this list of conditions and the following disclaimer. | |
15 | // | |
16 | // * Redistributions in binary form must reproduce the above copyright | |
17 | // notice, this list of conditions and the following disclaimer in the | |
18 | // documentation and/or other materials provided with the distribution. | |
19 | // | |
20 | // * The name of Intel Corporation may not be used to endorse or promote | |
21 | // products derived from this software without specific prior written | |
22 | // permission. | |
23 | ||
0347518d MF |
24 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
25 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
d5efd131 | 26 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
0347518d | 27 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS |
d5efd131 | 28 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
0347518d MF |
29 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
30 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
31 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
d5efd131 | 32 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING |
0347518d MF |
33 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
34 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
35 | // | |
d5efd131 | 36 | // Intel Corporation is the author of this code, and requests that all |
0347518d | 37 | // problem reports or change requests be submitted to it directly at |
d5efd131 MF |
38 | // http://www.intel.com/software/products/opensource/libraries/num.htm. |
39 | // | |
40 | // History | |
41 | //============================================================== | |
42 | // 02/02/00 Initial Version | |
43 | // 04/04/00 Unwind support added | |
44 | // 08/15/00 Bundle added after call to __libm_error_support to properly | |
45 | // set [the previously overwritten] GR_Parameter_RESULT. | |
46 | // 07/07/01 Improved speed of all paths | |
47 | // 05/20/02 Cleaned up namespace and sf0 syntax | |
48 | // 02/10/03 Reordered header: .section, .global, .proc, .align; | |
49 | // used data8 for long double table values | |
50 | // 03/11/03 Improved accuracy and performance, corrected missing inexact flags | |
51 | // 04/17/03 Eliminated misplaced and unused data label | |
52 | // 12/15/03 Eliminated call to error support on expm1l underflow | |
53 | // | |
0347518d | 54 | //********************************************************************* |
d5efd131 MF |
55 | // |
56 | // Function: Combined expl(x) and expm1l(x), where | |
0347518d | 57 | // x |
d5efd131 MF |
58 | // expl(x) = e , for double-extended precision x values |
59 | // x | |
60 | // expm1l(x) = e - 1 for double-extended precision x values | |
61 | // | |
0347518d | 62 | //********************************************************************* |
d5efd131 MF |
63 | // |
64 | // Resources Used: | |
65 | // | |
0347518d MF |
66 | // Floating-Point Registers: f8 (Input and Return Value) |
67 | // f9-f15,f32-f77 | |
d5efd131 | 68 | // |
0347518d | 69 | // General Purpose Registers: |
d5efd131 MF |
70 | // r14-r38 |
71 | // r35-r38 (Used to pass arguments to error handling routine) | |
0347518d | 72 | // |
d5efd131 MF |
73 | // Predicate Registers: p6-p15 |
74 | // | |
0347518d | 75 | //********************************************************************* |
d5efd131 MF |
76 | // |
77 | // IEEE Special Conditions: | |
78 | // | |
0347518d | 79 | // Denormal fault raised on denormal inputs |
d5efd131 MF |
80 | // Overflow exceptions raised when appropriate for exp and expm1 |
81 | // Underflow exceptions raised when appropriate for exp and expm1 | |
82 | // (Error Handling Routine called for overflow and Underflow) | |
0347518d | 83 | // Inexact raised when appropriate by algorithm |
d5efd131 MF |
84 | // |
85 | // exp(inf) = inf | |
86 | // exp(-inf) = +0 | |
87 | // exp(SNaN) = QNaN | |
88 | // exp(QNaN) = QNaN | |
89 | // exp(0) = 1 | |
90 | // exp(EM_special Values) = QNaN | |
91 | // expm1(inf) = inf | |
0347518d | 92 | // expm1(-inf) = -1 |
d5efd131 MF |
93 | // expm1(SNaN) = QNaN |
94 | // expm1(QNaN) = QNaN | |
95 | // expm1(0) = 0 | |
96 | // expm1(EM_special Values) = QNaN | |
0347518d MF |
97 | // |
98 | //********************************************************************* | |
d5efd131 MF |
99 | // |
100 | // Implementation and Algorithm Notes: | |
101 | // | |
102 | // ker_exp_64( in_FR : X, | |
103 | // out_FR : Y_hi, | |
104 | // out_FR : Y_lo, | |
105 | // out_FR : scale, | |
106 | // out_PR : Safe ) | |
107 | // | |
108 | // On input, X is in register format | |
109 | // p6 for exp, | |
110 | // p7 for expm1, | |
111 | // | |
0347518d | 112 | // On output, |
d5efd131 MF |
113 | // |
114 | // scale*(Y_hi + Y_lo) approximates exp(X) if exp | |
115 | // scale*(Y_hi + Y_lo) approximates exp(X)-1 if expm1 | |
116 | // | |
117 | // The accuracy is sufficient for a highly accurate 64 sig. | |
0347518d MF |
118 | // bit implementation. Safe is set if there is no danger of |
119 | // overflow/underflow when the result is composed from scale, | |
120 | // Y_hi and Y_lo. Thus, we can have a fast return if Safe is set. | |
121 | // Otherwise, one must prepare to handle the possible exception | |
122 | // appropriately. Note that SAFE not set (false) does not mean | |
d5efd131 MF |
123 | // that overflow/underflow will occur; only the setting of SAFE |
124 | // guarantees the opposite. | |
125 | // | |
0347518d | 126 | // **** High Level Overview **** |
d5efd131 MF |
127 | // |
128 | // The method consists of three cases. | |
0347518d | 129 | // |
d5efd131 MF |
130 | // If |X| < Tiny use case exp_tiny; |
131 | // else if |X| < 2^(-m) use case exp_small; m=12 for exp, m=7 for expm1 | |
132 | // else use case exp_regular; | |
133 | // | |
134 | // Case exp_tiny: | |
135 | // | |
0347518d | 136 | // 1 + X can be used to approximate exp(X) |
d5efd131 MF |
137 | // X + X^2/2 can be used to approximate exp(X) - 1 |
138 | // | |
139 | // Case exp_small: | |
140 | // | |
0347518d | 141 | // Here, exp(X) and exp(X) - 1 can all be |
d5efd131 MF |
142 | // approximated by a relatively simple polynomial. |
143 | // | |
144 | // This polynomial resembles the truncated Taylor series | |
145 | // | |
146 | // exp(w) = 1 + w + w^2/2! + w^3/3! + ... + w^n/n! | |
147 | // | |
148 | // Case exp_regular: | |
149 | // | |
150 | // Here we use a table lookup method. The basic idea is that in | |
151 | // order to compute exp(X), we accurately decompose X into | |
152 | // | |
153 | // X = N * log(2)/(2^12) + r, |r| <= log(2)/2^13. | |
154 | // | |
155 | // Hence | |
156 | // | |
157 | // exp(X) = 2^( N / 2^12 ) * exp(r). | |
158 | // | |
159 | // The value 2^( N / 2^12 ) is obtained by simple combinations | |
160 | // of values calculated beforehand and stored in table; exp(r) | |
161 | // is approximated by a short polynomial because |r| is small. | |
162 | // | |
163 | // We elaborate this method in 4 steps. | |
164 | // | |
165 | // Step 1: Reduction | |
166 | // | |
167 | // The value 2^12/log(2) is stored as a double-extended number | |
168 | // L_Inv. | |
169 | // | |
170 | // N := round_to_nearest_integer( X * L_Inv ) | |
171 | // | |
172 | // The value log(2)/2^12 is stored as two numbers L_hi and L_lo so | |
173 | // that r can be computed accurately via | |
174 | // | |
175 | // r := (X - N*L_hi) - N*L_lo | |
176 | // | |
177 | // We pick L_hi such that N*L_hi is representable in 64 sig. bits | |
0347518d MF |
178 | // and thus the FMA X - N*L_hi is error free. So r is within |
179 | // 1 rounding error of an exact reduction with respect to | |
180 | // | |
d5efd131 MF |
181 | // L_hi + L_lo. |
182 | // | |
183 | // In particular, L_hi has 30 significant bits and can be stored | |
184 | // as a double-precision number; L_lo has 64 significant bits and | |
185 | // stored as a double-extended number. | |
186 | // | |
187 | // Step 2: Approximation | |
188 | // | |
189 | // exp(r) - 1 is approximated by a short polynomial of the form | |
0347518d | 190 | // |
d5efd131 MF |
191 | // r + A_1 r^2 + A_2 r^3 + A_3 r^4 . |
192 | // | |
0347518d | 193 | // Step 3: Composition from Table Values |
d5efd131 MF |
194 | // |
195 | // The value 2^( N / 2^12 ) can be composed from a couple of tables | |
196 | // of precalculated values. First, express N as three integers | |
197 | // K, M_1, and M_2 as | |
198 | // | |
199 | // N = K * 2^12 + M_1 * 2^6 + M_2 | |
200 | // | |
201 | // Where 0 <= M_1, M_2 < 2^6; and K can be positive or negative. | |
202 | // When N is represented in 2's complement, M_2 is simply the 6 | |
203 | // lsb's, M_1 is the next 6, and K is simply N shifted right | |
204 | // arithmetically (sign extended) by 12 bits. | |
205 | // | |
0347518d MF |
206 | // Now, 2^( N / 2^12 ) is simply |
207 | // | |
d5efd131 MF |
208 | // 2^K * 2^( M_1 / 2^6 ) * 2^( M_2 / 2^12 ) |
209 | // | |
210 | // Clearly, 2^K needs no tabulation. The other two values are less | |
211 | // trivial because if we store each accurately to more than working | |
212 | // precision, then their product is too expensive to calculate. We | |
213 | // use the following method. | |
214 | // | |
215 | // Define two mathematical values, delta_1 and delta_2, implicitly | |
216 | // such that | |
217 | // | |
0347518d | 218 | // T_1 = exp( [M_1 log(2)/2^6] - delta_1 ) |
d5efd131 MF |
219 | // T_2 = exp( [M_2 log(2)/2^12] - delta_2 ) |
220 | // | |
221 | // are representable as 24 significant bits. To illustrate the idea, | |
0347518d | 222 | // we show how we define delta_1: |
d5efd131 MF |
223 | // |
224 | // T_1 := round_to_24_bits( exp( M_1 log(2)/2^6 ) ) | |
0347518d | 225 | // delta_1 = (M_1 log(2)/2^6) - log( T_1 ) |
d5efd131 MF |
226 | // |
227 | // The last equality means mathematical equality. We then tabulate | |
228 | // | |
229 | // W_1 := exp(delta_1) - 1 | |
230 | // W_2 := exp(delta_2) - 1 | |
231 | // | |
232 | // Both in double precision. | |
233 | // | |
234 | // From the tabulated values T_1, T_2, W_1, W_2, we compose the values | |
235 | // T and W via | |
236 | // | |
237 | // T := T_1 * T_2 ...exactly | |
0347518d | 238 | // W := W_1 + (1 + W_1)*W_2 |
d5efd131 MF |
239 | // |
240 | // W approximates exp( delta ) - 1 where delta = delta_1 + delta_2. | |
241 | // The mathematical product of T and (W+1) is an accurate representation | |
242 | // of 2^(M_1/2^6) * 2^(M_2/2^12). | |
243 | // | |
244 | // Step 4. Reconstruction | |
245 | // | |
0347518d | 246 | // Finally, we can reconstruct exp(X), exp(X) - 1. |
d5efd131 MF |
247 | // Because |
248 | // | |
0347518d | 249 | // X = K * log(2) + (M_1*log(2)/2^6 - delta_1) |
d5efd131 MF |
250 | // + (M_2*log(2)/2^12 - delta_2) |
251 | // + delta_1 + delta_2 + r ...accurately | |
252 | // We have | |
253 | // | |
254 | // exp(X) ~=~ 2^K * ( T + T*[exp(delta_1+delta_2+r) - 1] ) | |
255 | // ~=~ 2^K * ( T + T*[exp(delta + r) - 1] ) | |
0347518d | 256 | // ~=~ 2^K * ( T + T*[(exp(delta)-1) |
d5efd131 MF |
257 | // + exp(delta)*(exp(r)-1)] ) |
258 | // ~=~ 2^K * ( T + T*( W + (1+W)*poly(r) ) ) | |
259 | // ~=~ 2^K * ( Y_hi + Y_lo ) | |
260 | // | |
261 | // where Y_hi = T and Y_lo = T*(W + (1+W)*poly(r)) | |
262 | // | |
263 | // For exp(X)-1, we have | |
264 | // | |
265 | // exp(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1 | |
266 | // ~=~ 2^K * ( Y_hi + Y_lo - 2^(-K) ) | |
267 | // | |
0347518d | 268 | // and we combine Y_hi + Y_lo - 2^(-K) into the form of two |
d5efd131 MF |
269 | // numbers Y_hi + Y_lo carefully. |
270 | // | |
271 | // **** Algorithm Details **** | |
272 | // | |
273 | // A careful algorithm must be used to realize the mathematical ideas | |
274 | // accurately. We describe each of the three cases. We assume SAFE | |
275 | // is preset to be TRUE. | |
276 | // | |
277 | // Case exp_tiny: | |
278 | // | |
0347518d MF |
279 | // The important points are to ensure an accurate result under |
280 | // different rounding directions and a correct setting of the SAFE | |
d5efd131 MF |
281 | // flag. |
282 | // | |
283 | // If expm1 is 1, then | |
284 | // SAFE := False ...possibility of underflow | |
285 | // Scale := 1.0 | |
286 | // Y_hi := X | |
287 | // Y_lo := 2^(-17000) | |
288 | // Else | |
289 | // Scale := 1.0 | |
290 | // Y_hi := 1.0 | |
291 | // Y_lo := X ...for different rounding modes | |
292 | // Endif | |
293 | // | |
294 | // Case exp_small: | |
295 | // | |
296 | // Here we compute a simple polynomial. To exploit parallelism, we split | |
297 | // the polynomial into several portions. | |
298 | // | |
0347518d | 299 | // Let r = X |
d5efd131 MF |
300 | // |
301 | // If exp ...i.e. exp( argument ) | |
302 | // | |
0347518d | 303 | // rsq := r * r; |
d5efd131 MF |
304 | // r4 := rsq*rsq |
305 | // poly_lo := P_3 + r*(P_4 + r*(P_5 + r*P_6)) | |
306 | // poly_hi := r + rsq*(P_1 + r*P_2) | |
307 | // Y_lo := poly_hi + r4 * poly_lo | |
308 | // Y_hi := 1.0 | |
309 | // Scale := 1.0 | |
310 | // | |
311 | // Else ...i.e. exp( argument ) - 1 | |
312 | // | |
313 | // rsq := r * r | |
314 | // r4 := rsq * rsq | |
315 | // poly_lo := Q_7 + r*(Q_8 + r*Q_9) | |
316 | // poly_med:= Q_3 + r*Q_4 + rsq*(Q_5 + r*Q_6) | |
317 | // poly_med:= poly_med + r4*poly_lo | |
318 | // poly_hi := Q_1 + r*Q_2 | |
319 | // Y_lo := rsq*(poly_hi + rsq*poly_med) | |
320 | // Y_hi := X | |
321 | // Scale := 1.0 | |
322 | // | |
323 | // Endif | |
324 | // | |
325 | // Case exp_regular: | |
326 | // | |
327 | // The previous description contains enough information except the | |
328 | // computation of poly and the final Y_hi and Y_lo in the case for | |
329 | // exp(X)-1. | |
330 | // | |
331 | // The computation of poly for Step 2: | |
332 | // | |
333 | // rsq := r*r | |
334 | // poly := r + rsq*(A_1 + r*(A_2 + r*A_3)) | |
335 | // | |
336 | // For the case exp(X) - 1, we need to incorporate 2^(-K) into | |
337 | // Y_hi and Y_lo at the end of Step 4. | |
338 | // | |
339 | // If K > 10 then | |
340 | // Y_lo := Y_lo - 2^(-K) | |
341 | // Else | |
342 | // If K < -10 then | |
343 | // Y_lo := Y_hi + Y_lo | |
344 | // Y_hi := -2^(-K) | |
345 | // Else | |
346 | // Y_hi := Y_hi - 2^(-K) | |
347 | // End If | |
348 | // End If | |
349 | // | |
350 | //======================================================= | |
351 | // General Purpose Registers | |
352 | // | |
// Symbolic names for the general registers used by expl/expm1l.
// Several names deliberately alias one physical register (for example
// GR_sig_inv_ln2 and GR_ad_A are both r15, GR_rshf_2to51 and GR_ad_PQ are
// both r16).  In the entry code r15 first carries the 1/ln2 significand
// until setf.sig consumes it and is then reused as the A-table pointer, so
// aliased names must never be live at the same time.
// r35-r38 are the registers the header lists as arguments to the error
// handling routine.
353 | GR_ad_Arg = r14 | |
354 | GR_ad_A = r15 | |
355 | GR_sig_inv_ln2 = r15 | |
356 | GR_rshf_2to51 = r16 | |
357 | GR_ad_PQ = r16 | |
358 | GR_ad_Q = r16 | |
359 | GR_signexp_x = r17 | |
360 | GR_exp_x = r17 | |
361 | GR_small_exp = r18 | |
362 | GR_rshf = r18 | |
363 | GR_exp_mask = r19 | |
364 | GR_ad_W1 = r20 | |
365 | GR_exp_2tom51 = r20 | |
366 | GR_ad_W2 = r21 | |
367 | GR_exp_underflow = r21 | |
368 | GR_M2 = r22 | |
369 | GR_huge_exp = r22 | |
370 | GR_M1 = r23 | |
371 | GR_huge_signif = r23 | |
372 | GR_K = r24 | |
373 | GR_one = r24 | |
374 | GR_minus_one = r24 | |
375 | GR_exp_bias = r25 | |
376 | GR_ad_Limits = r26 | |
377 | GR_N_fix = r26 | |
378 | GR_exp_2_mk = r26 | |
379 | GR_ad_P = r27 | |
380 | GR_exp_2_k = r27 | |
381 | GR_big_expo_neg = r28 | |
382 | GR_very_small_exp = r29 | |
383 | GR_exp_half = r29 | |
384 | GR_ad_T1 = r30 | |
385 | GR_ad_T2 = r31 | |
386 | ||
387 | GR_SAVE_PFS = r32 | |
388 | GR_SAVE_B0 = r33 | |
389 | GR_SAVE_GP = r34 | |
390 | GR_Parameter_X = r35 | |
391 | GR_Parameter_Y = r36 | |
392 | GR_Parameter_RESULT = r37 | |
0347518d | 393 | GR_Parameter_TAG = r38 |
d5efd131 MF |
394 | |
395 | // Floating Point Registers | |
396 | // | |
// Symbolic names for the floating-point registers used by expl/expm1l.
// As with the general registers, many names share one physical register
// (for example FR_A1, FR_P6Q9, FR_P6 and FR_Q9 are all f68).  The combined
// names such as FR_P6Q9 are the ones the shared small-path loads write:
// P6 when computing exp, Q9 when computing expm1.
// FR_X is the same register as FR_norm_x (f9).
397 | FR_norm_x = f9 | |
398 | FR_RSHF_2TO51 = f10 | |
399 | FR_INV_LN2_2TO63 = f11 | |
400 | FR_W_2TO51_RSH = f12 | |
401 | FR_2TOM51 = f13 | |
402 | FR_RSHF = f14 | |
403 | FR_Y_hi = f34 | |
404 | FR_Y_lo = f35 | |
405 | FR_scale = f36 | |
406 | FR_tmp = f37 | |
407 | FR_float_N = f38 | |
408 | FR_N_signif = f39 | |
409 | FR_L_hi = f40 | |
410 | FR_L_lo = f41 | |
411 | FR_r = f42 | |
412 | FR_W1 = f43 | |
413 | FR_T1 = f44 | |
414 | FR_W2 = f45 | |
415 | FR_T2 = f46 | |
416 | FR_W1_p1 = f47 | |
417 | FR_rsq = f48 | |
418 | FR_A2 = f49 | |
419 | FR_r4 = f50 | |
420 | FR_A3 = f51 | |
421 | FR_poly = f52 | |
422 | FR_T = f53 | |
423 | FR_W = f54 | |
424 | FR_Wp1 = f55 | |
425 | FR_p21 = f59 | |
426 | FR_p210 = f59 | |
427 | FR_p65 = f60 | |
428 | FR_p654 = f60 | |
429 | FR_p6543 = f60 | |
430 | FR_2_mk = f61 | |
431 | FR_P4Q7 = f61 | |
432 | FR_P4 = f61 | |
433 | FR_Q7 = f61 | |
434 | FR_P3Q6 = f62 | |
435 | FR_P3 = f62 | |
436 | FR_Q6 = f62 | |
437 | FR_q65 = f62 | |
438 | FR_q6543 = f62 | |
439 | FR_P2Q5 = f63 | |
440 | FR_P2 = f63 | |
441 | FR_Q5 = f63 | |
442 | FR_P1Q4 = f64 | |
443 | FR_P1 = f64 | |
444 | FR_Q4 = f64 | |
445 | FR_q43 = f64 | |
446 | FR_Q3 = f65 | |
447 | FR_Q2 = f66 | |
448 | FR_q21 = f66 | |
449 | FR_Q1 = f67 | |
450 | FR_A1 = f68 | |
451 | FR_P6Q9 = f68 | |
452 | FR_P6 = f68 | |
453 | FR_Q9 = f68 | |
454 | FR_P5Q8 = f69 | |
455 | FR_P5 = f69 | |
456 | FR_Q8 = f69 | |
457 | FR_q987 = f69 | |
458 | FR_q98 = f69 | |
459 | FR_q9876543 = f69 | |
460 | FR_min_oflow_x = f70 | |
461 | FR_huge_exp = f70 | |
462 | FR_zero_uflow_x = f71 | |
463 | FR_huge_signif = f71 | |
464 | FR_huge = f72 | |
465 | FR_small = f72 | |
466 | FR_half = f73 | |
467 | FR_T_scale = f74 | |
468 | FR_result_lo = f75 | |
469 | FR_W_T_scale = f76 | |
470 | FR_Wp1_T_scale = f77 | |
471 | FR_ftz = f77 | |
472 | FR_half_x = f77 | |
473 | // | |
474 | ||
475 | FR_X = f9 | |
476 | FR_Y = f0 | |
477 | FR_RESULT = f15 | |
478 | ||
479 | // ************* DO NOT CHANGE ORDER OF THESE TABLES ******************** | |
480 | ||
481 | // double-extended 1/ln(2) | |
482 | // 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88 | |
0347518d | 483 | // 3fff b8aa 3b29 5c17 f0bc |
d5efd131 MF |
484 | // For speed the significand will be loaded directly with a movl and setf.sig |
485 | // and the exponent will be bias+63 instead of bias+0. Thus subsequent | |
486 | // computations need to scale appropriately. | |
0347518d | 487 | // The constant 2^12/ln(2) is needed for the computation of N. This is also |
d5efd131 MF |
488 | // obtained by scaling the computations. |
489 | // | |
0347518d MF |
490 | // Two shifting constants are loaded directly with movl and setf.d. |
491 | // 1. RSHF_2TO51 = 1.1000..00 * 2^(63-12) | |
d5efd131 MF |
492 | // This constant is added to x*1/ln2 to shift the integer part of |
493 | // x*2^12/ln2 into the rightmost bits of the significand. | |
494 | // The result of this fma is N_signif. | |
0347518d | 495 | // 2. RSHF = 1.1000..00 * 2^(63) |
d5efd131 MF |
496 | // This constant is subtracted from N_signif * 2^(-51) to give |
497 | // the integer part of N, N_fix, as a floating-point number. | |
498 | // The result of this fms is float_N. | |
499 | ||
// Constants_exp_64_Arg is the base of the whole constant area.  The entry
// code obtains only this table's address (via @ltoff and ld8) and reaches
// every later table by adding a fixed byte offset to it (0x20 for Limits,
// 0x40 for A, 0x160 for T1, ...).  That is why the tables must stay in
// this order and keep their sizes.
// Each long double value occupies 16 bytes (two data8 words) and is read
// with ldfe using a post-increment of 16: L_hi first, then L_lo.
// Inv_L is left commented out because its significand is loaded directly
// with movl/setf.sig instead of from memory.
500 | RODATA | |
0347518d | 501 | .align 64 |
d5efd131 MF |
502 | LOCAL_OBJECT_START(Constants_exp_64_Arg) | |
503 | //data8 0xB8AA3B295C17F0BC,0x0000400B // Inv_L = 2^12/log(2) | |
504 | data8 0xB17217F400000000,0x00003FF2 // L_hi = hi part log(2)/2^12 | |
505 | data8 0xF473DE6AF278ECE6,0x00003FD4 // L_lo = lo part log(2)/2^12 | |
506 | LOCAL_OBJECT_END(Constants_exp_64_Arg) | |
507 | ||
// Argument thresholds, addressed by the code as Constants_exp_64_Arg + 0x20.
// The two 16-byte entries are read in order with ldfe (post-increment 16)
// into FR_min_oflow_x and FR_zero_uflow_x.
508 | LOCAL_OBJECT_START(Constants_exp_64_Limits) | |
509 | data8 0xb17217f7d1cf79ac,0x0000400c // Smallest long dbl oflow x | |
510 | data8 0xb220000000000000,0x0000c00c // Small long dbl uflow zero x | |
511 | LOCAL_OBJECT_END(Constants_exp_64_Limits) | |
512 | ||
// Coefficients of the exp_regular polynomial
//   poly = r + rsq*(A_1 + r*(A_2 + r*A_3)),
// addressed by the code as Constants_exp_64_Arg + 0x40.
// Stored highest order first: the first ldfe through GR_ad_A (post-increment
// 16) fetches A3.
513 | LOCAL_OBJECT_START(Constants_exp_64_A) | |
514 | data8 0xAAAAAAABB1B736A0,0x00003FFA // A3 | |
515 | data8 0xAAAAAAAB90CD6327,0x00003FFC // A2 | |
516 | data8 0xFFFFFFFFFFFFFFFF,0x00003FFD // A1 | |
517 | LOCAL_OBJECT_END(Constants_exp_64_A) | |
518 | ||
// exp_small coefficients for exp, addressed as (A table) + 0x30 when p6 is
// set.  Stored P6 down to P1, which is the order in which the shared
// ldfe ...,[GR_ad_PQ],16 sequence reads them (P6Q9, P5Q8, ..., P1Q4).
519 | LOCAL_OBJECT_START(Constants_exp_64_P) | |
520 | data8 0xD00D6C8143914A8A,0x00003FF2 // P6 | |
521 | data8 0xB60BC4AC30304B30,0x00003FF5 // P5 | |
522 | data8 0x888888887474C518,0x00003FF8 // P4 | |
523 | data8 0xAAAAAAAA8DAE729D,0x00003FFA // P3 | |
524 | data8 0xAAAAAAAAAAAAAF61,0x00003FFC // P2 | |
525 | data8 0x80000000000004C7,0x00003FFE // P1 | |
526 | LOCAL_OBJECT_END(Constants_exp_64_P) | |
527 | ||
// exp_small coefficients for expm1, addressed as (A table) + 0x90 when p7
// is set.  Stored Q9 down to Q1; the shared ldfe ...,[GR_ad_PQ],16 sequence
// reads Q9 through Q4 into the FR_PnQm registers.
// NOTE(review): the loads of Q3..Q1 are outside this chunk - confirm they
// continue through the same pointer.
528 | LOCAL_OBJECT_START(Constants_exp_64_Q) | |
529 | data8 0x93F2AC5F7471F32E, 0x00003FE9 // Q9 | |
530 | data8 0xB8DA0F3550B3E764, 0x00003FEC // Q8 | |
531 | data8 0xD00D00D0028E89C4, 0x00003FEF // Q7 | |
532 | data8 0xD00D00DAEB8C4E91, 0x00003FF2 // Q6 | |
533 | data8 0xB60B60B60B60B6F5, 0x00003FF5 // Q5 | |
534 | data8 0x888888888886CC23, 0x00003FF8 // Q4 | |
535 | data8 0xAAAAAAAAAAAAAAAB, 0x00003FFA // Q3 | |
536 | data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC // Q2 | |
537 | data8 0x8000000000000000, 0x00003FFE // Q1 | |
538 | LOCAL_OBJECT_END(Constants_exp_64_Q) | |
539 | ||
// T_1 table: 64 data4 words (0x100 bytes), addressed by the code as
// Constants_exp_64_Arg + 0x160.  Per the algorithm notes these are the
// 24-significant-bit values T_1 = round_to_24_bits( exp( M_1 log(2)/2^6 ) ).
// Presumably indexed by M_1 (0..63); the lookup itself is outside this chunk.
540 | LOCAL_OBJECT_START(Constants_exp_64_T1) | |
0347518d MF |
541 | data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29 |
542 | data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5 | |
d5efd131 MF |
543 | data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC |
544 | data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D | |
545 | data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA | |
546 | data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516 | |
547 | data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A | |
548 | data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4 | |
549 | data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B | |
550 | data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD | |
551 | data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15 | |
552 | data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B | |
553 | data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5 | |
554 | data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A | |
555 | data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177 | |
556 | data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C | |
557 | LOCAL_OBJECT_END(Constants_exp_64_T1) | |
558 | ||
// T_2 table: 64 data4 words (0x100 bytes) placed directly after T1.  Per
// the algorithm notes these are the 24-significant-bit values
// T_2 = exp( [M_2 log(2)/2^12] - delta_2 ).
// Its size matters: the code addresses W1 as (T1 table) + 0x200, which
// assumes T1 and T2 together occupy exactly 0x200 bytes.
// Presumably indexed by M_2 (0..63); the lookup itself is outside this chunk.
559 | LOCAL_OBJECT_START(Constants_exp_64_T2) | |
0347518d MF |
560 | data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4 |
561 | data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7 | |
562 | data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E | |
563 | data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349 | |
564 | data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987 | |
565 | data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA | |
566 | data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610 | |
567 | data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A | |
568 | data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8 | |
569 | data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA | |
570 | data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50 | |
571 | data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA | |
572 | data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07 | |
573 | data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269 | |
574 | data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE | |
d5efd131 MF |
575 | data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37 |
576 | LOCAL_OBJECT_END(Constants_exp_64_T2) | |
577 | ||
// W_1 table: 64 data8 words (0x200 bytes), addressed by the code as
// (T1 table) + 0x200.  Per the algorithm notes these are the
// double-precision corrections W_1 = exp(delta_1) - 1 that pair with the
// T_1 entries.
// Presumably indexed by M_1 (0..63); the lookup itself is outside this chunk.
578 | LOCAL_OBJECT_START(Constants_exp_64_W1) | |
579 | data8 0x0000000000000000, 0xBE384454171EC4B4 | |
580 | data8 0xBE6947414AA72766, 0xBE5D32B6D42518F8 | |
581 | data8 0x3E68D96D3A319149, 0xBE68F4DA62415F36 | |
582 | data8 0xBE6DDA2FC9C86A3B, 0x3E6B2E50F49228FE | |
583 | data8 0xBE49C0C21188B886, 0x3E64BFC21A4C2F1F | |
584 | data8 0xBE6A2FBB2CB98B54, 0x3E5DC5DE9A55D329 | |
585 | data8 0x3E69649039A7AACE, 0x3E54728B5C66DBA5 | |
586 | data8 0xBE62B0DBBA1C7D7D, 0x3E576E0409F1AF5F | |
587 | data8 0x3E6125001A0DD6A1, 0xBE66A419795FBDEF | |
588 | data8 0xBE5CDE8CE1BD41FC, 0xBE621376EA54964F | |
589 | data8 0x3E6370BE476E76EE, 0x3E390D1A3427EB92 | |
590 | data8 0x3E1336DE2BF82BF8, 0xBE5FF1CBD0F7BD9E | |
591 | data8 0xBE60A3550CEB09DD, 0xBE5CA37E0980F30D | |
592 | data8 0xBE5C541B4C082D25, 0xBE5BBECA3B467D29 | |
593 | data8 0xBE400D8AB9D946C5, 0xBE5E2A0807ED374A | |
594 | data8 0xBE66CB28365C8B0A, 0x3E3AAD5BD3403BCA | |
595 | data8 0x3E526055C7EA21E0, 0xBE442C75E72880D6 | |
596 | data8 0x3E58B2BB85222A43, 0xBE5AAB79522C42BF | |
597 | data8 0xBE605CB4469DC2BC, 0xBE589FA7A48C40DC | |
598 | data8 0xBE51C2141AA42614, 0xBE48D087C37293F4 | |
599 | data8 0x3E367A1CA2D673E0, 0xBE51BEBB114F7A38 | |
600 | data8 0xBE6348E5661A4B48, 0xBDF526431D3B9962 | |
601 | data8 0x3E3A3B5E35A78A53, 0xBE46C46C1CECD788 | |
602 | data8 0xBE60B7EC7857D689, 0xBE594D3DD14F1AD7 | |
603 | data8 0xBE4F9C304C9A8F60, 0xBE52187302DFF9D2 | |
604 | data8 0xBE5E4C8855E6D68F, 0xBE62140F667F3DC4 | |
605 | data8 0xBE36961B3BF88747, 0x3E602861C96EC6AA | |
606 | data8 0xBE3B5151D57FD718, 0x3E561CD0FC4A627B | |
607 | data8 0xBE3A5217CA913FEA, 0x3E40A3CC9A5D193A | |
608 | data8 0xBE5AB71310A9C312, 0x3E4FDADBC5F57719 | |
609 | data8 0x3E361428DBDF59D5, 0x3E5DB5DB61B4180D | |
610 | data8 0xBE42AD5F7408D856, 0x3E2A314831B2B707 | |
611 | LOCAL_OBJECT_END(Constants_exp_64_W1) | |
612 | ||
// W_2 table: 64 data8 words (0x200 bytes), addressed by the code as
// (T1 table) + 0x400.  Per the algorithm notes these are the
// double-precision corrections W_2 = exp(delta_2) - 1 that pair with the
// T_2 entries.
// Presumably indexed by M_2 (0..63); the lookup itself is outside this chunk.
613 | LOCAL_OBJECT_START(Constants_exp_64_W2) | |
614 | data8 0x0000000000000000, 0xBE641F2537A3D7A2 | |
615 | data8 0xBE68DD57AD028C40, 0xBE5C77D8F212B1B6 | |
616 | data8 0x3E57878F1BA5B070, 0xBE55A36A2ECAE6FE | |
617 | data8 0xBE620608569DFA3B, 0xBE53B50EA6D300A3 | |
618 | data8 0x3E5B5EF2223F8F2C, 0xBE56A0D9D6DE0DF4 | |
619 | data8 0xBE64EEF3EAE28F51, 0xBE5E5AE2367EA80B | |
620 | data8 0x3E47CB1A5FCBC02D, 0xBE656BA09BDAFEB7 | |
621 | data8 0x3E6E70C6805AFEE7, 0xBE6E0509A3415EBA | |
622 | data8 0xBE56856B49BFF529, 0x3E66DD3300508651 | |
623 | data8 0x3E51165FC114BC13, 0x3E53333DC453290F | |
624 | data8 0x3E6A072B05539FDA, 0xBE47CD877C0A7696 | |
625 | data8 0xBE668BF4EB05C6D9, 0xBE67C3E36AE86C93 | |
626 | data8 0xBE533904D0B3E84B, 0x3E63E8D9556B53CE | |
627 | data8 0x3E212C8963A98DC8, 0xBE33138F032A7A22 | |
628 | data8 0x3E530FA9BC584008, 0xBE6ADF82CCB93C97 | |
629 | data8 0x3E5F91138370EA39, 0x3E5443A4FB6A05D8 | |
630 | data8 0x3E63DACD181FEE7A, 0xBE62B29DF0F67DEC | |
631 | data8 0x3E65C4833DDE6307, 0x3E5BF030D40A24C1 | |
632 | data8 0x3E658B8F14E437BE, 0xBE631C29ED98B6C7 | |
633 | data8 0x3E6335D204CF7C71, 0x3E529EEDE954A79D | |
634 | data8 0x3E5D9257F64A2FB8, 0xBE6BED1B854ED06C | |
635 | data8 0x3E5096F6D71405CB, 0xBE3D4893ACB9FDF5 | |
636 | data8 0xBDFEB15801B68349, 0x3E628D35C6A463B9 | |
637 | data8 0xBE559725ADE45917, 0xBE68C29C042FC476 | |
638 | data8 0xBE67593B01E511FA, 0xBE4A4313398801ED | |
639 | data8 0x3E699571DA7C3300, 0x3E5349BE08062A9E | |
640 | data8 0x3E5229C4755BB28E, 0x3E67E42677A1F80D | |
641 | data8 0xBE52B33F6B69C352, 0xBE6B3550084DA57F | |
642 | data8 0xBE6DB03FD1D09A20, 0xBE60CBC42161B2C1 | |
643 | data8 0x3E56ED9C78A2B771, 0xBE508E319D0FA795 | |
644 | data8 0xBE59482AFD1A54E9, 0xBE2A17CEB07FD23E | |
645 | data8 0x3E68BF5C17365712, 0x3E3956F9B3785569 | |
646 | LOCAL_OBJECT_END(Constants_exp_64_W2) | |
647 | ||
648 | ||
649 | .section .text | |
650 | ||
// expm1l entry point.  The argument x arrives in f8.
// This stub runs the same two opening instruction groups as expl, except
// that it sets p7 (expm1) and clears p6 (exp), and then branches to
// exp_continue, the code path shared with expl.  On arrival there:
//   GR_signexp_x   = sign and exponent field of x
//   GR_sig_inv_ln2 = significand of 1/ln2
//   GR_rshf_2to51  = bit pattern of the shift constant 1.1000 * 2^(63+51)
//   GR_ad_Arg      = address of Constants_exp_64_Arg
//   GR_exp_half    = exponent of 0.5
//   FR_norm_x      = normalized x
//   p8             = x is natval, nan, inf or zero
651 | GLOBAL_IEEE754_ENTRY(expm1l) | |
652 | ||
653 | // | |
654 | // Set p7 true for expm1, p6 false | |
0347518d | 655 | // |
d5efd131 MF |
656 | 
657 | { .mlx | |
658 | getf.exp GR_signexp_x = f8 // Get sign and exponent of x, redo if unorm | |
659 | movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2 | |
660 | } | |
661 | { .mlx | |
0347518d | 662 | addl GR_ad_Arg = @ltoff(Constants_exp_64_Arg#),gp | // Address of the linkage-table slot for the Arg table
d5efd131 MF |
663 | movl GR_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51) |
664 | } | |
665 | ;; | |
666 | ||
667 | { .mfi | |
668 | ld8 GR_ad_Arg = [GR_ad_Arg] // Point to Arg table | |
669 | fclass.m p8, p0 = f8, 0x1E7 // Test x for natval, nan, inf, zero | |
0347518d | 670 | cmp.eq p7, p6 = r0, r0 | // r0 == r0 always holds: p7 = 1 (expm1), p6 = 0
d5efd131 MF |
671 | } |
672 | { .mfb | |
673 | mov GR_exp_half = 0x0FFFE // Exponent of 0.5, for very small path | |
674 | fnorm.s1 FR_norm_x = f8 // Normalize x | |
0347518d | 675 | br.cond.sptk exp_continue | // Join the code path shared with expl
d5efd131 MF |
676 | } |
677 | ;; | |
678 | ||
679 | GLOBAL_IEEE754_END(expm1l) | |
680 | ||
681 | ||
682 | GLOBAL_IEEE754_ENTRY(expl) | |
683 | // | |
684 | // Set p7 false for exp, p6 true | |
0347518d | 685 | // |
d5efd131 MF |
686 | { .mlx |
687 | getf.exp GR_signexp_x = f8 // Get sign and exponent of x, redo if unorm | |
688 | movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2 | |
689 | } | |
690 | { .mlx | |
0347518d | 691 | addl GR_ad_Arg = @ltoff(Constants_exp_64_Arg#),gp |
d5efd131 MF |
692 | movl GR_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51) |
693 | } | |
694 | ;; | |
695 | ||
696 | { .mfi | |
697 | ld8 GR_ad_Arg = [GR_ad_Arg] // Point to Arg table | |
698 | fclass.m p8, p0 = f8, 0x1E7 // Test x for natval, nan, inf, zero | |
699 | cmp.eq p6, p7 = r0, r0 | |
700 | } | |
701 | { .mfi | |
702 | mov GR_exp_half = 0x0FFFE // Exponent of 0.5, for very small path | |
703 | fnorm.s1 FR_norm_x = f8 // Normalize x | |
704 | nop.i 999 | |
705 | } | |
706 | ;; | |
707 | ||
0347518d | 708 | exp_continue: |
d5efd131 | 709 | // Form two constants we need |
0347518d | 710 | // 1/ln2 * 2^63 to compute w = x * 1/ln2 * 2^12 |
d5efd131 MF |
711 | // 1.1000..000 * 2^(63+63-12) to right shift int(N) into the significand |
712 | ||
713 | { .mfi | |
714 | setf.sig FR_INV_LN2_2TO63 = GR_sig_inv_ln2 // form 1/ln2 * 2^63 | |
715 | fclass.nm.unc p9, p0 = f8, 0x1FF // Test x for unsupported | |
716 | mov GR_exp_2tom51 = 0xffff-51 | |
717 | } | |
718 | { .mlx | |
719 | setf.d FR_RSHF_2TO51 = GR_rshf_2to51 // Form const 1.1000 * 2^(63+51) | |
720 | movl GR_rshf = 0x43e8000000000000 // 1.10000 2^63 for right shift | |
721 | } | |
722 | ;; | |
723 | ||
724 | { .mfi | |
725 | setf.exp FR_half = GR_exp_half // Form 0.5 for very small path | |
726 | fma.s1 FR_scale = f1,f1,f0 // Scale = 1.0 | |
727 | mov GR_exp_bias = 0x0FFFF // Set exponent bias | |
728 | } | |
729 | { .mib | |
730 | add GR_ad_Limits = 0x20, GR_ad_Arg // Point to Limits table | |
731 | mov GR_exp_mask = 0x1FFFF // Form exponent mask | |
732 | (p8) br.cond.spnt EXP_64_SPECIAL // Branch if natval, nan, inf, zero | |
733 | } | |
734 | ;; | |
735 | ||
736 | { .mfi | |
737 | setf.exp FR_2TOM51 = GR_exp_2tom51 // Form 2^-51 for scaling float_N | |
738 | nop.f 999 | |
739 | add GR_ad_A = 0x40, GR_ad_Arg // Point to A table | |
740 | } | |
741 | { .mib | |
742 | setf.d FR_RSHF = GR_rshf // Form right shift const 1.1000 * 2^63 | |
743 | add GR_ad_T1 = 0x160, GR_ad_Arg // Point to T1 table | |
744 | (p9) br.cond.spnt EXP_64_UNSUPPORTED // Branch if unsupported | |
745 | } | |
746 | ;; | |
747 | ||
748 | .pred.rel "mutex",p6,p7 | |
749 | { .mfi | |
750 | ldfe FR_L_hi = [GR_ad_Arg],16 // Get L_hi | |
751 | fcmp.eq.s0 p9,p0 = f8, f0 // Dummy op to flag denormals | |
752 | (p6) add GR_ad_PQ = 0x30, GR_ad_A // Point to P table for exp | |
753 | } | |
754 | { .mfi | |
755 | ldfe FR_min_oflow_x = [GR_ad_Limits],16 // Get min x to cause overflow | |
756 | fmpy.s1 FR_rsq = f8, f8 // rsq = x * x for small path | |
757 | (p7) add GR_ad_PQ = 0x90, GR_ad_A // Point to Q table for expm1 | |
758 | };; | |
759 | ||
760 | { .mmi | |
761 | ldfe FR_L_lo = [GR_ad_Arg],16 // Get L_lo | |
762 | ldfe FR_zero_uflow_x = [GR_ad_Limits],16 // Get x for zero uflow result | |
763 | add GR_ad_W1 = 0x200, GR_ad_T1 // Point to W1 table | |
764 | } | |
765 | ;; | |
766 | ||
767 | { .mfi | |
768 | ldfe FR_P6Q9 = [GR_ad_PQ],16 // P6(exp) or Q9(expm1) for small path | |
769 | mov FR_r = FR_norm_x // r = X for small path | |
770 | mov GR_very_small_exp = -60 // Exponent of x for very small path | |
771 | } | |
772 | { .mfi | |
773 | add GR_ad_W2 = 0x400, GR_ad_T1 // Point to W2 table | |
774 | nop.f 999 | |
775 | (p7) mov GR_small_exp = -7 // Exponent of x for small path expm1 | |
776 | } | |
777 | ;; | |
778 | ||
779 | { .mmi | |
780 | ldfe FR_P5Q8 = [GR_ad_PQ],16 // P5(exp) or Q8(expm1) for small path | |
781 | and GR_exp_x = GR_signexp_x, GR_exp_mask | |
782 | (p6) mov GR_small_exp = -12 // Exponent of x for small path exp | |
783 | } | |
784 | ;; | |
785 | ||
786 | // N_signif = X * Inv_log2_by_2^12 | |
787 | // By adding 1.10...0*2^63 we shift and get round_int(N_signif) in significand. | |
788 | // We actually add 1.10...0*2^51 to X * Inv_log2 to do the same thing. | |
789 | { .mfi | |
790 | ldfe FR_P4Q7 = [GR_ad_PQ],16 // P4(exp) or Q7(expm1) for small path | |
791 | fma.s1 FR_N_signif = FR_norm_x, FR_INV_LN2_2TO63, FR_RSHF_2TO51 | |
792 | nop.i 999 | |
793 | } | |
794 | { .mfi | |
795 | sub GR_exp_x = GR_exp_x, GR_exp_bias // Get exponent | |
796 | fmpy.s1 FR_r4 = FR_rsq, FR_rsq // Form r4 for small path | |
797 | cmp.eq.unc p15, p0 = r0, r0 // Set Safe as default | |
798 | } | |
799 | ;; | |
800 | ||
801 | { .mmi | |
802 | ldfe FR_P3Q6 = [GR_ad_PQ],16 // P3(exp) or Q6(expm1) for small path | |
803 | cmp.lt p14, p0 = GR_exp_x, GR_very_small_exp // Is |x| < 2^-60? | |
804 | nop.i 999 | |
805 | } | |
806 | ;; | |
807 | ||
808 | { .mfi | |
809 | ldfe FR_P2Q5 = [GR_ad_PQ],16 // P2(exp) or Q5(expm1) for small path | |
810 | fmpy.s1 FR_half_x = FR_half, FR_norm_x // 0.5 * x for very small path | |
811 | cmp.lt p13, p0 = GR_exp_x, GR_small_exp // Is |x| < 2^-m? | |
812 | } | |
813 | { .mib | |
814 | nop.m 999 | |
815 | nop.i 999 | |
816 | (p14) br.cond.spnt EXP_VERY_SMALL // Branch if |x| < 2^-60 | |
817 | } | |
818 | ;; | |
819 | ||
820 | { .mfi | |
821 | ldfe FR_A3 = [GR_ad_A],16 // Get A3 for normal path | |
822 | fcmp.ge.s1 p10,p0 = FR_norm_x, FR_min_oflow_x // Will result overflow? | |
823 | mov GR_big_expo_neg = -16381 // -0x3ffd | |
824 | } | |
825 | { .mfb | |
826 | ldfe FR_P1Q4 = [GR_ad_PQ],16 // P1(exp) or Q4(expm1) for small path | |
827 | nop.f 999 | |
828 | (p13) br.cond.spnt EXP_SMALL // Branch if |x| < 2^-m | |
829 | // m=12 for exp, m=7 for expm1 | |
830 | } | |
831 | ;; | |
832 | ||
833 | // Now we are on the main path for |x| >= 2^-m, m=12 for exp, m=7 for expm1 | |
834 | // | |
0347518d | 835 | // float_N = round_int(N_signif) |
d5efd131 MF |
836 | // The significand of N_signif contains the rounded integer part of X * 2^12/ln2, | |
837 | // as a twos complement number in the lower bits (that is, it may be negative). | |
838 | // That twos complement number (called N) is put into GR_N. | |
839 | ||
840 | // Since N_signif is scaled by 2^51, it must be multiplied by 2^-51 | |
841 | // before the shift constant 1.10000 * 2^63 is subtracted to yield float_N. | |
842 | // Thus, float_N contains the floating point version of N | |
843 | ||
844 | ||
845 | { .mfi | |
846 | ldfe FR_A2 = [GR_ad_A],16 // Get A2 for main path | |
847 | fcmp.lt.s1 p11,p0 = FR_norm_x, FR_zero_uflow_x // Certain zero, uflow? | |
848 | add GR_ad_T2 = 0x100, GR_ad_T1 // Point to T2 table | |
849 | } | |
850 | { .mfi | |
851 | nop.m 999 | |
852 | fms.s1 FR_float_N = FR_N_signif, FR_2TOM51, FR_RSHF // Form float_N | |
853 | nop.i 999 | |
854 | } | |
855 | ;; | |
856 | ||
857 | { .mbb | |
858 | getf.sig GR_N_fix = FR_N_signif // Get N from significand | |
859 | (p10) br.cond.spnt EXP_OVERFLOW // Branch if result will overflow | |
860 | (p11) br.cond.spnt EXP_CERTAIN_UNDERFLOW_ZERO // Branch if certain zero, uflow | |
861 | } | |
862 | ;; | |
863 | ||
864 | { .mfi | |
865 | ldfe FR_A1 = [GR_ad_A],16 // Get A1 for main path | |
866 | fnma.s1 FR_r = FR_L_hi, FR_float_N, FR_norm_x // r = -L_hi * float_N + x | |
867 | extr.u GR_M1 = GR_N_fix, 6, 6 // Extract index M_1 | |
868 | } | |
869 | { .mfi | |
870 | and GR_M2 = 0x3f, GR_N_fix // Extract index M_2 | |
871 | nop.f 999 | |
872 | nop.i 999 | |
873 | } | |
874 | ;; | |
875 | ||
876 | // N_fix is only correct up to 50 bits because of our right shift technique. | |
877 | // Actually in the normal path we will have restricted K to about 14 bits. | |
878 | // Somewhat arbitrarily we extract 32 bits. | |
879 | { .mfi | |
880 | shladd GR_ad_W1 = GR_M1,3,GR_ad_W1 // Point to W1 | |
881 | nop.f 999 | |
882 | extr GR_K = GR_N_fix, 12, 32 // Extract limited range K | |
883 | } | |
884 | { .mfi | |
885 | shladd GR_ad_T1 = GR_M1,2,GR_ad_T1 // Point to T1 | |
886 | nop.f 999 | |
887 | shladd GR_ad_T2 = GR_M2,2,GR_ad_T2 // Point to T2 | |
888 | } | |
889 | ;; | |
890 | ||
891 | { .mmi | |
892 | ldfs FR_T1 = [GR_ad_T1],0 // Get T1 | |
893 | ldfd FR_W1 = [GR_ad_W1],0 // Get W1 | |
894 | add GR_exp_2_k = GR_exp_bias, GR_K // Form exponent of 2^k | |
895 | } | |
896 | ;; | |
897 | ||
898 | { .mmi | |
899 | ldfs FR_T2 = [GR_ad_T2],0 // Get T2 | |
900 | shladd GR_ad_W2 = GR_M2,3,GR_ad_W2 // Point to W2 | |
901 | sub GR_exp_2_mk = GR_exp_bias, GR_K // Form exponent of 2^-k | |
902 | } | |
903 | ;; | |
904 | ||
905 | { .mmf | |
906 | ldfd FR_W2 = [GR_ad_W2],0 // Get W2 | |
907 | setf.exp FR_scale = GR_exp_2_k // Set scale = 2^k | |
908 | fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r // r = -L_lo * float_N + r | |
909 | } | |
910 | ;; | |
911 | ||
912 | { .mfi | |
913 | setf.exp FR_2_mk = GR_exp_2_mk // Form 2^-k | |
914 | fma.s1 FR_poly = FR_r, FR_A3, FR_A2 // poly = r * A3 + A2 | |
915 | cmp.lt p8,p15 = GR_K,GR_big_expo_neg // Set Safe if K > big_expo_neg | |
916 | } | |
917 | { .mfi | |
918 | nop.m 999 | |
919 | fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r | |
920 | nop.i 999 | |
921 | } | |
922 | ;; | |
923 | ||
924 | { .mfi | |
925 | nop.m 999 | |
926 | fmpy.s1 FR_T = FR_T1, FR_T2 // T = T1 * T2 | |
927 | nop.i 999 | |
928 | } | |
929 | { .mfi | |
930 | nop.m 999 | |
931 | fadd.s1 FR_W1_p1 = FR_W1, f1 // W1_p1 = W1 + 1.0 | |
932 | nop.i 999 | |
933 | } | |
934 | ;; | |
935 | ||
936 | { .mfi | |
0347518d | 937 | (p7) cmp.lt.unc p8, p9 = 10, GR_K // If expm1, set p8 if K > 10 |
d5efd131 MF |
938 | fma.s1 FR_poly = FR_r, FR_poly, FR_A1 // poly = r * poly + A1 |
939 | nop.i 999 | |
940 | } | |
941 | ;; | |
942 | ||
943 | { .mfi | |
944 | (p7) cmp.eq p15, p0 = r0, r0 // If expm1, set Safe flag | |
945 | fma.s1 FR_T_scale = FR_T, FR_scale, f0 // T_scale = T * scale | |
946 | (p9) cmp.gt.unc p9, p10 = -10, GR_K // If expm1, set p9 if K < -10 | |
947 | // If expm1, set p10 if -10<=K<=10 | |
948 | } | |
949 | { .mfi | |
950 | nop.m 999 | |
951 | fma.s1 FR_W = FR_W2, FR_W1_p1, FR_W1 // W = W2 * (W1+1.0) + W1 | |
952 | nop.i 999 | |
953 | } | |
954 | ;; | |
955 | ||
956 | { .mfi | |
957 | nop.m 999 | |
958 | mov FR_Y_hi = FR_T // Assume Y_hi = T | |
959 | nop.i 999 | |
960 | } | |
961 | ;; | |
962 | ||
963 | { .mfi | |
964 | nop.m 999 | |
965 | fma.s1 FR_poly = FR_rsq, FR_poly, FR_r // poly = rsq * poly + r | |
966 | nop.i 999 | |
967 | } | |
968 | ;; | |
969 | ||
970 | { .mfi | |
971 | nop.m 999 | |
972 | fma.s1 FR_Wp1_T_scale = FR_W, FR_T_scale, FR_T_scale // (W+1)*T*scale | |
973 | nop.i 999 | |
974 | } | |
975 | { .mfi | |
976 | nop.m 999 | |
977 | fma.s1 FR_W_T_scale = FR_W, FR_T_scale, f0 // W*T*scale | |
978 | nop.i 999 | |
979 | } | |
980 | ;; | |
981 | ||
982 | { .mfi | |
983 | nop.m 999 | |
984 | (p9) fsub.s1 FR_Y_hi = f0, FR_2_mk // If expm1, if K < -10 set Y_hi | |
985 | nop.i 999 | |
986 | } | |
987 | { .mfi | |
988 | nop.m 999 | |
989 | (p10) fsub.s1 FR_Y_hi = FR_T, FR_2_mk // If expm1, if |K|<=10 set Y_hi | |
990 | nop.i 999 | |
991 | } | |
992 | ;; | |
993 | ||
994 | { .mfi | |
995 | nop.m 999 | |
996 | fma.s1 FR_result_lo = FR_Wp1_T_scale, FR_poly, FR_W_T_scale | |
997 | nop.i 999 | |
998 | } | |
999 | ;; | |
1000 | ||
1001 | .pred.rel "mutex",p8,p9 | |
1002 | // If K > 10 adjust result_lo = result_lo - scale * 2^-k | |
1003 | // If K < -10 adjust result_lo = result_lo + scale * T | |
1004 | { .mfi | |
1005 | nop.m 999 | |
1006 | (p8) fnma.s1 FR_result_lo = FR_scale, FR_2_mk, FR_result_lo // If K > 10 | |
1007 | nop.i 999 | |
1008 | } | |
1009 | { .mfi | |
1010 | nop.m 999 | |
1011 | (p9) fma.s1 FR_result_lo = FR_T_scale, f1, FR_result_lo // If |K| <= 10 | |
1012 | nop.i 999 | |
1013 | } | |
1014 | ;; | |
1015 | ||
1016 | { .mfi | |
1017 | nop.m 999 | |
1018 | fmpy.s0 FR_tmp = FR_A1, FR_A1 // Dummy op to set inexact | |
1019 | nop.i 999 | |
1020 | } | |
1021 | { .mfb | |
1022 | nop.m 999 | |
1023 | (p15) fma.s0 f8 = FR_Y_hi, FR_scale, FR_result_lo // Safe result | |
1024 | (p15) br.ret.sptk b0 // Safe exit for normal path | |
1025 | } | |
1026 | ;; | |
1027 | ||
1028 | // Here if unsafe, will only be here for exp with K < big_expo_neg | |
1029 | { .mfb | |
1030 | nop.m 999 | |
1031 | fma.s0 FR_RESULT = FR_Y_hi, FR_scale, FR_result_lo // Prelim result | |
1032 | br.cond.sptk EXP_POSSIBLE_UNDERFLOW // Branch to unsafe code | |
1033 | } | |
1034 | ;; | |
1035 | ||
0347518d MF |
1036 | |
1037 | EXP_SMALL: | |
d5efd131 MF |
1038 | // Here if 2^-60 < |x| < 2^-m, m=12 for exp, m=7 for expm1 |
1039 | { .mfi | |
1040 | (p7) ldfe FR_Q3 = [GR_ad_Q],16 // Get Q3 for small path, if expm1 | |
1041 | (p6) fma.s1 FR_p65 = FR_P6, FR_r, FR_P5 // If exp, p65 = P6 * r + P5 | |
1042 | nop.i 999 | |
1043 | } | |
1044 | { .mfi | |
1045 | mov GR_minus_one = -1 | |
1046 | (p7) fma.s1 FR_q98 = FR_Q9, FR_r, FR_Q8 // If expm1, q98 = Q9 * r + Q8 | |
1047 | nop.i 999 | |
1048 | } | |
1049 | ;; | |
1050 | ||
1051 | { .mfi | |
1052 | (p7) ldfe FR_Q2 = [GR_ad_Q],16 // Get Q2 for small path, if expm1 | |
1053 | (p7) fma.s1 FR_q65 = FR_Q6, FR_r, FR_Q5 // If expm1, q65 = Q6 * r + Q5 | |
1054 | nop.i 999 | |
1055 | } | |
1056 | ;; | |
1057 | ||
1058 | { .mfi | |
1059 | setf.sig FR_tmp = GR_minus_one // Create value to force inexact | |
1060 | (p6) fma.s1 FR_p21 = FR_P2, FR_r, FR_P1 // If exp, p21 = P2 * r + P1 | |
1061 | nop.i 999 | |
1062 | } | |
1063 | { .mfi | |
1064 | (p7) ldfe FR_Q1 = [GR_ad_Q],16 // Get Q1 for small path, if expm1 | |
1065 | (p7) fma.s1 FR_q43 = FR_Q4, FR_r, FR_Q3 // If expm1, q43 = Q4 * r + Q3 | |
1066 | nop.i 999 | |
1067 | } | |
1068 | ;; | |
1069 | ||
1070 | { .mfi | |
1071 | nop.m 999 | |
1072 | (p6) fma.s1 FR_p654 = FR_p65, FR_r, FR_P4 // If exp, p654 = p65 * r + P4 | |
1073 | nop.i 999 | |
1074 | } | |
1075 | { .mfi | |
1076 | nop.m 999 | |
1077 | (p7) fma.s1 FR_q987 = FR_q98, FR_r, FR_Q7 // If expm1, q987 = q98 * r + Q7 | |
1078 | nop.i 999 | |
1079 | } | |
1080 | ;; | |
1081 | ||
1082 | { .mfi | |
1083 | nop.m 999 | |
1084 | (p7) fma.s1 FR_q21 = FR_Q2, FR_r, FR_Q1 // If expm1, q21 = Q2 * r + Q1 | |
1085 | nop.i 999 | |
1086 | } | |
1087 | ;; | |
1088 | ||
1089 | { .mfi | |
1090 | nop.m 999 | |
1091 | (p6) fma.s1 FR_p210 = FR_p21, FR_rsq, FR_r // If exp, p210 = p21 * r + P0 | |
1092 | nop.i 999 | |
1093 | } | |
1094 | { .mfi | |
1095 | nop.m 999 | |
1096 | (p7) fma.s1 FR_q6543 = FR_q65, FR_rsq, FR_q43 // If expm1, q6543 = q65*r2+q43 | |
1097 | nop.i 999 | |
1098 | } | |
1099 | ;; | |
1100 | ||
1101 | { .mfi | |
1102 | nop.m 999 | |
1103 | (p6) fma.s1 FR_p6543 = FR_p654, FR_r, FR_P3 // If exp, p6543 = p654 * r + P3 | |
1104 | nop.i 999 | |
1105 | } | |
1106 | { .mfi | |
1107 | nop.m 999 | |
1108 | (p7) fma.s1 FR_q9876543 = FR_q987, FR_r4, FR_q6543 // If expm1, q9876543 = ... | |
1109 | nop.i 999 | |
1110 | } | |
1111 | ;; | |
1112 | ||
1113 | { .mfi | |
1114 | nop.m 999 | |
1115 | (p6) fma.s1 FR_Y_lo = FR_p6543, FR_r4, FR_p210 // If exp, form Y_lo | |
1116 | nop.i 999 | |
1117 | } | |
1118 | ;; | |
1119 | ||
1120 | { .mfi | |
1121 | nop.m 999 | |
1122 | (p7) fma.s1 FR_Y_lo = FR_q9876543, FR_rsq, FR_q21 // If expm1, form Y_lo | |
1123 | nop.i 999 | |
1124 | } | |
1125 | ;; | |
1126 | ||
1127 | { .mfi | |
1128 | nop.m 999 | |
1129 | fmpy.s0 FR_tmp = FR_tmp, FR_tmp // Dummy op to set inexact | |
1130 | nop.i 999 | |
1131 | } | |
1132 | ;; | |
1133 | ||
1134 | .pred.rel "mutex",p6,p7 | |
1135 | { .mfi | |
1136 | nop.m 999 | |
1137 | (p6) fma.s0 f8 = FR_Y_lo, f1, f1 // If exp, result = 1 + Y_lo | |
1138 | nop.i 999 | |
1139 | } | |
1140 | { .mfb | |
1141 | nop.m 999 | |
1142 | (p7) fma.s0 f8 = FR_Y_lo, FR_rsq, FR_norm_x // If expm1, result = Y_lo*r2+x | |
1143 | br.ret.sptk b0 // Exit for 2^-60 <= |x| < 2^-m | |
1144 | // m=12 for exp, m=7 for expm1 | |
1145 | } | |
1146 | ;; | |
1147 | ||
1148 | ||
0347518d | 1149 | EXP_VERY_SMALL: |
d5efd131 MF |
1150 | // |
1151 | // Here if 0 < |x| < 2^-60 | |
1152 | // If exp, result = 1.0 + x | |
1153 | // If expm1, result = x +x*x/2, but have to check for possible underflow | |
1154 | // | |
1155 | ||
1156 | { .mfi | |
1157 | (p7) mov GR_exp_underflow = -16381 // Exponent for possible underflow | |
1158 | (p6) fadd.s0 f8 = f1, FR_norm_x // If exp, result = 1+x | |
1159 | nop.i 999 | |
1160 | } | |
1161 | { .mfi | |
1162 | nop.m 999 | |
1163 | (p7) fmpy.s1 FR_result_lo = FR_half_x, FR_norm_x // If expm1 result_lo = x*x/2 | |
1164 | nop.i 999 | |
1165 | } | |
1166 | ;; | |
1167 | ||
1168 | { .mfi | |
1169 | (p7) cmp.lt.unc p0, p8 = GR_exp_x, GR_exp_underflow // Unsafe if expm1 x small | |
1170 | (p7) mov FR_Y_hi = FR_norm_x // If expm1, Y_hi = x | |
1171 | (p7) cmp.lt p0, p15 = GR_exp_x, GR_exp_underflow // Unsafe if expm1 x small | |
1172 | } | |
1173 | ;; | |
1174 | ||
1175 | { .mfb | |
1176 | nop.m 999 | |
1177 | (p8) fma.s0 f8 = FR_norm_x, f1, FR_result_lo // If expm1, result=x+x*x/2 | |
1178 | (p15) br.ret.sptk b0 // If Safe, exit | |
1179 | } | |
1180 | ;; | |
1181 | ||
1182 | // Here if expm1 and 0 < |x| < 2^-16381; may be possible underflow | |
1183 | { .mfb | |
1184 | nop.m 999 | |
1185 | fma.s0 FR_RESULT = FR_Y_hi, FR_scale, FR_result_lo // Prelim result | |
1186 | br.cond.sptk EXP_POSSIBLE_UNDERFLOW // Branch to unsafe code | |
1187 | } | |
1188 | ;; | |
1189 | ||
1190 | EXP_CERTAIN_UNDERFLOW_ZERO: | |
1191 | // Here if x < zero_uflow_x | |
1192 | // For exp, set result to tiny+0.0 and set I, U, and branch to error handling | |
1193 | // For expm1, set result to tiny-1.0 and set I, and exit | |
1194 | { .mmi | |
1195 | alloc GR_SAVE_PFS = ar.pfs,0,3,4,0 | |
1196 | nop.m 999 | |
1197 | mov GR_one = 1 | |
1198 | } | |
1199 | ;; | |
1200 | ||
1201 | { .mmi | |
1202 | setf.exp FR_small = GR_one // Form small value | |
1203 | nop.m 999 | |
1204 | (p6) mov GR_Parameter_TAG = 13 // Error tag for exp underflow | |
1205 | } | |
1206 | ;; | |
1207 | ||
1208 | { .mfi | |
1209 | nop.m 999 | |
1210 | fmerge.s FR_X = f8,f8 // Save x for error call | |
1211 | nop.i 999 | |
1212 | } | |
1213 | ;; | |
1214 | ||
1215 | .pred.rel "mutex",p6,p7 | |
1216 | { .mfb | |
1217 | nop.m 999 | |
1218 | (p6) fma.s0 FR_RESULT = FR_small, FR_small, f0 // If exp, set I,U, tiny result | |
1219 | (p6) br.cond.sptk __libm_error_region // If exp, go to error handling | |
1220 | } | |
1221 | { .mfb | |
1222 | nop.m 999 | |
1223 | (p7) fms.s0 f8 = FR_small, FR_small, f1 // If expm1, set I, result -1.0 | |
1224 | (p7) br.ret.sptk b0 // If expm1, exit | |
1225 | } | |
1226 | ;; | |
0347518d MF |
1227 | |
1228 | ||
d5efd131 MF |
1229 | EXP_OVERFLOW: |
1230 | // Here if x >= min_oflow_x | |
1231 | { .mmi | |
1232 | alloc GR_SAVE_PFS = ar.pfs,0,3,4,0 | |
1233 | mov GR_huge_exp = 0x1fffe | |
1234 | nop.i 999 | |
1235 | } | |
1236 | { .mfi | |
1237 | mov GR_huge_signif = -0x1 | |
1238 | nop.f 999 | |
1239 | (p6) mov GR_Parameter_TAG = 12 // Error tag for exp overflow | |
1240 | } | |
1241 | ;; | |
1242 | ||
1243 | { .mmf | |
1244 | setf.exp FR_huge_exp = GR_huge_exp // Create huge value | |
1245 | setf.sig FR_huge_signif = GR_huge_signif // Create huge value | |
1246 | fmerge.s FR_X = f8,f8 // Save x for error call | |
1247 | } | |
1248 | ;; | |
1249 | ||
1250 | { .mfi | |
1251 | nop.m 999 | |
1252 | fmerge.se FR_huge = FR_huge_exp, FR_huge_signif | |
1253 | (p7) mov GR_Parameter_TAG = 39 // Error tag for expm1 overflow | |
1254 | } | |
1255 | ;; | |
1256 | ||
1257 | { .mfb | |
1258 | nop.m 999 | |
1259 | fma.s0 FR_RESULT = FR_huge, FR_huge, FR_huge // Force I, O, and Inf | |
1260 | br.cond.sptk __libm_error_region // Branch to error handling | |
1261 | } | |
1262 | ;; | |
1263 | ||
1264 | ||
1265 | ||
1266 | EXP_POSSIBLE_UNDERFLOW: | |
1267 | // Here if exp and zero_uflow_x < x < about -11356 [where k < -16381] | |
1268 | // Here if expm1 and |x| < 2^-16381 | |
1269 | { .mfi | |
1270 | alloc GR_SAVE_PFS = ar.pfs,0,3,4,0 | |
1271 | fsetc.s2 0x7F,0x41 // Set FTZ and disable traps | |
1272 | nop.i 999 | |
1273 | } | |
1274 | ;; | |
1275 | ||
1276 | { .mfi | |
1277 | nop.m 999 | |
1278 | fma.s2 FR_ftz = FR_Y_hi, FR_scale, FR_result_lo // Result with FTZ | |
1279 | nop.i 999 | |
1280 | } | |
1281 | ;; | |
1282 | ||
1283 | { .mfi | |
1284 | nop.m 999 | |
1285 | fsetc.s2 0x7F,0x40 // Disable traps (set s2 default) | |
1286 | nop.i 999 | |
1287 | } | |
1288 | ;; | |
1289 | ||
1290 | { .mfi | |
1291 | nop.m 999 | |
1292 | (p6) fclass.m.unc p11, p0 = FR_ftz, 0x00F // If exp, FTZ result denorm or zero? | |
1293 | nop.i 999 | |
1294 | } | |
1295 | ;; | |
1296 | ||
1297 | { .mfb | |
1298 | (p11) mov GR_Parameter_TAG = 13 // exp underflow | |
1299 | fmerge.s FR_X = f8,f8 // Save x for error call | |
1300 | (p11) br.cond.spnt __libm_error_region // Branch on exp underflow | |
1301 | } | |
1302 | ;; | |
1303 | ||
1304 | { .mfb | |
1305 | nop.m 999 | |
1306 | mov f8 = FR_RESULT // Was safe after all | |
1307 | br.ret.sptk b0 | |
1308 | } | |
1309 | ;; | |
1310 | ||
1311 | ||
0347518d | 1312 | EXP_64_SPECIAL: |
d5efd131 MF |
1313 | // Here if x natval, nan, inf, zero |
1314 | // If x natval, +inf, or if expm1 and x zero, just return x. | |
1315 | // The other cases must be tested for, and results set. | |
1316 | // These cases do not generate exceptions. | |
1317 | { .mfi | |
1318 | nop.m 999 | |
1319 | fclass.m p8, p0 = f8, 0x0c3 // Is x nan? | |
1320 | nop.i 999 | |
1321 | } | |
1322 | ;; | |
1323 | ||
1324 | { .mfi | |
1325 | nop.m 999 | |
1326 | (p6) fclass.m.unc p13, p0 = f8, 0x007 // If exp, is x zero? | |
1327 | nop.i 999 | |
1328 | } | |
1329 | ;; | |
1330 | ||
1331 | { .mfi | |
1332 | nop.m 999 | |
1333 | (p6) fclass.m.unc p11, p0 = f8, 0x022 // If exp, is x -inf? | |
1334 | nop.i 999 | |
1335 | } | |
1336 | { .mfi | |
1337 | nop.m 999 | |
1338 | (p8) fadd.s0 f8 = f8, f1 // If x nan, result quietized x | |
1339 | nop.i 999 | |
1340 | } | |
1341 | ;; | |
1342 | ||
1343 | { .mfi | |
1344 | nop.m 999 | |
1345 | (p7) fclass.m.unc p10, p0 = f8, 0x022 // If expm1, is x -inf? | |
1346 | nop.i 999 | |
1347 | } | |
1348 | { .mfi | |
1349 | nop.m 999 | |
1350 | (p13) fadd.s0 f8 = f0, f1 // If exp and x zero, result 1.0 | |
1351 | nop.i 999 | |
1352 | } | |
1353 | ;; | |
1354 | ||
1355 | { .mfi | |
1356 | nop.m 999 | |
1357 | (p11) mov f8 = f0 // If exp and x -inf, result 0 | |
1358 | nop.i 999 | |
1359 | } | |
1360 | ;; | |
1361 | ||
1362 | { .mfb | |
1363 | nop.m 999 | |
1364 | (p10) fsub.s1 f8 = f0, f1 // If expm1, x -inf, result -1.0 | |
1365 | br.ret.sptk b0 // Exit special cases | |
1366 | } | |
1367 | ;; | |
1368 | ||
1369 | ||
0347518d | 1370 | EXP_64_UNSUPPORTED: |
d5efd131 MF |
1371 | // Here if x unsupported type |
1372 | { .mfb | |
1373 | nop.m 999 | |
1374 | fmpy.s0 f8 = f8, f0 // Return nan | |
1375 | br.ret.sptk b0 | |
1376 | } | |
1377 | ;; | |
1378 | ||
1379 | GLOBAL_IEEE754_END(expl) | |
1380 | ||
// __libm_error_region
//
// Local error-exit stub.  The overflow and underflow paths of this file
// (EXP_OVERFLOW, EXP_CERTAIN_UNDERFLOW_ZERO, EXP_POSSIBLE_UNDERFLOW) branch
// here with br.cond, so b0 still holds the return address that was live in
// the branching code.  The stub builds a 64-byte stack frame, stores FR_X,
// FR_Y and FR_RESULT into it, calls __libm_error_support, reloads f8 from
// the RESULT slot and returns through the saved b0.
//
// Expected on entry (established by the branching code in this file):
//   FR_X             copy of the input argument (fmerge.s FR_X = f8,f8)
//   FR_RESULT        provisional result computed by the failing path
//   GR_Parameter_TAG error tag (12 = exp overflow, 13 = exp underflow,
//                    39 = expm1 overflow)
//   an alloc has already been executed, so GR_SAVE_PFS and the output
//   registers are available.
// FR_Y is stored too, although no visible path in this file assigns it --
// presumably it is a don't-care for these one-argument functions; TODO confirm.
//
// NOTE(review): GR_Parameter_TAG is never referenced below.  Presumably it
// and GR_Parameter_X / _Y / _RESULT alias the output registers that
// __libm_error_support reads as its arguments -- confirm against the
// register definitions earlier in the file.
//
// Frame layout, as offsets from the new (decremented) sp:
//   sp+16  FR_X       (Parameter 1)
//   sp+32  FR_Y       (Parameter 2)
//   sp+48  FR_RESULT  (Parameter 3; reloaded into f8 after the call)
1381 | LOCAL_LIBM_ENTRY(__libm_error_region) | |
1382 | .prologue | |
// The next two bundles form one instruction group (no stop between them),
// so GR_Parameter_Y is computed from the sp value before the -64 adjustment:
// old_sp - 32 is the same address as new_sp + 32.
1383 | { .mfi | |
1384 | add GR_Parameter_Y=-32,sp // Parameter 2 value | |
1385 | nop.f 0 | |
1386 | .save ar.pfs,GR_SAVE_PFS | |
1387 | mov GR_SAVE_PFS=ar.pfs // Save ar.pfs | |
1388 | } | |
1389 | { .mfi | |
1390 | .fframe 64 | |
1391 | add sp=-64,sp // Create new stack | |
1392 | nop.f 0 | |
1393 | mov GR_SAVE_GP=gp // Save gp | |
1394 | };; | |
// Store FR_Y at new_sp+32; the post-increment of 16 leaves GR_Parameter_Y
// at new_sp+48, which becomes the RESULT slot below.
1395 | { .mmi | |
1396 | stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack | |
1397 | add GR_Parameter_X = 16,sp // Parameter 1 address | |
1398 | .save b0, GR_SAVE_B0 | |
1399 | mov GR_SAVE_B0=b0 // Save b0 | |
1400 | };; | |
1401 | .body | |
// These two bundles are again a single instruction group: both the copy into
// GR_Parameter_RESULT and the FR_RESULT store read GR_Parameter_Y while it
// is still new_sp+48; the add of -16 then moves it back to new_sp+32 (the
// FR_Y slot) before the call.
1402 | { .mib | |
1403 | stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack | |
1404 | add GR_Parameter_RESULT = 0,GR_Parameter_Y | |
1405 | nop.b 0 // Parameter 3 address | |
1406 | } | |
1407 | { .mib | |
1408 | stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack | |
1409 | add GR_Parameter_Y = -16,GR_Parameter_Y | |
1410 | br.call.sptk b0=__libm_error_support# // Call error handling function | |
1411 | };; | |
// After the call the RESULT address is recomputed from sp rather than reused
// from the register.  NOTE(review): presumably the handler may clobber that
// register and may rewrite the RESULT slot -- confirm against
// __libm_error_support.
1412 | { .mmi | |
1413 | add GR_Parameter_RESULT = 48,sp | |
1414 | nop.m 0 | |
1415 | nop.i 0 | |
1416 | };; | |
// Epilogue: fetch the final result into f8, pop the 64-byte frame, and
// restore b0, gp and ar.pfs from the registers saved in the prologue.
1417 | { .mmi | |
1418 | ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack | |
1419 | .restore sp | |
1420 | add sp = 64,sp // Restore stack pointer | |
1421 | mov b0 = GR_SAVE_B0 // Restore return address | |
1422 | };; | |
1423 | { .mib | |
1424 | mov gp = GR_SAVE_GP // Restore gp | |
1425 | mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs | |
1426 | br.ret.sptk b0 // Return | |
1427 | };; | |
1428 | LOCAL_LIBM_END(__libm_error_region#) | |
1429 | ||
1430 | .type __libm_error_support#,@function | |
1431 | .global __libm_error_support# |