]>
Commit | Line | Data |
---|---|---|
134c8a50 | 1 | /* IEEE-754 double-precision functions for Xtensa |
83ffe9cd | 2 | Copyright (C) 2006-2023 Free Software Foundation, Inc. |
134c8a50 BW |
3 | Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
748086b7 | 9 | the Free Software Foundation; either version 3, or (at your option) |
134c8a50 BW |
10 | any later version. |
11 | ||
134c8a50 BW |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT |
13 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
15 | License for more details. | |
16 | ||
748086b7 JJ |
17 | Under Section 7 of GPL version 3, you are granted additional |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
134c8a50 BW |
25 | |
26 | #ifdef __XTENSA_EB__ | |
27 | #define xh a2 | |
28 | #define xl a3 | |
29 | #define yh a4 | |
30 | #define yl a5 | |
31 | #else | |
32 | #define xh a3 | |
33 | #define xl a2 | |
34 | #define yh a5 | |
35 | #define yl a4 | |
36 | #endif | |
37 | ||
38 | /* Warning! The branch displacements for some Xtensa branch instructions | |
39 | are quite small, and this code has been carefully laid out to keep | |
40 | branch targets in range. If you change anything, be sure to check that | |
41 | the assembler is not relaxing anything to branch over a jump. */ | |
42 | ||
43 | #ifdef L_negdf2 | |
44 | ||
45 | .align 4 | |
46 | .global __negdf2 | |
47 | .type __negdf2, @function | |
48 | __negdf2: | |
b7974b3a | 49 | leaf_entry sp, 16 |
134c8a50 BW |
50 | movi a4, 0x80000000 |
51 | xor xh, xh, a4 | |
b7974b3a | 52 | leaf_return |
134c8a50 BW |
53 | |
54 | #endif /* L_negdf2 */ | |
55 | ||
56 | #ifdef L_addsubdf3 | |
57 | ||
faef260e | 58 | .literal_position |
134c8a50 BW |
59 | /* Addition */ |
60 | __adddf3_aux: | |
61 | ||
62 | /* Handle NaNs and Infinities. (This code is placed before the | |
63 | start of the function just to keep it in range of the limited | |
64 | branch displacements.) */ | |
65 | ||
66 | .Ladd_xnan_or_inf: | |
67 | /* If y is neither Infinity nor NaN, return x. */ | |
0889f168 | 68 | bnall yh, a6, .Ladd_return_nan_or_inf |
134c8a50 BW |
69 | /* If x is a NaN, return it. Otherwise, return y. */ |
70 | slli a7, xh, 12 | |
71 | or a7, a7, xl | |
0889f168 | 72 | bnez a7, .Ladd_return_nan |
134c8a50 BW |
73 | |
74 | .Ladd_ynan_or_inf: | |
75 | /* Return y. */ | |
76 | mov xh, yh | |
77 | mov xl, yl | |
0889f168 MF |
78 | |
79 | .Ladd_return_nan_or_inf: | |
80 | slli a7, xh, 12 | |
81 | or a7, a7, xl | |
82 | bnez a7, .Ladd_return_nan | |
83 | leaf_return | |
84 | ||
85 | .Ladd_return_nan: | |
86 | movi a4, 0x80000 /* make it a quiet NaN */ | |
87 | or xh, xh, a4 | |
b7974b3a | 88 | leaf_return |
134c8a50 BW |
89 | |
90 | .Ladd_opposite_signs: | |
91 | /* Operand signs differ. Do a subtraction. */ | |
92 | slli a7, a6, 11 | |
93 | xor yh, yh, a7 | |
94 | j .Lsub_same_sign | |
95 | ||
96 | .align 4 | |
97 | .global __adddf3 | |
98 | .type __adddf3, @function | |
99 | __adddf3: | |
b7974b3a | 100 | leaf_entry sp, 16 |
134c8a50 BW |
101 | movi a6, 0x7ff00000 |
102 | ||
103 | /* Check if the two operands have the same sign. */ | |
104 | xor a7, xh, yh | |
105 | bltz a7, .Ladd_opposite_signs | |
106 | ||
107 | .Ladd_same_sign: | |
108 | /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ | |
109 | ball xh, a6, .Ladd_xnan_or_inf | |
110 | ball yh, a6, .Ladd_ynan_or_inf | |
111 | ||
112 | /* Compare the exponents. The smaller operand will be shifted | |
113 | right by the exponent difference and added to the larger | |
114 | one. */ | |
115 | extui a7, xh, 20, 12 | |
116 | extui a8, yh, 20, 12 | |
117 | bltu a7, a8, .Ladd_shiftx | |
118 | ||
119 | .Ladd_shifty: | |
120 | /* Check if the smaller (or equal) exponent is zero. */ | |
121 | bnone yh, a6, .Ladd_yexpzero | |
122 | ||
123 | /* Replace yh sign/exponent with 0x001. */ | |
124 | or yh, yh, a6 | |
125 | slli yh, yh, 11 | |
126 | srli yh, yh, 11 | |
127 | ||
128 | .Ladd_yexpdiff: | |
129 | /* Compute the exponent difference. Optimize for difference < 32. */ | |
130 | sub a10, a7, a8 | |
131 | bgeui a10, 32, .Ladd_bigshifty | |
132 | ||
133 | /* Shift yh/yl right by the exponent difference. Any bits that are | |
134 | shifted out of yl are saved in a9 for rounding the result. */ | |
135 | ssr a10 | |
136 | movi a9, 0 | |
137 | src a9, yl, a9 | |
138 | src yl, yh, yl | |
139 | srl yh, yh | |
140 | ||
141 | .Ladd_addy: | |
142 | /* Do the 64-bit addition. */ | |
143 | add xl, xl, yl | |
144 | add xh, xh, yh | |
145 | bgeu xl, yl, 1f | |
146 | addi xh, xh, 1 | |
147 | 1: | |
148 | /* Check if the add overflowed into the exponent. */ | |
149 | extui a10, xh, 20, 12 | |
150 | beq a10, a7, .Ladd_round | |
151 | mov a8, a7 | |
152 | j .Ladd_carry | |
153 | ||
154 | .Ladd_yexpzero: | |
155 | /* y is a subnormal value. Replace its sign/exponent with zero, | |
156 | i.e., no implicit "1.0", and increment the apparent exponent | |
157 | because subnormals behave as if they had the minimum (nonzero) | |
158 | exponent. Test for the case when both exponents are zero. */ | |
159 | slli yh, yh, 12 | |
160 | srli yh, yh, 12 | |
161 | bnone xh, a6, .Ladd_bothexpzero | |
162 | addi a8, a8, 1 | |
163 | j .Ladd_yexpdiff | |
164 | ||
165 | .Ladd_bothexpzero: | |
166 | /* Both exponents are zero. Handle this as a special case. There | |
167 | is no need to shift or round, and the normal code for handling | |
168 | a carry into the exponent field will not work because it | |
169 | assumes there is an implicit "1.0" that needs to be added. */ | |
170 | add xl, xl, yl | |
171 | add xh, xh, yh | |
172 | bgeu xl, yl, 1f | |
173 | addi xh, xh, 1 | |
b7974b3a | 174 | 1: leaf_return |
134c8a50 BW |
175 | |
176 | .Ladd_bigshifty: | |
177 | /* Exponent difference > 64 -- just return the bigger value. */ | |
178 | bgeui a10, 64, 1b | |
179 | ||
180 | /* Shift yh/yl right by the exponent difference. Any bits that are | |
181 | shifted out are saved in a9 for rounding the result. */ | |
182 | ssr a10 | |
183 | sll a11, yl /* lost bits shifted out of yl */ | |
184 | src a9, yh, yl | |
185 | srl yl, yh | |
186 | movi yh, 0 | |
187 | beqz a11, .Ladd_addy | |
188 | or a9, a9, a10 /* any positive, nonzero value will work */ | |
189 | j .Ladd_addy | |
190 | ||
191 | .Ladd_xexpzero: | |
192 | /* Same as "yexpzero" except skip handling the case when both | |
193 | exponents are zero. */ | |
194 | slli xh, xh, 12 | |
195 | srli xh, xh, 12 | |
196 | addi a7, a7, 1 | |
197 | j .Ladd_xexpdiff | |
198 | ||
199 | .Ladd_shiftx: | |
200 | /* Same thing as the "shifty" code, but with x and y swapped. Also, | |
201 | because the exponent difference is always nonzero in this version, | |
202 | the shift sequence can use SLL and skip loading a constant zero. */ | |
203 | bnone xh, a6, .Ladd_xexpzero | |
204 | ||
205 | or xh, xh, a6 | |
206 | slli xh, xh, 11 | |
207 | srli xh, xh, 11 | |
208 | ||
209 | .Ladd_xexpdiff: | |
210 | sub a10, a8, a7 | |
211 | bgeui a10, 32, .Ladd_bigshiftx | |
212 | ||
213 | ssr a10 | |
214 | sll a9, xl | |
215 | src xl, xh, xl | |
216 | srl xh, xh | |
217 | ||
218 | .Ladd_addx: | |
219 | add xl, xl, yl | |
220 | add xh, xh, yh | |
221 | bgeu xl, yl, 1f | |
222 | addi xh, xh, 1 | |
223 | 1: | |
224 | /* Check if the add overflowed into the exponent. */ | |
225 | extui a10, xh, 20, 12 | |
226 | bne a10, a8, .Ladd_carry | |
227 | ||
228 | .Ladd_round: | |
229 | /* Round up if the leftover fraction is >= 1/2. */ | |
230 | bgez a9, 1f | |
231 | addi xl, xl, 1 | |
232 | beqz xl, .Ladd_roundcarry | |
233 | ||
234 | /* Check if the leftover fraction is exactly 1/2. */ | |
235 | slli a9, a9, 1 | |
236 | beqz a9, .Ladd_exactlyhalf | |
b7974b3a | 237 | 1: leaf_return |
134c8a50 BW |
238 | |
239 | .Ladd_bigshiftx: | |
240 | /* Mostly the same thing as "bigshifty".... */ | |
241 | bgeui a10, 64, .Ladd_returny | |
242 | ||
243 | ssr a10 | |
244 | sll a11, xl | |
245 | src a9, xh, xl | |
246 | srl xl, xh | |
247 | movi xh, 0 | |
248 | beqz a11, .Ladd_addx | |
249 | or a9, a9, a10 | |
250 | j .Ladd_addx | |
251 | ||
252 | .Ladd_returny: | |
253 | mov xh, yh | |
254 | mov xl, yl | |
b7974b3a | 255 | leaf_return |
134c8a50 BW |
256 | |
257 | .Ladd_carry: | |
258 | /* The addition has overflowed into the exponent field, so the | |
259 | value needs to be renormalized. The mantissa of the result | |
260 | can be recovered by subtracting the original exponent and | |
261 | adding 0x100000 (which is the explicit "1.0" for the | |
262 | mantissa of the non-shifted operand -- the "1.0" for the | |
263 | shifted operand was already added). The mantissa can then | |
264 | be shifted right by one bit. The explicit "1.0" of the | |
265 | shifted mantissa then needs to be replaced by the exponent, | |
266 | incremented by one to account for the normalizing shift. | |
267 | It is faster to combine these operations: do the shift first | |
268 | and combine the additions and subtractions. If x is the | |
269 | original exponent, the result is: | |
270 | shifted mantissa - (x << 19) + (1 << 19) + (x << 20) | |
271 | or: | |
272 | shifted mantissa + ((x + 1) << 19) | |
273 | Note that the exponent is incremented here by leaving the | |
274 | explicit "1.0" of the mantissa in the exponent field. */ | |
275 | ||
276 | /* Shift xh/xl right by one bit. Save the lsb of xl. */ | |
277 | mov a10, xl | |
278 | ssai 1 | |
279 | src xl, xh, xl | |
280 | srl xh, xh | |
281 | ||
282 | /* See explanation above. The original exponent is in a8. */ | |
283 | addi a8, a8, 1 | |
284 | slli a8, a8, 19 | |
285 | add xh, xh, a8 | |
286 | ||
287 | /* Return an Infinity if the exponent overflowed. */ | |
288 | ball xh, a6, .Ladd_infinity | |
289 | ||
290 | /* Same thing as the "round" code except the msb of the leftover | |
291 | fraction is bit 0 of a10, with the rest of the fraction in a9. */ | |
292 | bbci.l a10, 0, 1f | |
293 | addi xl, xl, 1 | |
294 | beqz xl, .Ladd_roundcarry | |
295 | beqz a9, .Ladd_exactlyhalf | |
b7974b3a | 296 | 1: leaf_return |
134c8a50 BW |
297 | |
298 | .Ladd_infinity: | |
299 | /* Clear the mantissa. */ | |
300 | movi xl, 0 | |
301 | srli xh, xh, 20 | |
302 | slli xh, xh, 20 | |
303 | ||
304 | /* The sign bit may have been lost in a carry-out. Put it back. */ | |
305 | slli a8, a8, 1 | |
306 | or xh, xh, a8 | |
b7974b3a | 307 | leaf_return |
134c8a50 BW |
308 | |
309 | .Ladd_exactlyhalf: | |
310 | /* Round down to the nearest even value. */ | |
311 | srli xl, xl, 1 | |
312 | slli xl, xl, 1 | |
b7974b3a | 313 | leaf_return |
134c8a50 BW |
314 | |
315 | .Ladd_roundcarry: | |
316 | /* xl is always zero when the rounding increment overflows, so | |
317 | there's no need to round it to an even value. */ | |
318 | addi xh, xh, 1 | |
319 | /* Overflow to the exponent is OK. */ | |
b7974b3a | 320 | leaf_return |
134c8a50 BW |
321 | |
322 | ||
323 | /* Subtraction */ | |
324 | __subdf3_aux: | |
325 | ||
326 | /* Handle NaNs and Infinities. (This code is placed before the | |
327 | start of the function just to keep it in range of the limited | |
328 | branch displacements.) */ | |
329 | ||
330 | .Lsub_xnan_or_inf: | |
331 | /* If y is neither Infinity nor NaN, return x. */ | |
0889f168 MF |
332 | bnall yh, a6, .Lsub_return_nan_or_inf |
333 | ||
334 | .Lsub_return_nan: | |
134c8a50 BW |
335 | /* Both x and y are either NaN or Inf, so the result is NaN. */ |
336 | movi a4, 0x80000 /* make it a quiet NaN */ | |
337 | or xh, xh, a4 | |
0889f168 | 338 | leaf_return |
134c8a50 BW |
339 | |
340 | .Lsub_ynan_or_inf: | |
341 | /* Negate y and return it. */ | |
342 | slli a7, a6, 11 | |
343 | xor xh, yh, a7 | |
344 | mov xl, yl | |
0889f168 MF |
345 | |
346 | .Lsub_return_nan_or_inf: | |
347 | slli a7, xh, 12 | |
348 | or a7, a7, xl | |
349 | bnez a7, .Lsub_return_nan | |
b7974b3a | 350 | leaf_return |
134c8a50 BW |
351 | |
352 | .Lsub_opposite_signs: | |
353 | /* Operand signs differ. Do an addition. */ | |
354 | slli a7, a6, 11 | |
355 | xor yh, yh, a7 | |
356 | j .Ladd_same_sign | |
357 | ||
358 | .align 4 | |
359 | .global __subdf3 | |
360 | .type __subdf3, @function | |
361 | __subdf3: | |
b7974b3a | 362 | leaf_entry sp, 16 |
134c8a50 BW |
363 | movi a6, 0x7ff00000 |
364 | ||
365 | /* Check if the two operands have the same sign. */ | |
366 | xor a7, xh, yh | |
367 | bltz a7, .Lsub_opposite_signs | |
368 | ||
369 | .Lsub_same_sign: | |
370 | /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ | |
371 | ball xh, a6, .Lsub_xnan_or_inf | |
372 | ball yh, a6, .Lsub_ynan_or_inf | |
373 | ||
374 | /* Compare the operands. In contrast to addition, the entire | |
375 | value matters here. */ | |
376 | extui a7, xh, 20, 11 | |
377 | extui a8, yh, 20, 11 | |
378 | bltu xh, yh, .Lsub_xsmaller | |
379 | beq xh, yh, .Lsub_compare_low | |
380 | ||
381 | .Lsub_ysmaller: | |
382 | /* Check if the smaller (or equal) exponent is zero. */ | |
383 | bnone yh, a6, .Lsub_yexpzero | |
384 | ||
385 | /* Replace yh sign/exponent with 0x001. */ | |
386 | or yh, yh, a6 | |
387 | slli yh, yh, 11 | |
388 | srli yh, yh, 11 | |
389 | ||
390 | .Lsub_yexpdiff: | |
391 | /* Compute the exponent difference. Optimize for difference < 32. */ | |
392 | sub a10, a7, a8 | |
393 | bgeui a10, 32, .Lsub_bigshifty | |
394 | ||
395 | /* Shift yh/yl right by the exponent difference. Any bits that are | |
396 | shifted out of yl are saved in a9 for rounding the result. */ | |
397 | ssr a10 | |
398 | movi a9, 0 | |
399 | src a9, yl, a9 | |
400 | src yl, yh, yl | |
401 | srl yh, yh | |
402 | ||
403 | .Lsub_suby: | |
404 | /* Do the 64-bit subtraction. */ | |
405 | sub xh, xh, yh | |
406 | bgeu xl, yl, 1f | |
407 | addi xh, xh, -1 | |
408 | 1: sub xl, xl, yl | |
409 | ||
410 | /* Subtract the leftover bits in a9 from zero and propagate any | |
411 | borrow from xh/xl. */ | |
412 | neg a9, a9 | |
413 | beqz a9, 1f | |
414 | addi a5, xh, -1 | |
415 | moveqz xh, a5, xl | |
416 | addi xl, xl, -1 | |
417 | 1: | |
418 | /* Check if the subtract underflowed into the exponent. */ | |
419 | extui a10, xh, 20, 11 | |
420 | beq a10, a7, .Lsub_round | |
421 | j .Lsub_borrow | |
422 | ||
423 | .Lsub_compare_low: | |
424 | /* The high words are equal. Compare the low words. */ | |
425 | bltu xl, yl, .Lsub_xsmaller | |
426 | bltu yl, xl, .Lsub_ysmaller | |
427 | /* The operands are equal. Return 0.0. */ | |
428 | movi xh, 0 | |
429 | movi xl, 0 | |
b7974b3a | 430 | 1: leaf_return |
134c8a50 BW |
431 | |
432 | .Lsub_yexpzero: | |
433 | /* y is a subnormal value. Replace its sign/exponent with zero, | |
434 | i.e., no implicit "1.0". Unless x is also a subnormal, increment | |
435 | y's apparent exponent because subnormals behave as if they had | |
436 | the minimum (nonzero) exponent. */ | |
437 | slli yh, yh, 12 | |
438 | srli yh, yh, 12 | |
439 | bnone xh, a6, .Lsub_yexpdiff | |
440 | addi a8, a8, 1 | |
441 | j .Lsub_yexpdiff | |
442 | ||
443 | .Lsub_bigshifty: | |
444 | /* Exponent difference > 64 -- just return the bigger value. */ | |
445 | bgeui a10, 64, 1b | |
446 | ||
447 | /* Shift yh/yl right by the exponent difference. Any bits that are | |
448 | shifted out are saved in a9 for rounding the result. */ | |
449 | ssr a10 | |
450 | sll a11, yl /* lost bits shifted out of yl */ | |
451 | src a9, yh, yl | |
452 | srl yl, yh | |
453 | movi yh, 0 | |
454 | beqz a11, .Lsub_suby | |
455 | or a9, a9, a10 /* any positive, nonzero value will work */ | |
456 | j .Lsub_suby | |
457 | ||
458 | .Lsub_xsmaller: | |
459 | /* Same thing as the "ysmaller" code, but with x and y swapped and | |
460 | with y negated. */ | |
461 | bnone xh, a6, .Lsub_xexpzero | |
462 | ||
463 | or xh, xh, a6 | |
464 | slli xh, xh, 11 | |
465 | srli xh, xh, 11 | |
466 | ||
467 | .Lsub_xexpdiff: | |
468 | sub a10, a8, a7 | |
469 | bgeui a10, 32, .Lsub_bigshiftx | |
470 | ||
471 | ssr a10 | |
472 | movi a9, 0 | |
473 | src a9, xl, a9 | |
474 | src xl, xh, xl | |
475 | srl xh, xh | |
476 | ||
477 | /* Negate y. */ | |
478 | slli a11, a6, 11 | |
479 | xor yh, yh, a11 | |
480 | ||
481 | .Lsub_subx: | |
482 | sub xl, yl, xl | |
483 | sub xh, yh, xh | |
484 | bgeu yl, xl, 1f | |
485 | addi xh, xh, -1 | |
486 | 1: | |
487 | /* Subtract the leftover bits in a9 from zero and propagate any | |
488 | borrow from xh/xl. */ | |
489 | neg a9, a9 | |
490 | beqz a9, 1f | |
491 | addi a5, xh, -1 | |
492 | moveqz xh, a5, xl | |
493 | addi xl, xl, -1 | |
494 | 1: | |
495 | /* Check if the subtract underflowed into the exponent. */ | |
496 | extui a10, xh, 20, 11 | |
497 | bne a10, a8, .Lsub_borrow | |
498 | ||
499 | .Lsub_round: | |
500 | /* Round up if the leftover fraction is >= 1/2. */ | |
501 | bgez a9, 1f | |
502 | addi xl, xl, 1 | |
503 | beqz xl, .Lsub_roundcarry | |
504 | ||
505 | /* Check if the leftover fraction is exactly 1/2. */ | |
506 | slli a9, a9, 1 | |
507 | beqz a9, .Lsub_exactlyhalf | |
b7974b3a | 508 | 1: leaf_return |
134c8a50 BW |
509 | |
510 | .Lsub_xexpzero: | |
511 | /* Same as "yexpzero". */ | |
512 | slli xh, xh, 12 | |
513 | srli xh, xh, 12 | |
514 | bnone yh, a6, .Lsub_xexpdiff | |
515 | addi a7, a7, 1 | |
516 | j .Lsub_xexpdiff | |
517 | ||
518 | .Lsub_bigshiftx: | |
519 | /* Mostly the same thing as "bigshifty", but with the sign bit of the | |
520 | shifted value set so that the subsequent subtraction flips the | |
521 | sign of y. */ | |
522 | bgeui a10, 64, .Lsub_returny | |
523 | ||
524 | ssr a10 | |
525 | sll a11, xl | |
526 | src a9, xh, xl | |
527 | srl xl, xh | |
528 | slli xh, a6, 11 /* set sign bit of xh */ | |
529 | beqz a11, .Lsub_subx | |
530 | or a9, a9, a10 | |
531 | j .Lsub_subx | |
532 | ||
533 | .Lsub_returny: | |
534 | /* Negate and return y. */ | |
535 | slli a7, a6, 11 | |
536 | xor xh, yh, a7 | |
537 | mov xl, yl | |
b7974b3a | 538 | leaf_return |
134c8a50 BW |
539 | |
540 | .Lsub_borrow: | |
541 | /* The subtraction has underflowed into the exponent field, so the | |
542 | value needs to be renormalized. Shift the mantissa left as | |
543 | needed to remove any leading zeros and adjust the exponent | |
544 | accordingly. If the exponent is not large enough to remove | |
545 | all the leading zeros, the result will be a subnormal value. */ | |
546 | ||
547 | slli a8, xh, 12 | |
548 | beqz a8, .Lsub_xhzero | |
549 | do_nsau a6, a8, a7, a11 | |
550 | srli a8, a8, 12 | |
551 | bge a6, a10, .Lsub_subnormal | |
552 | addi a6, a6, 1 | |
553 | ||
554 | .Lsub_shift_lt32: | |
555 | /* Shift the mantissa (a8/xl/a9) left by a6. */ | |
556 | ssl a6 | |
557 | src a8, a8, xl | |
558 | src xl, xl, a9 | |
559 | sll a9, a9 | |
560 | ||
561 | /* Combine the shifted mantissa with the sign and exponent, | |
562 | decrementing the exponent by a6. (The exponent has already | |
563 | been decremented by one due to the borrow from the subtraction, | |
564 | but adding the mantissa will increment the exponent by one.) */ | |
565 | srli xh, xh, 20 | |
566 | sub xh, xh, a6 | |
567 | slli xh, xh, 20 | |
568 | add xh, xh, a8 | |
569 | j .Lsub_round | |
570 | ||
571 | .Lsub_exactlyhalf: | |
572 | /* Round down to the nearest even value. */ | |
573 | srli xl, xl, 1 | |
574 | slli xl, xl, 1 | |
b7974b3a | 575 | leaf_return |
134c8a50 BW |
576 | |
577 | .Lsub_roundcarry: | |
578 | /* xl is always zero when the rounding increment overflows, so | |
579 | there's no need to round it to an even value. */ | |
580 | addi xh, xh, 1 | |
581 | /* Overflow to the exponent is OK. */ | |
b7974b3a | 582 | leaf_return |
134c8a50 BW |
583 | |
584 | .Lsub_xhzero: | |
585 | /* When normalizing the result, all the mantissa bits in the high | |
586 | word are zero. Shift by "20 + (leading zero count of xl) + 1". */ | |
587 | do_nsau a6, xl, a7, a11 | |
588 | addi a6, a6, 21 | |
589 | blt a10, a6, .Lsub_subnormal | |
590 | ||
591 | .Lsub_normalize_shift: | |
592 | bltui a6, 32, .Lsub_shift_lt32 | |
593 | ||
594 | ssl a6 | |
595 | src a8, xl, a9 | |
596 | sll xl, a9 | |
597 | movi a9, 0 | |
598 | ||
599 | srli xh, xh, 20 | |
600 | sub xh, xh, a6 | |
601 | slli xh, xh, 20 | |
602 | add xh, xh, a8 | |
603 | j .Lsub_round | |
604 | ||
605 | .Lsub_subnormal: | |
606 | /* The exponent is too small to shift away all the leading zeros. | |
607 | Set a6 to the current exponent (which has already been | |
608 | decremented by the borrow) so that the exponent of the result | |
609 | will be zero. Do not add 1 to a6 in this case, because: (1) | |
610 | adding the mantissa will not increment the exponent, so there is | |
611 | no need to subtract anything extra from the exponent to | |
612 | compensate, and (2) the effective exponent of a subnormal is 1 | |
613 | not 0 so the shift amount must be 1 smaller than normal. */ | |
614 | mov a6, a10 | |
615 | j .Lsub_normalize_shift | |
616 | ||
617 | #endif /* L_addsubdf3 */ | |
618 | ||
619 | #ifdef L_muldf3 | |
620 | ||
621 | /* Multiplication */ | |
7f0ee694 BW |
622 | #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 |
623 | #define XCHAL_NO_MUL 1 | |
624 | #endif | |
625 | ||
9bfcbdee | 626 | .literal_position |
134c8a50 BW |
627 | __muldf3_aux: |
628 | ||
629 | /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). | |
630 | (This code is placed before the start of the function just to | |
631 | keep it in range of the limited branch displacements.) */ | |
632 | ||
633 | .Lmul_xexpzero: | |
634 | /* Clear the sign bit of x. */ | |
635 | slli xh, xh, 1 | |
636 | srli xh, xh, 1 | |
637 | ||
638 | /* If x is zero, return zero. */ | |
639 | or a10, xh, xl | |
640 | beqz a10, .Lmul_return_zero | |
641 | ||
642 | /* Normalize x. Adjust the exponent in a8. */ | |
643 | beqz xh, .Lmul_xh_zero | |
644 | do_nsau a10, xh, a11, a12 | |
645 | addi a10, a10, -11 | |
646 | ssl a10 | |
647 | src xh, xh, xl | |
648 | sll xl, xl | |
649 | movi a8, 1 | |
650 | sub a8, a8, a10 | |
651 | j .Lmul_xnormalized | |
652 | .Lmul_xh_zero: | |
653 | do_nsau a10, xl, a11, a12 | |
654 | addi a10, a10, -11 | |
655 | movi a8, -31 | |
656 | sub a8, a8, a10 | |
657 | ssl a10 | |
658 | bltz a10, .Lmul_xl_srl | |
659 | sll xh, xl | |
660 | movi xl, 0 | |
661 | j .Lmul_xnormalized | |
662 | .Lmul_xl_srl: | |
663 | srl xh, xl | |
664 | sll xl, xl | |
665 | j .Lmul_xnormalized | |
666 | ||
667 | .Lmul_yexpzero: | |
668 | /* Clear the sign bit of y. */ | |
669 | slli yh, yh, 1 | |
670 | srli yh, yh, 1 | |
671 | ||
672 | /* If y is zero, return zero. */ | |
673 | or a10, yh, yl | |
674 | beqz a10, .Lmul_return_zero | |
675 | ||
676 | /* Normalize y. Adjust the exponent in a9. */ | |
677 | beqz yh, .Lmul_yh_zero | |
678 | do_nsau a10, yh, a11, a12 | |
679 | addi a10, a10, -11 | |
680 | ssl a10 | |
681 | src yh, yh, yl | |
682 | sll yl, yl | |
683 | movi a9, 1 | |
684 | sub a9, a9, a10 | |
685 | j .Lmul_ynormalized | |
686 | .Lmul_yh_zero: | |
687 | do_nsau a10, yl, a11, a12 | |
688 | addi a10, a10, -11 | |
689 | movi a9, -31 | |
690 | sub a9, a9, a10 | |
691 | ssl a10 | |
692 | bltz a10, .Lmul_yl_srl | |
693 | sll yh, yl | |
694 | movi yl, 0 | |
695 | j .Lmul_ynormalized | |
696 | .Lmul_yl_srl: | |
697 | srl yh, yl | |
698 | sll yl, yl | |
699 | j .Lmul_ynormalized | |
700 | ||
701 | .Lmul_return_zero: | |
702 | /* Return zero with the appropriate sign bit. */ | |
703 | srli xh, a7, 31 | |
704 | slli xh, xh, 31 | |
705 | movi xl, 0 | |
706 | j .Lmul_done | |
707 | ||
708 | .Lmul_xnan_or_inf: | |
709 | /* If y is zero, return NaN. */ | |
710 | bnez yl, 1f | |
711 | slli a8, yh, 1 | |
0889f168 | 712 | beqz a8, .Lmul_return_nan |
134c8a50 BW |
713 | 1: |
714 | /* If y is NaN, return y. */ | |
715 | bnall yh, a6, .Lmul_returnx | |
716 | slli a8, yh, 12 | |
717 | or a8, a8, yl | |
718 | beqz a8, .Lmul_returnx | |
719 | ||
720 | .Lmul_returny: | |
721 | mov xh, yh | |
722 | mov xl, yl | |
723 | ||
724 | .Lmul_returnx: | |
0889f168 MF |
725 | slli a8, xh, 12 |
726 | or a8, a8, xl | |
727 | bnez a8, .Lmul_return_nan | |
134c8a50 BW |
728 | /* Set the sign bit and return. */ |
729 | extui a7, a7, 31, 1 | |
730 | slli xh, xh, 1 | |
731 | ssai 1 | |
732 | src xh, a7, xh | |
733 | j .Lmul_done | |
734 | ||
735 | .Lmul_ynan_or_inf: | |
736 | /* If x is zero, return NaN. */ | |
737 | bnez xl, .Lmul_returny | |
738 | slli a8, xh, 1 | |
739 | bnez a8, .Lmul_returny | |
0889f168 MF |
740 | mov xh, yh |
741 | ||
742 | .Lmul_return_nan: | |
743 | movi a4, 0x80000 /* make it a quiet NaN */ | |
744 | or xh, xh, a4 | |
134c8a50 BW |
745 | j .Lmul_done |
746 | ||
747 | .align 4 | |
748 | .global __muldf3 | |
749 | .type __muldf3, @function | |
750 | __muldf3: | |
134c8a50 | 751 | #if __XTENSA_CALL0_ABI__ |
7f0ee694 | 752 | leaf_entry sp, 32 |
134c8a50 BW |
753 | addi sp, sp, -32 |
754 | s32i a12, sp, 16 | |
755 | s32i a13, sp, 20 | |
756 | s32i a14, sp, 24 | |
757 | s32i a15, sp, 28 | |
7f0ee694 BW |
758 | #elif XCHAL_NO_MUL |
759 | /* This is not really a leaf function; allocate enough stack space | |
760 | to allow CALL12s to a helper function. */ | |
761 | leaf_entry sp, 64 | |
762 | #else | |
763 | leaf_entry sp, 32 | |
134c8a50 BW |
764 | #endif |
765 | movi a6, 0x7ff00000 | |
766 | ||
767 | /* Get the sign of the result. */ | |
768 | xor a7, xh, yh | |
769 | ||
770 | /* Check for NaN and infinity. */ | |
771 | ball xh, a6, .Lmul_xnan_or_inf | |
772 | ball yh, a6, .Lmul_ynan_or_inf | |
773 | ||
774 | /* Extract the exponents. */ | |
775 | extui a8, xh, 20, 11 | |
776 | extui a9, yh, 20, 11 | |
777 | ||
778 | beqz a8, .Lmul_xexpzero | |
779 | .Lmul_xnormalized: | |
780 | beqz a9, .Lmul_yexpzero | |
781 | .Lmul_ynormalized: | |
782 | ||
783 | /* Add the exponents. */ | |
784 | add a8, a8, a9 | |
785 | ||
786 | /* Replace sign/exponent fields with explicit "1.0". */ | |
787 | movi a10, 0x1fffff | |
788 | or xh, xh, a6 | |
789 | and xh, xh, a10 | |
790 | or yh, yh, a6 | |
791 | and yh, yh, a10 | |
792 | ||
793 | /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6. | |
794 | The least-significant word of the result is thrown away except | |
795 | that if it is nonzero, the lsb of a6 is set to 1. */ | |
796 | #if XCHAL_HAVE_MUL32_HIGH | |
797 | ||
798 | /* Compute a6 with any carry-outs in a10. */ | |
799 | movi a10, 0 | |
800 | mull a6, xl, yh | |
801 | mull a11, xh, yl | |
802 | add a6, a6, a11 | |
803 | bgeu a6, a11, 1f | |
804 | addi a10, a10, 1 | |
805 | 1: | |
806 | muluh a11, xl, yl | |
807 | add a6, a6, a11 | |
808 | bgeu a6, a11, 1f | |
809 | addi a10, a10, 1 | |
810 | 1: | |
811 | /* If the low word of the result is nonzero, set the lsb of a6. */ | |
812 | mull a11, xl, yl | |
813 | beqz a11, 1f | |
814 | movi a9, 1 | |
815 | or a6, a6, a9 | |
816 | 1: | |
817 | /* Compute xl with any carry-outs in a9. */ | |
818 | movi a9, 0 | |
819 | mull a11, xh, yh | |
820 | add a10, a10, a11 | |
821 | bgeu a10, a11, 1f | |
822 | addi a9, a9, 1 | |
823 | 1: | |
824 | muluh a11, xh, yl | |
825 | add a10, a10, a11 | |
826 | bgeu a10, a11, 1f | |
827 | addi a9, a9, 1 | |
828 | 1: | |
829 | muluh xl, xl, yh | |
830 | add xl, xl, a10 | |
831 | bgeu xl, a10, 1f | |
832 | addi a9, a9, 1 | |
833 | 1: | |
834 | /* Compute xh. */ | |
835 | muluh xh, xh, yh | |
836 | add xh, xh, a9 | |
837 | ||
7f0ee694 | 838 | #else /* ! XCHAL_HAVE_MUL32_HIGH */ |
134c8a50 BW |
839 | |
840 | /* Break the inputs into 16-bit chunks and compute 16 32-bit partial | |
841 | products. These partial products are: | |
842 | ||
843 | 0 xll * yll | |
844 | ||
845 | 1 xll * ylh | |
846 | 2 xlh * yll | |
847 | ||
848 | 3 xll * yhl | |
849 | 4 xlh * ylh | |
850 | 5 xhl * yll | |
851 | ||
852 | 6 xll * yhh | |
853 | 7 xlh * yhl | |
854 | 8 xhl * ylh | |
855 | 9 xhh * yll | |
856 | ||
857 | 10 xlh * yhh | |
858 | 11 xhl * yhl | |
859 | 12 xhh * ylh | |
860 | ||
861 | 13 xhl * yhh | |
862 | 14 xhh * yhl | |
863 | ||
864 | 15 xhh * yhh | |
865 | ||
866 | where the input chunks are (hh, hl, lh, ll). If using the Mul16 | |
867 | or Mul32 multiplier options, these input chunks must be stored in | |
868 | separate registers. For Mac16, the UMUL.AA.* opcodes can specify | |
869 | that the inputs come from either half of the registers, so there | |
870 | is no need to shift them out ahead of time. If there is no | |
871 | multiply hardware, the 16-bit chunks can be extracted when setting | |
872 | up the arguments to the separate multiply function. */ | |
873 | ||
874 | /* Save a7 since it is needed to hold a temporary value. */ | |
875 | s32i a7, sp, 4 | |
7f0ee694 | 876 | #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL |
134c8a50 BW |
877 | /* Calling a separate multiply function will clobber a0 and requires |
878 | use of a8 as a temporary, so save those values now. (The function | |
879 | uses a custom ABI so nothing else needs to be saved.) */ | |
880 | s32i a0, sp, 0 | |
881 | s32i a8, sp, 8 | |
882 | #endif | |
883 | ||
884 | #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 | |
885 | ||
886 | #define xlh a12 | |
887 | #define ylh a13 | |
888 | #define xhh a14 | |
889 | #define yhh a15 | |
890 | ||
891 | /* Get the high halves of the inputs into registers. */ | |
892 | srli xlh, xl, 16 | |
893 | srli ylh, yl, 16 | |
894 | srli xhh, xh, 16 | |
895 | srli yhh, yh, 16 | |
896 | ||
897 | #define xll xl | |
898 | #define yll yl | |
899 | #define xhl xh | |
900 | #define yhl yh | |
901 | ||
902 | #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 | |
903 | /* Clear the high halves of the inputs. This does not matter | |
904 | for MUL16 because the high bits are ignored. */ | |
905 | extui xl, xl, 0, 16 | |
906 | extui xh, xh, 0, 16 | |
907 | extui yl, yl, 0, 16 | |
908 | extui yh, yh, 0, 16 | |
909 | #endif | |
910 | #endif /* MUL16 || MUL32 */ | |
911 | ||
912 | ||
913 | #if XCHAL_HAVE_MUL16 | |
914 | ||
915 | #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | |
916 | mul16u dst, xreg ## xhalf, yreg ## yhalf | |
917 | ||
918 | #elif XCHAL_HAVE_MUL32 | |
919 | ||
920 | #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | |
921 | mull dst, xreg ## xhalf, yreg ## yhalf | |
922 | ||
923 | #elif XCHAL_HAVE_MAC16 | |
924 | ||
925 | /* The preprocessor insists on inserting a space when concatenating after | |
926 | a period in the definition of do_mul below. These macros are a workaround | |
927 | using underscores instead of periods when doing the concatenation. */ | |
928 | #define umul_aa_ll umul.aa.ll | |
929 | #define umul_aa_lh umul.aa.lh | |
930 | #define umul_aa_hl umul.aa.hl | |
931 | #define umul_aa_hh umul.aa.hh | |
932 | ||
933 | #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | |
934 | umul_aa_ ## xhalf ## yhalf xreg, yreg; \ | |
935 | rsr dst, ACCLO | |
936 | ||
937 | #else /* no multiply hardware */ | |
938 | ||
939 | #define set_arg_l(dst, src) \ | |
940 | extui dst, src, 0, 16 | |
941 | #define set_arg_h(dst, src) \ | |
942 | srli dst, src, 16 | |
943 | ||
7f0ee694 | 944 | #if __XTENSA_CALL0_ABI__ |
134c8a50 BW |
945 | #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ |
946 | set_arg_ ## xhalf (a13, xreg); \ | |
947 | set_arg_ ## yhalf (a14, yreg); \ | |
948 | call0 .Lmul_mulsi3; \ | |
949 | mov dst, a12 | |
7f0ee694 BW |
950 | #else |
951 | #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | |
952 | set_arg_ ## xhalf (a14, xreg); \ | |
953 | set_arg_ ## yhalf (a15, yreg); \ | |
954 | call12 .Lmul_mulsi3; \ | |
955 | mov dst, a14 | |
956 | #endif /* __XTENSA_CALL0_ABI__ */ | |
957 | ||
958 | #endif /* no multiply hardware */ | |
134c8a50 BW |
959 | |
960 | /* Add pp1 and pp2 into a10 with carry-out in a9. */ | |
961 | do_mul(a10, xl, l, yl, h) /* pp 1 */ | |
962 | do_mul(a11, xl, h, yl, l) /* pp 2 */ | |
963 | movi a9, 0 | |
964 | add a10, a10, a11 | |
965 | bgeu a10, a11, 1f | |
966 | addi a9, a9, 1 | |
967 | 1: | |
968 | /* Initialize a6 with a9/a10 shifted into position. Note that | |
969 | this value can be safely incremented without any carry-outs. */ | |
970 | ssai 16 | |
971 | src a6, a9, a10 | |
972 | ||
973 | /* Compute the low word into a10. */ | |
974 | do_mul(a11, xl, l, yl, l) /* pp 0 */ | |
975 | sll a10, a10 | |
976 | add a10, a10, a11 | |
977 | bgeu a10, a11, 1f | |
978 | addi a6, a6, 1 | |
979 | 1: | |
980 | /* Compute the contributions of pp0-5 to a6, with carry-outs in a9. | |
981 | This is good enough to determine the low half of a6, so that any | |
982 | nonzero bits from the low word of the result can be collapsed | |
983 | into a6, freeing up a register. */ | |
984 | movi a9, 0 | |
985 | do_mul(a11, xl, l, yh, l) /* pp 3 */ | |
986 | add a6, a6, a11 | |
987 | bgeu a6, a11, 1f | |
988 | addi a9, a9, 1 | |
989 | 1: | |
990 | do_mul(a11, xl, h, yl, h) /* pp 4 */ | |
991 | add a6, a6, a11 | |
992 | bgeu a6, a11, 1f | |
993 | addi a9, a9, 1 | |
994 | 1: | |
995 | do_mul(a11, xh, l, yl, l) /* pp 5 */ | |
996 | add a6, a6, a11 | |
997 | bgeu a6, a11, 1f | |
998 | addi a9, a9, 1 | |
999 | 1: | |
1000 | /* Collapse any nonzero bits from the low word into a6. */ | |
1001 | beqz a10, 1f | |
1002 | movi a11, 1 | |
1003 | or a6, a6, a11 | |
1004 | 1: | |
1005 | /* Add pp6-9 into a11 with carry-outs in a10. */ | |
1006 | do_mul(a7, xl, l, yh, h) /* pp 6 */ | |
1007 | do_mul(a11, xh, h, yl, l) /* pp 9 */ | |
1008 | movi a10, 0 | |
1009 | add a11, a11, a7 | |
1010 | bgeu a11, a7, 1f | |
1011 | addi a10, a10, 1 | |
1012 | 1: | |
1013 | do_mul(a7, xl, h, yh, l) /* pp 7 */ | |
1014 | add a11, a11, a7 | |
1015 | bgeu a11, a7, 1f | |
1016 | addi a10, a10, 1 | |
1017 | 1: | |
1018 | do_mul(a7, xh, l, yl, h) /* pp 8 */ | |
1019 | add a11, a11, a7 | |
1020 | bgeu a11, a7, 1f | |
1021 | addi a10, a10, 1 | |
1022 | 1: | |
1023 | /* Shift a10/a11 into position, and add low half of a11 to a6. */ | |
1024 | src a10, a10, a11 | |
1025 | add a10, a10, a9 | |
1026 | sll a11, a11 | |
1027 | add a6, a6, a11 | |
1028 | bgeu a6, a11, 1f | |
1029 | addi a10, a10, 1 | |
1030 | 1: | |
1031 | /* Add pp10-12 into xl with carry-outs in a9. */ | |
1032 | movi a9, 0 | |
1033 | do_mul(xl, xl, h, yh, h) /* pp 10 */ | |
1034 | add xl, xl, a10 | |
1035 | bgeu xl, a10, 1f | |
1036 | addi a9, a9, 1 | |
1037 | 1: | |
1038 | do_mul(a10, xh, l, yh, l) /* pp 11 */ | |
1039 | add xl, xl, a10 | |
1040 | bgeu xl, a10, 1f | |
1041 | addi a9, a9, 1 | |
1042 | 1: | |
1043 | do_mul(a10, xh, h, yl, h) /* pp 12 */ | |
1044 | add xl, xl, a10 | |
1045 | bgeu xl, a10, 1f | |
1046 | addi a9, a9, 1 | |
1047 | 1: | |
1048 | /* Add pp13-14 into a11 with carry-outs in a10. */ | |
1049 | do_mul(a11, xh, l, yh, h) /* pp 13 */ | |
1050 | do_mul(a7, xh, h, yh, l) /* pp 14 */ | |
1051 | movi a10, 0 | |
1052 | add a11, a11, a7 | |
1053 | bgeu a11, a7, 1f | |
1054 | addi a10, a10, 1 | |
1055 | 1: | |
1056 | /* Shift a10/a11 into position, and add low half of a11 to a6. */ | |
1057 | src a10, a10, a11 | |
1058 | add a10, a10, a9 | |
1059 | sll a11, a11 | |
1060 | add xl, xl, a11 | |
1061 | bgeu xl, a11, 1f | |
1062 | addi a10, a10, 1 | |
1063 | 1: | |
1064 | /* Compute xh. */ | |
1065 | do_mul(xh, xh, h, yh, h) /* pp 15 */ | |
1066 | add xh, xh, a10 | |
1067 | ||
1068 | /* Restore values saved on the stack during the multiplication. */ | |
1069 | l32i a7, sp, 4 | |
7f0ee694 | 1070 | #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL |
134c8a50 BW |
1071 | l32i a0, sp, 0 |
1072 | l32i a8, sp, 8 | |
1073 | #endif | |
7f0ee694 | 1074 | #endif /* ! XCHAL_HAVE_MUL32_HIGH */ |
134c8a50 BW |
1075 | |
1076 | /* Shift left by 12 bits, unless there was a carry-out from the | |
1077 | multiply, in which case, shift by 11 bits and increment the | |
1078 | exponent. Note: It is convenient to use the constant 0x3ff | |
1079 | instead of 0x400 when removing the extra exponent bias (so that | |
1080 | it is easy to construct 0x7fe for the overflow check). Reverse | |
1081 | the logic here to decrement the exponent sum by one unless there | |
1082 | was a carry-out. */ | |
1083 | movi a4, 11 | |
1084 | srli a5, xh, 21 - 12 | |
1085 | bnez a5, 1f | |
1086 | addi a4, a4, 1 | |
1087 | addi a8, a8, -1 | |
1088 | 1: ssl a4 | |
1089 | src xh, xh, xl | |
1090 | src xl, xl, a6 | |
1091 | sll a6, a6 | |
1092 | ||
1093 | /* Subtract the extra bias from the exponent sum (plus one to account | |
1094 | for the explicit "1.0" of the mantissa that will be added to the | |
1095 | exponent in the final result). */ | |
1096 | movi a4, 0x3ff | |
1097 | sub a8, a8, a4 | |
1098 | ||
1099 | /* Check for over/underflow. The value in a8 is one less than the | |
1100 | final exponent, so values in the range 0..7fd are OK here. */ | |
1101 | slli a4, a4, 1 /* 0x7fe */ | |
1102 | bgeu a8, a4, .Lmul_overflow | |
1103 | ||
1104 | .Lmul_round: | |
1105 | /* Round. */ | |
1106 | bgez a6, .Lmul_rounded | |
1107 | addi xl, xl, 1 | |
1108 | beqz xl, .Lmul_roundcarry | |
1109 | slli a6, a6, 1 | |
1110 | beqz a6, .Lmul_exactlyhalf | |
1111 | ||
1112 | .Lmul_rounded: | |
1113 | /* Add the exponent to the mantissa. */ | |
1114 | slli a8, a8, 20 | |
1115 | add xh, xh, a8 | |
1116 | ||
1117 | .Lmul_addsign: | |
1118 | /* Add the sign bit. */ | |
1119 | srli a7, a7, 31 | |
1120 | slli a7, a7, 31 | |
1121 | or xh, xh, a7 | |
1122 | ||
1123 | .Lmul_done: | |
1124 | #if __XTENSA_CALL0_ABI__ | |
1125 | l32i a12, sp, 16 | |
1126 | l32i a13, sp, 20 | |
1127 | l32i a14, sp, 24 | |
1128 | l32i a15, sp, 28 | |
1129 | addi sp, sp, 32 | |
1130 | #endif | |
b7974b3a | 1131 | leaf_return |
134c8a50 BW |
1132 | |
1133 | .Lmul_exactlyhalf: | |
1134 | /* Round down to the nearest even value. */ | |
1135 | srli xl, xl, 1 | |
1136 | slli xl, xl, 1 | |
1137 | j .Lmul_rounded | |
1138 | ||
1139 | .Lmul_roundcarry: | |
1140 | /* xl is always zero when the rounding increment overflows, so | |
1141 | there's no need to round it to an even value. */ | |
1142 | addi xh, xh, 1 | |
1143 | /* Overflow is OK -- it will be added to the exponent. */ | |
1144 | j .Lmul_rounded | |
1145 | ||
1146 | .Lmul_overflow: | |
1147 | bltz a8, .Lmul_underflow | |
1148 | /* Return +/- Infinity. */ | |
1149 | addi a8, a4, 1 /* 0x7ff */ | |
1150 | slli xh, a8, 20 | |
1151 | movi xl, 0 | |
1152 | j .Lmul_addsign | |
1153 | ||
1154 | .Lmul_underflow: | |
1155 | /* Create a subnormal value, where the exponent field contains zero, | |
1156 | but the effective exponent is 1. The value of a8 is one less than | |
1157 | the actual exponent, so just negate it to get the shift amount. */ | |
1158 | neg a8, a8 | |
1159 | mov a9, a6 | |
1160 | ssr a8 | |
1161 | bgeui a8, 32, .Lmul_bigshift | |
1162 | ||
1163 | /* Shift xh/xl right. Any bits that are shifted out of xl are saved | |
1164 | in a6 (combined with the shifted-out bits currently in a6) for | |
1165 | rounding the result. */ | |
1166 | sll a6, xl | |
1167 | src xl, xh, xl | |
1168 | srl xh, xh | |
1169 | j 1f | |
1170 | ||
1171 | .Lmul_bigshift: | |
1172 | bgeui a8, 64, .Lmul_flush_to_zero | |
1173 | sll a10, xl /* lost bits shifted out of xl */ | |
1174 | src a6, xh, xl | |
1175 | srl xl, xh | |
1176 | movi xh, 0 | |
1177 | or a9, a9, a10 | |
1178 | ||
1179 | /* Set the exponent to zero. */ | |
1180 | 1: movi a8, 0 | |
1181 | ||
1182 | /* Pack any nonzero bits shifted out into a6. */ | |
1183 | beqz a9, .Lmul_round | |
1184 | movi a9, 1 | |
1185 | or a6, a6, a9 | |
1186 | j .Lmul_round | |
1187 | ||
1188 | .Lmul_flush_to_zero: | |
1189 | /* Return zero with the appropriate sign bit. */ | |
1190 | srli xh, a7, 31 | |
1191 | slli xh, xh, 31 | |
1192 | movi xl, 0 | |
1193 | j .Lmul_done | |
1194 | ||
#if XCHAL_NO_MUL

/* For Xtensa processors with no multiply hardware, this simplified
   version of _mulsi3 is used for multiplying 16-bit chunks of
   the floating-point mantissas.  When using CALL0, this function
   uses a custom ABI: the inputs are passed in a13 and a14, the
   result is returned in a12, and a8 and a15 are clobbered.  */

	.align	4
.Lmul_mulsi3:
	leaf_entry sp, 16

	/* Shift-and-add multiply loop, processing four bits of src1 per
	   iteration: for each of bits 0..3 of src1, conditionally (via
	   movnez) add src2 scaled by 1/2/4/8 into dst, then step src1
	   right and src2 left by four bits.  The loop ends as soon as
	   src1 has no bits left, so small multipliers finish early.  */
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm

#if __XTENSA_CALL0_ABI__
	/* Custom CALL0 ABI: inputs a13/a14, result a12, scratch a15/a8.  */
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	leaf_return
#endif /* XCHAL_NO_MUL */
134c8a50 BW |
1236 | #endif /* L_muldf3 */ |
1237 | ||
1238 | #ifdef L_divdf3 | |
1239 | ||
1240 | /* Division */ | |
66192aa1 DKC |
1241 | |
1242 | #if XCHAL_HAVE_DFP_DIV | |
1243 | ||
/* Double-precision divide (x / y) using the Xtensa DFP-accelerate
   option.  The operand pairs xh/xl and yh/yl are moved into FP
   registers, div0.d seeds a reciprocal approximation of y which is
   iteratively refined with maddn.d (Newton-Raphson style), and
   divn.d produces the final quotient; the result is read back into
   xh/xl.  NOTE(review): the per-instruction semantics (div0.d,
   nexp01.d, mkdadj.d, addexpm.d, divn.d, ...) come from the Xtensa
   DFP accelerate option -- confirm details against the ISA manual.  */
	.text
	.align	4
	.global	__divdf3
	.type	__divdf3, @function
__divdf3:
	leaf_entry sp, 16

	/* Load x into f1 and y into f2 from the GPR pairs.  */
	wfrd	f1, xh, xl
	wfrd	f2, yh, yl

	div0.d	f3, f2
	nexp01.d f4, f2
	const.d	f0, 1
	maddn.d	f0, f4, f3
	const.d	f5, 0
	mov.d	f7, f2
	mkdadj.d f7, f1
	maddn.d	f3, f0, f3
	maddn.d	f5, f0, f0
	nexp01.d f1, f1
	div0.d	f2, f2
	maddn.d	f3, f5, f3
	const.d	f5, 1
	const.d	f0, 0
	neg.d	f6, f1
	maddn.d	f5, f4, f3
	maddn.d	f0, f6, f2
	maddn.d	f3, f5, f3
	maddn.d	f6, f4, f0
	const.d	f2, 1
	maddn.d	f2, f4, f3
	maddn.d	f0, f6, f3
	neg.d	f1, f1
	maddn.d	f3, f2, f3
	maddn.d	f1, f4, f0
	addexpm.d f0, f7
	addexp.d f3, f7
	divn.d	f0, f1, f3

	/* Move the quotient back to the integer register pair.  */
	rfr	xl, f0
	rfrd	xh, f0

	leaf_return
1287 | ||
1288 | #else | |
1289 | ||
1290 | .literal_position | |
1291 | ||
134c8a50 BW |
1292 | __divdf3_aux: |
1293 | ||
1294 | /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). | |
1295 | (This code is placed before the start of the function just to | |
1296 | keep it in range of the limited branch displacements.) */ | |
1297 | ||
1298 | .Ldiv_yexpzero: | |
1299 | /* Clear the sign bit of y. */ | |
1300 | slli yh, yh, 1 | |
1301 | srli yh, yh, 1 | |
1302 | ||
1303 | /* Check for division by zero. */ | |
1304 | or a10, yh, yl | |
1305 | beqz a10, .Ldiv_yzero | |
1306 | ||
1307 | /* Normalize y. Adjust the exponent in a9. */ | |
1308 | beqz yh, .Ldiv_yh_zero | |
1309 | do_nsau a10, yh, a11, a9 | |
1310 | addi a10, a10, -11 | |
1311 | ssl a10 | |
1312 | src yh, yh, yl | |
1313 | sll yl, yl | |
1314 | movi a9, 1 | |
1315 | sub a9, a9, a10 | |
1316 | j .Ldiv_ynormalized | |
1317 | .Ldiv_yh_zero: | |
1318 | do_nsau a10, yl, a11, a9 | |
1319 | addi a10, a10, -11 | |
1320 | movi a9, -31 | |
1321 | sub a9, a9, a10 | |
1322 | ssl a10 | |
1323 | bltz a10, .Ldiv_yl_srl | |
1324 | sll yh, yl | |
1325 | movi yl, 0 | |
1326 | j .Ldiv_ynormalized | |
1327 | .Ldiv_yl_srl: | |
1328 | srl yh, yl | |
1329 | sll yl, yl | |
1330 | j .Ldiv_ynormalized | |
1331 | ||
1332 | .Ldiv_yzero: | |
1333 | /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ | |
1334 | slli xh, xh, 1 | |
1335 | srli xh, xh, 1 | |
1336 | or xl, xl, xh | |
1337 | srli xh, a7, 31 | |
1338 | slli xh, xh, 31 | |
1339 | or xh, xh, a6 | |
1340 | bnez xl, 1f | |
1341 | movi a4, 0x80000 /* make it a quiet NaN */ | |
1342 | or xh, xh, a4 | |
1343 | 1: movi xl, 0 | |
b7974b3a | 1344 | leaf_return |
134c8a50 BW |
1345 | |
1346 | .Ldiv_xexpzero: | |
1347 | /* Clear the sign bit of x. */ | |
1348 | slli xh, xh, 1 | |
1349 | srli xh, xh, 1 | |
1350 | ||
1351 | /* If x is zero, return zero. */ | |
1352 | or a10, xh, xl | |
1353 | beqz a10, .Ldiv_return_zero | |
1354 | ||
1355 | /* Normalize x. Adjust the exponent in a8. */ | |
1356 | beqz xh, .Ldiv_xh_zero | |
1357 | do_nsau a10, xh, a11, a8 | |
1358 | addi a10, a10, -11 | |
1359 | ssl a10 | |
1360 | src xh, xh, xl | |
1361 | sll xl, xl | |
1362 | movi a8, 1 | |
1363 | sub a8, a8, a10 | |
1364 | j .Ldiv_xnormalized | |
1365 | .Ldiv_xh_zero: | |
1366 | do_nsau a10, xl, a11, a8 | |
1367 | addi a10, a10, -11 | |
1368 | movi a8, -31 | |
1369 | sub a8, a8, a10 | |
1370 | ssl a10 | |
1371 | bltz a10, .Ldiv_xl_srl | |
1372 | sll xh, xl | |
1373 | movi xl, 0 | |
1374 | j .Ldiv_xnormalized | |
1375 | .Ldiv_xl_srl: | |
1376 | srl xh, xl | |
1377 | sll xl, xl | |
1378 | j .Ldiv_xnormalized | |
1379 | ||
1380 | .Ldiv_return_zero: | |
1381 | /* Return zero with the appropriate sign bit. */ | |
1382 | srli xh, a7, 31 | |
1383 | slli xh, xh, 31 | |
1384 | movi xl, 0 | |
b7974b3a | 1385 | leaf_return |
134c8a50 BW |
1386 | |
1387 | .Ldiv_xnan_or_inf: | |
1388 | /* Set the sign bit of the result. */ | |
1389 | srli a7, yh, 31 | |
1390 | slli a7, a7, 31 | |
1391 | xor xh, xh, a7 | |
1392 | /* If y is NaN or Inf, return NaN. */ | |
0889f168 MF |
1393 | ball yh, a6, .Ldiv_return_nan |
1394 | slli a8, xh, 12 | |
1395 | or a8, a8, xl | |
1396 | bnez a8, .Ldiv_return_nan | |
1397 | leaf_return | |
134c8a50 BW |
1398 | |
1399 | .Ldiv_ynan_or_inf: | |
1400 | /* If y is Infinity, return zero. */ | |
1401 | slli a8, yh, 12 | |
1402 | or a8, a8, yl | |
1403 | beqz a8, .Ldiv_return_zero | |
1404 | /* y is NaN; return it. */ | |
1405 | mov xh, yh | |
1406 | mov xl, yl | |
0889f168 MF |
1407 | |
1408 | .Ldiv_return_nan: | |
1409 | movi a4, 0x80000 /* make it a quiet NaN */ | |
1410 | or xh, xh, a4 | |
b7974b3a | 1411 | leaf_return |
134c8a50 BW |
1412 | |
1413 | .Ldiv_highequal1: | |
1414 | bltu xl, yl, 2f | |
1415 | j 3f | |
1416 | ||
/* Software double-precision divide: x / y by restoring long division,
   one quotient bit per iteration for 52 bits, rounded to nearest even.
   Register usage: a6 = 0x7ff00000, a7 = sign word (xh ^ yh),
   a8 = result exponent, a9 = loop counter / scratch, a10/a11 =
   quotient high/low, xh/xl = running remainder.  Result in xh/xl.  */
	.align	4
	.global	__divdf3
	.type	__divdf3, @function
__divdf3:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000

	/* Get the sign of the result.  */
	xor	a7, xh, yh

	/* Check for NaN and infinity.  */
	ball	xh, a6, .Ldiv_xnan_or_inf
	ball	yh, a6, .Ldiv_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, xh, 20, 11
	extui	a9, yh, 20, 11

	beqz	a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
	beqz	a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

	/* Subtract the exponents.  */
	sub	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0x1fffff
	or	xh, xh, a6
	and	xh, xh, a10
	or	yh, yh, a6
	and	yh, yh, a10

	/* Set SAR for left shift by one.  */
	ssai	(32 - 1)

	/* The first digit of the mantissa division must be a one.
	   Shift x (and adjust the exponent) as needed to make this true.  */
	bltu	yh, xh, 3f
	beq	yh, xh, .Ldiv_highequal1
2:	src	xh, xh, xl
	sll	xl, xl
	addi	a8, a8, -1
3:
	/* Do the first subtraction and shift.  */
	sub	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, -1	/* borrow from the high word */
1:	sub	xl, xl, yl
	src	xh, xh, xl
	sll	xl, xl

	/* Put the quotient into a10/a11.  */
	movi	a10, 0
	movi	a11, 1

	/* Divide one bit at a time for 52 bits.  */
	movi	a9, 52
#if XCHAL_HAVE_LOOPS
	loop	a9, .Ldiv_loopend
#endif
.Ldiv_loop:
	/* Shift the quotient << 1.  */
	src	a10, a10, a11
	sll	a11, a11

	/* Is this digit a 0 or 1?  */
	bltu	xh, yh, 3f
	beq	xh, yh, .Ldiv_highequal2

	/* Output a 1 and subtract.  */
2:	addi	a11, a11, 1
	sub	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, -1	/* borrow from the high word */
1:	sub	xl, xl, yl

	/* Shift the dividend << 1.  */
3:	src	xh, xh, xl
	sll	xl, xl

#if !XCHAL_HAVE_LOOPS
	addi	a9, a9, -1
	bnez	a9, .Ldiv_loop
#endif
.Ldiv_loopend:

	/* Add the exponent bias (less one to account for the explicit "1.0"
	   of the mantissa that will be added to the exponent in the final
	   result).  */
	movi	a9, 0x3fe
	add	a8, a8, a9

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..7fd are OK here.  */
	addmi	a9, a9, 0x400	/* 0x7fe */
	bgeu	a8, a9, .Ldiv_overflow

.Ldiv_round:
	/* Round.  The remainder (<< 1) is in xh/xl.  */
	bltu	xh, yh, .Ldiv_rounded
	beq	xh, yh, .Ldiv_highequal3
.Ldiv_roundup:
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry

.Ldiv_rounded:
	mov	xl, a11
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 20
	add	xh, a10, a8

.Ldiv_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	xh, xh, a7
	leaf_return

.Ldiv_highequal2:
	/* High words equal inside the loop: decide digit by low words.  */
	bgeu	xl, yl, 2b
	j	3b

.Ldiv_highequal3:
	bltu	xl, yl, .Ldiv_rounded
	bne	xl, yl, .Ldiv_roundup

	/* Remainder is exactly half the divisor.  Round even.  */
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry
	srli	a11, a11, 1
	slli	a11, a11, 1
	j	.Ldiv_rounded

.Ldiv_overflow:
	bltz	a8, .Ldiv_underflow
	/* Return +/- Infinity.  */
	addi	a8, a9, 1	/* 0x7ff */
	slli	xh, a8, 20
	movi	xl, 0
	j	.Ldiv_addsign

.Ldiv_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	ssr	a8
	bgeui	a8, 32, .Ldiv_bigshift

	/* Shift a10/a11 right.  Any bits that are shifted out of a11 are
	   saved in a6 for rounding the result.  */
	sll	a6, a11
	src	a11, a10, a11
	srl	a10, a10
	j	1f

.Ldiv_bigshift:
	bgeui	a8, 64, .Ldiv_flush_to_zero
	sll	a9, a11	/* lost bits shifted out of a11 */
	src	a6, a10, a11
	srl	a11, a10
	movi	a10, 0
	or	xl, xl, a9

	/* Set the exponent to zero.  */
1:	movi	a8, 0

	/* Pack any nonzero remainder (in xh/xl) into a6.  */
	or	xh, xh, xl
	beqz	xh, 1f
	movi	a9, 1
	or	a6, a6, a9

	/* Round a10/a11 based on the bits shifted out into a6.  */
1:	bgez	a6, .Ldiv_rounded
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry
	slli	a6, a6, 1
	bnez	a6, .Ldiv_rounded
	/* Tie case: round down to the nearest even value.  */
	srli	a11, a11, 1
	slli	a11, a11, 1
	j	.Ldiv_rounded

.Ldiv_roundcarry:
	/* a11 is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	a10, a10, 1
	/* Overflow to the exponent field is OK.  */
	j	.Ldiv_rounded

.Ldiv_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	xh, a7, 31
	slli	xh, xh, 31
	movi	xl, 0
	leaf_return
134c8a50 | 1614 | |
66192aa1 DKC |
1615 | #endif /* XCHAL_HAVE_DFP_DIV */ |
1616 | ||
134c8a50 BW |
1617 | #endif /* L_divdf3 */ |
1618 | ||
1619 | #ifdef L_cmpdf2 | |
1620 | ||
1621 | /* Equal and Not Equal */ | |
1622 | ||
/* Equality comparison.  Returns 0 in a2 when x == y, nonzero when
   x != y.  __nedf2 is an alias with the same return convention.
   NaN compares unequal to everything (including itself); +0 and -0
   compare equal (handled by the "zero with different signs" case).  */
	.align	4
	.global	__eqdf2
	.global	__nedf2
	.set	__nedf2, __eqdf2
	.type	__eqdf2, @function
__eqdf2:
	leaf_entry sp, 16
	bne	xl, yl, 2f
	bne	xh, yh, 4f

	/* The values are equal but NaN != NaN.  Check the exponent.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, 3f

	/* Equal.  */
	movi	a2, 0
	leaf_return

	/* Not equal.  */
2:	movi	a2, 1
	leaf_return

	/* Check if the mantissas are nonzero.  */
3:	slli	a7, xh, 12
	or	a7, a7, xl
	j	5f

	/* Check if x and y are zero with different signs.  */
4:	or	a7, xh, yh
	slli	a7, a7, 1
	or	a7, a7, xl	/* xl == yl here */

	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   or x when exponent(x) = 0x7ff and x == y.  */
5:	movi	a2, 0
	movi	a3, 1
	movnez	a2, a3, a7
	leaf_return
134c8a50 BW |
1661 | |
1662 | ||
1663 | /* Greater Than */ | |
1664 | ||
/* Greater-than comparison.  Shares the ordered-compare tail .Lle_cmp
   (defined under __ledf2 below), which sets a2 = 0 when x <= y and
   a2 = 1 otherwise; if either operand is NaN this returns 0, so a
   "(result > 0)" test by the caller is false for unordered inputs.  */
	.align	4
	.global	__gtdf2
	.type	__gtdf2, @function
__gtdf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f
1:	bnall	yh, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Lle_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b
	movi	a2, 0
	leaf_return
134c8a50 BW |
1687 | |
1688 | ||
1689 | /* Less Than or Equal */ | |
1690 | ||
/* Less-than-or-equal comparison.  The .Lle_cmp tail (also used by
   __gtdf2 above) sets a2 = 0 when x <= y and a2 = 1 otherwise; if
   either operand is NaN this returns 1, so a "(result <= 0)" test by
   the caller is false for unordered inputs.  */
	.align	4
	.global	__ledf2
	.type	__ledf2, @function
__ledf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f
1:	bnall	yh, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Lle_cmp
	movi	a2, 1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

.Lle_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, xh, yh
	bltz	a7, .Lle_diff_signs

	/* Check if x is negative.  */
	bltz	xh, .Lle_xneg

	/* Check if x <= y.  */
	bltu	xh, yh, 4f
	bne	xh, yh, 5f
	bltu	yl, xl, 5f
4:	movi	a2, 0
	leaf_return

.Lle_xneg:
	/* Both negative: the ordering of the magnitudes is reversed,
	   so check if y <= x.  */
	bltu	yh, xh, 4b
	bne	yh, xh, 5f
	bgeu	xl, yl, 4b
5:	movi	a2, 1
	leaf_return

.Lle_diff_signs:
	bltz	xh, 4b

	/* Check if both x and y are zero.  */
	or	a7, xh, yh
	slli	a7, a7, 1
	or	a7, a7, xl
	or	a7, a7, yl
	movi	a2, 1
	movi	a3, 0
	moveqz	a2, a3, a7
	leaf_return
134c8a50 BW |
1749 | |
1750 | ||
1751 | /* Greater Than or Equal */ | |
1752 | ||
/* Greater-than-or-equal comparison.  Shares the ordered-compare tail
   .Llt_cmp (defined under __ltdf2 below), which sets a2 = -1 when
   x < y and a2 = 0 otherwise; if either operand is NaN this returns
   -1, so a "(result >= 0)" test by the caller is false for unordered
   inputs.  */
	.align	4
	.global	__gedf2
	.type	__gedf2, @function
__gedf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f
1:	bnall	yh, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Llt_cmp
	movi	a2, -1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b
	movi	a2, -1
	leaf_return
134c8a50 BW |
1775 | |
1776 | ||
1777 | /* Less Than */ | |
1778 | ||
/* Less-than comparison.  The .Llt_cmp tail (also used by __gedf2
   above) sets a2 = -1 when x < y and a2 = 0 otherwise; if either
   operand is NaN this returns 0, so a "(result < 0)" test by the
   caller is false for unordered inputs.  */
	.align	4
	.global	__ltdf2
	.type	__ltdf2, @function
__ltdf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f
1:	bnall	yh, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Llt_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b
	movi	a2, 0
	leaf_return

.Llt_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, xh, yh
	bltz	a7, .Llt_diff_signs

	/* Check if x is negative.  */
	bltz	xh, .Llt_xneg

	/* Check if x < y.  */
	bltu	xh, yh, 4f
	bne	xh, yh, 5f
	bgeu	xl, yl, 5f
4:	movi	a2, -1
	leaf_return

.Llt_xneg:
	/* Both negative: the ordering of the magnitudes is reversed,
	   so check if y < x.  */
	bltu	yh, xh, 4b
	bne	yh, xh, 5f
	bltu	yl, xl, 4b
5:	movi	a2, 0
	leaf_return

.Llt_diff_signs:
	bgez	xh, 5b

	/* Check if both x and y are nonzero.  */
	or	a7, xh, yh
	slli	a7, a7, 1
	or	a7, a7, xl
	or	a7, a7, yl
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7
	leaf_return
134c8a50 BW |
1837 | |
1838 | ||
1839 | /* Unordered */ | |
1840 | ||
/* Unordered comparison.  Returns 1 in a2 when either x or y is a NaN
   (exponent all ones with a nonzero mantissa), 0 otherwise.  */
	.align	4
	.global	__unorddf2
	.type	__unorddf2, @function
__unorddf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 3f
1:	ball	yh, a6, 4f
2:	movi	a2, 0
	leaf_return

	/* x has an all-ones exponent: NaN if the mantissa is nonzero.  */
3:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

	/* y has an all-ones exponent: NaN if the mantissa is nonzero.  */
4:	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, 2b
	movi	a2, 1
	leaf_return
134c8a50 BW |
1863 | |
1864 | #endif /* L_cmpdf2 */ | |
1865 | ||
1866 | #ifdef L_fixdfsi | |
1867 | ||
/* Convert double to signed 32-bit integer (truncating).  Result in a2.
   Out-of-range values and Infinity saturate to INT_MAX/INT_MIN
   according to sign; NaN is translated to +INT_MAX.  */
	.align	4
	.global	__fixdfsi
	.type	__fixdfsi, @function
__fixdfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixdfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 19, 10	/* 0x3fe */
	sub	a4, a4, a5
	bgei	a4, 32, .Lfixdfsi_maxint
	blti	a4, 1, .Lfixdfsi_zero

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6	/* a7 also keeps the sign in bit 31 */
	ssai	(32 - 11)
	src	a5, a7, xl

	/* Shift back to the right, based on the exponent.  */
	ssl	a4	/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixdfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixdfsi_maxint

	/* Translate NaN to +maxint.  */
	movi	xh, 0

.Lfixdfsi_maxint:
	slli	a4, a6, 11	/* 0x80000000 */
	addi	a5, a4, -1	/* 0x7fffffff */
	movgez	a4, a5, xh
	mov	a2, a4
	leaf_return

.Lfixdfsi_zero:
	/* |x| < 1: truncates to zero.  */
	movi	a2, 0
	leaf_return
134c8a50 BW |
1918 | |
1919 | #endif /* L_fixdfsi */ | |
1920 | ||
1921 | #ifdef L_fixdfdi | |
1922 | ||
/* Convert double to signed 64-bit integer (truncating).  Result in
   xh/xl.  Out-of-range values and Infinity saturate to LLONG_MAX /
   LLONG_MIN according to sign; NaN is translated to +LLONG_MAX.  */
	.align	4
	.global	__fixdfdi
	.type	__fixdfdi, @function
__fixdfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixdfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 19, 10	/* 0x3fe */
	sub	a4, a4, a5
	bgei	a4, 64, .Lfixdfdi_maxint
	blti	a4, 1, .Lfixdfdi_zero

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6	/* a7 also keeps the sign in bit 31 */
	ssai	(32 - 11)
	src	xh, a7, xl
	sll	xl, xl

	/* Shift back to the right, based on the exponent.  */
	ssl	a4	/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixdfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixdfdi_shifted:
	/* Negate the result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1	/* propagate the two's-complement borrow */
1:	leaf_return

.Lfixdfdi_smallshift:
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixdfdi_shifted

.Lfixdfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixdfdi_maxint

	/* Translate NaN to +maxint.  */
	movi	xh, 0

.Lfixdfdi_maxint:
	slli	a7, a6, 11	/* 0x80000000 */
	bgez	xh, 1f
	mov	xh, a7
	movi	xl, 0
	leaf_return

1:	addi	xh, a7, -1	/* 0x7fffffff */
	movi	xl, -1
	leaf_return

.Lfixdfdi_zero:
	/* |x| < 1: truncates to zero.  */
	movi	xh, 0
	movi	xl, 0
	leaf_return
134c8a50 BW |
1990 | |
1991 | #endif /* L_fixdfdi */ | |
1992 | ||
1993 | #ifdef L_fixunsdfsi | |
1994 | ||
/* Convert double to unsigned 32-bit integer (truncating).  Result in
   a2.  Values >= 2^32 saturate to 0xffffffff (or 0x80000000 when
   negative); NaN is translated to 0xffffffff.  Note the conversion
   of negative in-range values follows the signed path (neg/movgez),
   matching the code as written.  */
	.align	4
	.global	__fixunsdfsi
	.type	__fixunsdfsi, @function
__fixunsdfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixunsdfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 20, 10	/* 0x3ff */
	sub	a4, a4, a5
	bgei	a4, 32, .Lfixunsdfsi_maxint
	bltz	a4, .Lfixunsdfsi_zero

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6	/* a7 also keeps the sign in bit 31 */
	ssai	(32 - 11)
	src	a5, a7, xl

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 32, .Lfixunsdfsi_bigexp	/* would shift by zero */
	ssl	a4	/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixunsdfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixunsdfsi_maxint

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunsdfsi_maxint:
	slli	a4, a6, 11	/* 0x80000000 */
	movi	a5, -1	/* 0xffffffff */
	movgez	a4, a5, xh
	mov	a2, a4
	leaf_return

.Lfixunsdfsi_zero:
	movi	a2, 0
	leaf_return

.Lfixunsdfsi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	xh, 1f
	mov	a2, a5	/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 11
	leaf_return
134c8a50 BW |
2058 | |
2059 | #endif /* L_fixunsdfsi */ | |
2060 | ||
#ifdef L_fixunsdfdi

	/* unsigned long long __fixunsdfdi (double x)

	   Convert a double (xh/xl) to a 64-bit unsigned integer,
	   truncating toward zero.
	   In:    xh/xl = x
	   Out:   xh/xl = result
	   Scratch: a4-a7.  */

	.align	4
	.global	__fixunsdfdi
	.type	__fixunsdfdi, @function
__fixunsdfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity (all exponent bits set).  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixunsdfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 20, 10	/* 0x3ff */
	sub	a4, a4, a5
	bgei	a4, 64, .Lfixunsdfdi_maxint
	bltz	a4, .Lfixunsdfdi_zero

	/* Add explicit "1.0" and shift << 11.  (a7 also keeps the sign
	   in its msb for the sign test below.)  */
	or	a7, xh, a6
	ssai	(32 - 11)
	src	xh, a7, xl
	sll	xl, xl

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 64, .Lfixunsdfdi_bigexp
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunsdfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixunsdfdi_shifted:
	/* Negate the 64-bit result if sign != 0 (negate low word,
	   then propagate the borrow into the high word).  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixunsdfdi_smallshift:
	/* Shift count < 32: funnel-shift both words right.  */
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixunsdfdi_shifted

.Lfixunsdfdi_nan_or_inf:
	/* Handle Infinity and NaN.  A zero mantissa means Infinity.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixunsdfdi_maxint

	/* Translate NaN to 0xffffffff....  */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunsdfdi_maxint:
	/* Positive overflow returns all-ones (label 1 above); negative
	   returns 0x80000000_00000000.  */
	bgez	xh, 1b
2:	slli	xh, a6, 11	/* 0x80000000 */
	movi	xl, 0
	leaf_return

.Lfixunsdfdi_zero:
	/* |x| < 1 (or exponent underflow): truncates to zero.  */
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunsdfdi_bigexp:
	/* Handle unsigned maximum exponent case (exp - 0x3ff == 63).  */
	bltz	a7, 2b
	leaf_return		/* no shift needed */

#endif /* L_fixunsdfdi */
2136 | ||
#ifdef L_floatsidf

	/* double __floatunsidf (unsigned int x)

	   Convert a 32-bit unsigned integer (a2) to a double (xh/xl).
	   The conversion is exact, so no rounding is needed.  */

	.align	4
	.global	__floatunsidf
	.type	__floatunsidf, @function
__floatunsidf:
	leaf_entry sp, 16
	beqz	a2, .Lfloatsidf_return_zero

	/* Set the sign to zero and jump to the floatsidf code.  */
	movi	a7, 0
	j	.Lfloatsidf_normalize

	/* double __floatsidf (int x)

	   Convert a 32-bit signed integer (a2) to a double (xh/xl).
	   In:    a2 = x
	   Out:   xh/xl = result
	   Scratch: a4-a7 (a7 holds the sign bit).  */

	.align	4
	.global	__floatsidf
	.type	__floatsidf, @function
__floatsidf:
	leaf_entry sp, 16

	/* Check for zero.  */
	beqz	a2, .Lfloatsidf_return_zero

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  (INT_MIN stays 0x80000000, which
	   normalizes correctly below.)  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsidf_normalize:
	/* Normalize with the first 1 bit in the msb; a4 = leading-zero
	   count.  */
	do_nsau	a4, a2, a5, a6
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position (implicit 1.0 bit lands in
	   the exponent field and is folded in by the "add" below).  */
	srli	xh, a5, 11
	slli	xl, a5, (32 - 11)

	/* Set the exponent.  */
	movi	a5, 0x41d	/* 0x3fe + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 20
	add	xh, xh, a5

	/* Add the sign and return.  */
	slli	a7, a7, 31
	or	xh, xh, a7
	leaf_return

.Lfloatsidf_return_zero:
	/* a2 (the zero input) is already one half of the result;
	   clear the other half.  */
	movi	a3, 0
	leaf_return

#endif /* L_floatsidf */
2196 | ||
#ifdef L_floatdidf

	/* double __floatundidf (unsigned long long x)

	   Convert a 64-bit unsigned integer (xh/xl) to a double (xh/xl),
	   rounding to nearest-even.  */

	.align	4
	.global	__floatundidf
	.type	__floatundidf, @function
__floatundidf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Set the sign to zero and jump to the floatdidf code.  */
	movi	a7, 0
	j	.Lfloatdidf_normalize

	/* double __floatdidf (long long x)

	   Convert a 64-bit signed integer (xh/xl) to a double (xh/xl),
	   rounding to nearest-even.
	   Scratch: a4-a7 (a7 holds the sign bit, a6 the round bits).  */

	.align	4
	.global	__floatdidf
	.type	__floatdidf, @function
__floatdidf:
	leaf_entry sp, 16

	/* Check for zero (xh/xl == 0 already encodes +0.0).  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Save the sign.  */
	extui	a7, xh, 31, 1

	/* Get the absolute value (64-bit negate: negate both words,
	   then propagate the borrow into the high word).  */
	bgez	xh, .Lfloatdidf_normalize
	neg	xl, xl
	neg	xh, xh
	beqz	xl, .Lfloatdidf_normalize
	addi	xh, xh, -1

.Lfloatdidf_normalize:
	/* Normalize with the first 1 bit in the msb of xh; a4 = total
	   leading-zero count.  */
	beqz	xh, .Lfloatdidf_bigshift
	do_nsau	a4, xh, a5, a6
	ssl	a4
	src	xh, xh, xl
	sll	xl, xl

.Lfloatdidf_shifted:
	/* Shift the mantissa into position, with rounding bits in a6.  */
	ssai	11
	sll	a6, xl
	src	xl, xh, xl
	srl	xh, xh

	/* Set the exponent.  */
	movi	a5, 0x43d	/* 0x3fe + 63 */
	sub	a5, a5, a4
	slli	a5, a5, 20
	add	xh, xh, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	xh, xh, a7

	/* Round up if the leftover fraction is >= 1/2 (msb of a6 set).  */
	bgez	a6, 2f
	addi	xl, xl, 1
	beqz	xl, .Lfloatdidf_roundcarry

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatdidf_exactlyhalf
2:	leaf_return

.Lfloatdidf_bigshift:
	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
	do_nsau	a4, xl, a5, a6
	ssl	a4
	sll	xh, xl
	movi	xl, 0
	addi	a4, a4, 32
	j	.Lfloatdidf_shifted

.Lfloatdidf_exactlyhalf:
	/* Round down to the nearest even value (clear the lsb).  */
	srli	xl, xl, 1
	slli	xl, xl, 1
	leaf_return

.Lfloatdidf_roundcarry:
	/* xl is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	xh, xh, 1
	/* Overflow to the exponent is OK.  */
	leaf_return

#endif /* L_floatdidf */
2291 | ||
#ifdef L_truncdfsf2

	/* float __truncdfsf2 (double x)

	   Truncate a double (xh/xl) to a single-precision float (a2),
	   rounding to nearest-even, producing subnormals/zero on
	   underflow, Infinity on overflow, and quiet NaNs for NaN input.
	   Scratch: a4-a7.  */

	.align	4
	.global	__truncdfsf2
	.type	__truncdfsf2, @function
__truncdfsf2:
	leaf_entry sp, 16

	/* Adjust the exponent bias (double bias 0x3ff -> single 0x7f).  */
	movi	a4, (0x3ff - 0x7f) << 20
	sub	a5, xh, a4

	/* Check for underflow: the subtraction changed the sign bit, or
	   the rebased exponent is zero.  */
	xor	a6, xh, a5
	bltz	a6, .Ltrunc_underflow
	extui	a6, a5, 20, 11
	beqz	a6, .Ltrunc_underflow

	/* Check for overflow (rebased exponent >= 255).  */
	movi	a4, 255
	bge	a6, a4, .Ltrunc_overflow

	/* Shift a5/xl << 3 into a5/a4 (the single-precision fraction is
	   3 bits further left; a4 collects the leftover round bits).  */
	ssai	(32 - 3)
	src	a5, a5, xl
	sll	a4, xl

.Ltrunc_addsign:
	/* Add the sign bit.  */
	extui	a6, xh, 31, 1
	slli	a6, a6, 31
	or	a2, a6, a5

	/* Round up if the leftover fraction is >= 1/2 (msb of a4).  */
	bgez	a4, 1f
	addi	a2, a2, 1
	/* Overflow to the exponent is OK.  The answer will be correct.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a4, a4, 1
	beqz	a4, .Ltrunc_exactlyhalf
1:	leaf_return

.Ltrunc_exactlyhalf:
	/* Round down to the nearest even value (clear the lsb).  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

.Ltrunc_overflow:
	/* Check if exponent == 0x7ff (Inf/NaN rather than overflow).  */
	movi	a4, 0x7ff00000
	bnall	xh, a4, 1f

	/* Check if mantissa is nonzero (i.e., the input is a NaN).  */
	slli	a5, xh, 12
	or	a5, a5, xl
	beqz	a5, 1f

	/* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
	srli	a4, a4, 1

1:	slli	a4, a4, 4	/* 0xff000000 or 0xff800000 */
	/* Add the sign bit (funnel-shift sign in from a6).  */
	extui	a6, xh, 31, 1
	ssai	1
	src	a2, a6, a4
	leaf_return

.Ltrunc_underflow:
	/* Find shift count for a subnormal.  Flush to zero if >= 32.  */
	extui	a6, xh, 20, 11
	movi	a5, 0x3ff - 0x7f
	sub	a6, a5, a6
	addi	a6, a6, 1
	bgeui	a6, 32, 1f

	/* Replace the exponent with an explicit "1.0".  */
	slli	a5, a5, 13	/* 0x700000 */
	or	a5, a5, xh
	slli	a5, a5, 11
	srli	a5, a5, 11

	/* Shift the mantissa left by 3 bits (into a5/a4).  */
	ssai	(32 - 3)
	src	a5, a5, xl
	sll	a4, xl

	/* Shift right by a6; a7 catches bits shifted all the way out so
	   the sticky round bit is preserved.  */
	ssr	a6
	sll	a7, a4
	src	a4, a5, a4
	srl	a5, a5
	beqz	a7, .Ltrunc_addsign
	or	a4, a4, a6	/* any positive, nonzero value will work */
	j	.Ltrunc_addsign

	/* Return +/- zero.  */
1:	extui	a2, xh, 31, 1
	slli	a2, a2, 31
	leaf_return

#endif /* L_truncdfsf2 */
2395 | ||
#ifdef L_extendsfdf2

	/* double __extendsfdf2 (float x)

	   Extend a single-precision float (a2) to a double (xh/xl).
	   The conversion is exact; subnormal singles are normalized,
	   NaNs are quieted.
	   Scratch: a4-a7.  */

	.align	4
	.global	__extendsfdf2
	.type	__extendsfdf2, @function
__extendsfdf2:
	leaf_entry sp, 16

	/* Save the sign bit and then shift it off.  */
	extui	a5, a2, 31, 1
	slli	a5, a5, 31
	slli	a4, a2, 1

	/* Extract and check the exponent.  */
	extui	a6, a2, 23, 8
	beqz	a6, .Lextend_expzero
	addi	a6, a6, 1
	beqi	a6, 256, .Lextend_nan_or_inf

	/* Shift >> 3 into a4/xl (double fraction sits 3 bits further
	   right; a4 was already shifted left by 1 above).  */
	srli	a4, a4, 4
	slli	xl, a2, (32 - 3)

	/* Adjust the exponent bias (single bias 0x7f -> double 0x3ff).  */
	movi	a6, (0x3ff - 0x7f) << 20
	add	a4, a4, a6

	/* Add the sign bit.  */
	or	xh, a4, a5
	leaf_return

.Lextend_nan_or_inf:
	movi	a4, 0x7ff00000

	/* Check for NaN (nonzero mantissa).  */
	slli	a7, a2, 9
	beqz	a7, 1f

	/* Set the msb of the mantissa to make it a quiet NaN.
	   (a6 == 256 here, so 256 << 11 == 0x80000.)  */
	slli	a6, a6, 11	/* 0x80000 */
	or	a4, a4, a6

	/* Add the sign and return.  */
1:	or	xh, a4, a5
	movi	xl, 0
	leaf_return

.Lextend_expzero:
	/* Exponent is zero: either +/-0.0 (mantissa zero too; reuse the
	   return at label 1 with a4 == 0) or a subnormal.  */
	beqz	a4, 1b

	/* Normalize it to have 8 zero bits before the first 1 bit.  */
	do_nsau	a7, a4, a2, a3
	addi	a7, a7, -8
	ssl	a7
	sll	a4, a4

	/* Shift >> 3 into a4/xl.  */
	slli	xl, a4, (32 - 3)
	srli	a4, a4, 3

	/* Set the exponent, compensating for the normalization shift.  */
	movi	a6, 0x3fe - 0x7f
	sub	a6, a6, a7
	slli	a6, a6, 20
	add	a4, a4, a6

	/* Add the sign and return.  */
	or	xh, a4, a5
	leaf_return

#endif /* L_extendsfdf2 */
2466 | ||
2467 | ||
66192aa1 DKC |
#if XCHAL_HAVE_DFP_SQRT
#ifdef L_sqrt

	/* double __ieee754_sqrt (double x)

	   Square root using the Xtensa DFP accelerator: a sqrt0.d seed
	   followed by a fixed refinement sequence of maddn.d/divn.d
	   steps.  NOTE(review): the exact iteration scheme follows the
	   accelerator's documented instruction sequence; do not reorder.
	   In:    xh/xl = x
	   Out:   xh/xl = result
	   Clobbers: f0-f7.  */

	.text
	.align	4
	.global	__ieee754_sqrt
	.type	__ieee754_sqrt, @function
__ieee754_sqrt:
	leaf_entry sp, 16

	/* Move the operand into an FP register pair.  */
	wfrd	f1, xh, xl

	sqrt0.d	f2, f1			/* initial reciprocal-sqrt seed */
	const.d	f4, 0
	maddn.d	f4, f2, f2
	nexp01.d	f3, f1
	const.d	f0, 3
	addexp.d	f3, f0
	maddn.d	f0, f4, f3
	nexp01.d	f4, f1
	maddn.d	f2, f0, f2		/* refine the seed */
	const.d	f5, 0
	maddn.d	f5, f2, f3
	const.d	f0, 3
	maddn.d	f0, f5, f2
	neg.d	f6, f4
	maddn.d	f2, f0, f2		/* refine again */
	const.d	f0, 0
	const.d	f5, 0
	const.d	f7, 0
	maddn.d	f0, f6, f2
	maddn.d	f5, f2, f3
	const.d	f3, 3
	maddn.d	f7, f3, f2
	maddn.d	f4, f0, f0
	maddn.d	f3, f5, f2
	neg.d	f2, f7
	maddn.d	f0, f4, f2
	maddn.d	f7, f3, f7
	mksadj.d	f2, f1		/* sign/exponent adjustment from x */
	nexp01.d	f1, f1
	maddn.d	f1, f0, f0
	neg.d	f3, f7
	addexpm.d	f0, f2
	addexp.d	f3, f2
	divn.d	f0, f1, f3		/* final quotient step */

	/* Move the result back to the integer register pair.  */
	rfr	xl, f0
	rfrd	xh, f0

	leaf_return

#endif /* L_sqrt */
#endif /* XCHAL_HAVE_DFP_SQRT */
2522 | ||
#if XCHAL_HAVE_DFP_RECIP
#ifdef L_recipdf2
	/* Reciprocal */

	/* double __recipdf2 (double x)

	   Compute 1.0 / x using the Xtensa DFP accelerator: a recip0.d
	   seed followed by Newton-Raphson-style refinement steps
	   (each msub.d computes a residual, each mul.d/maddn.d applies
	   the correction).
	   In:    xh/xl = x
	   Out:   xh/xl = result
	   Clobbers: f0-f5.  */

	.align	4
	.global	__recipdf2
	.type	__recipdf2, @function
__recipdf2:
	leaf_entry sp, 16

	/* Move the operand into an FP register pair.  */
	wfrd	f1, xh, xl

	recip0.d	f0, f1		/* initial reciprocal seed */
	const.d	f2, 2
	msub.d	f2, f1, f0		/* f2 = 2 - x*seed */
	mul.d	f3, f1, f0
	const.d	f4, 2
	mul.d	f5, f0, f2		/* first refinement */
	msub.d	f4, f3, f2
	const.d	f2, 1
	mul.d	f0, f5, f4		/* second refinement */
	msub.d	f2, f1, f0		/* residual: 1 - x*f0 */
	maddn.d	f0, f0, f2		/* final correction */

	/* Move the result back to the integer register pair.  */
	rfr	xl, f0
	rfrd	xh, f0

	leaf_return

#endif /* L_recipdf2 */
#endif /* XCHAL_HAVE_DFP_RECIP */
2554 | ||
#if XCHAL_HAVE_DFP_RSQRT
#ifdef L_rsqrtdf2
	/* Reciprocal square root */

	/* double __rsqrtdf2 (double x)

	   Compute 1.0 / sqrt (x) using the Xtensa DFP accelerator:
	   an rsqrt0.d seed followed by three refinement rounds, each of
	   the form  f0 += (3*f0) * (1 - x*f0*f0) / 2-style correction
	   expressed with msub.d/maddn.d.  NOTE(review): constants and
	   scaling follow the accelerator's documented sequence.
	   In:    xh/xl = x
	   Out:   xh/xl = result
	   Clobbers: f0-f5.  */

	.align	4
	.global	__rsqrtdf2
	.type	__rsqrtdf2, @function
__rsqrtdf2:
	leaf_entry sp, 16

	/* Move the operand into an FP register pair.  */
	wfrd	f1, xh, xl

	rsqrt0.d	f0, f1		/* initial rsqrt seed */
	mul.d	f2, f1, f0
	const.d	f3, 3
	mul.d	f4, f3, f0
	const.d	f5, 1
	msub.d	f5, f2, f0		/* residual: 1 - x*f0*f0 */
	maddn.d	f0, f4, f5		/* first refinement */
	const.d	f2, 1
	mul.d	f4, f1, f0
	mul.d	f5, f3, f0
	msub.d	f2, f4, f0		/* residual */
	maddn.d	f0, f5, f2		/* second refinement */
	const.d	f2, 1
	mul.d	f1, f1, f0
	mul.d	f3, f3, f0
	msub.d	f2, f1, f0		/* residual */
	maddn.d	f0, f3, f2		/* third refinement */

	/* Move the result back to the integer register pair.  */
	rfr	xl, f0
	rfrd	xh, f0

	leaf_return

#endif /* L_rsqrtdf2 */
#endif /* XCHAL_HAVE_DFP_RSQRT */