/* IEEE-754 single-precision functions for Xtensa
   Copyright (C) 2006-2024 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif

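/* Throughout this file, float values are handled directly in their
   IEEE-754 binary32 bit representation.  As a reference, a minimal C
   sketch of that layout (hypothetical helper names, not used by this
   code):

	#include <stdint.h>

	// binary32: 1 sign bit, 8 exponent bits (bias 127), 23 mantissa
	// bits with an implicit leading "1.0" when the exponent field
	// is nonzero.
	static inline uint32_t sf_sign (uint32_t x) { return x >> 31; }
	static inline uint32_t sf_exp (uint32_t x)  { return (x >> 23) & 0xff; }
	static inline uint32_t sf_mant (uint32_t x) { return x & 0x7fffff; }

	// NaN or Infinity: exponent field all ones (mask 0x7f800000).
	static inline int sf_nan_or_inf (uint32_t x)
	{
	  return (x & 0x7f800000) == 0x7f800000;
	}
 */
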
/* Warning!  The branch displacements for some Xtensa branch instructions
   are quite small, and this code has been carefully laid out to keep
   branch targets in range.  If you change anything, be sure to check that
   the assembler is not relaxing anything to branch over a jump.  */

#ifdef L_negsf2

	.align	4
	.global	__negsf2
	.type	__negsf2, @function
__negsf2:
	leaf_entry sp, 16
	movi	a4, 0x80000000
	xor	a2, a2, a4
	leaf_return

#endif /* L_negsf2 */

#ifdef L_addsubsf3

	.literal_position
/* Addition */
__addsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Ladd_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	a3, a6, .Ladd_return_nan_or_inf
	/* If x is a NaN, return it.  Otherwise, return y.  */
	slli	a7, a2, 9
	bnez	a7, .Ladd_return_nan

.Ladd_ynan_or_inf:
	/* Return y.  */
	mov	a2, a3

.Ladd_return_nan_or_inf:
	slli	a7, a2, 9
	bnez	a7, .Ladd_return_nan
	leaf_return

.Ladd_return_nan:
	movi	a6, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a6
	leaf_return

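/* The 0x400000 constant above sets the most-significant mantissa bit,
   which is what distinguishes a quiet NaN from a signaling one in the
   usual encoding.  In C terms (illustrative sketch only):

	#include <stdint.h>

	// Quiet a NaN by setting the top mantissa bit, as the
	// movi/or pair above does.
	static inline uint32_t sf_quiet_nan (uint32_t nan_bits)
	{
	  return nan_bits | 0x400000;
	}
 */
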
.Ladd_opposite_signs:
	/* Operand signs differ.  Do a subtraction.  */
	slli	a7, a6, 8
	xor	a3, a3, a7
	j	.Lsub_same_sign

	.align	4
	.global	__addsf3
	.type	__addsf3, @function
__addsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Check if the two operands have the same sign.  */
	xor	a7, a2, a3
	bltz	a7, .Ladd_opposite_signs

.Ladd_same_sign:
	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
	ball	a2, a6, .Ladd_xnan_or_inf
	ball	a3, a6, .Ladd_ynan_or_inf

	/* Compare the exponents.  The smaller operand will be shifted
	   right by the exponent difference and added to the larger
	   one.  */
	extui	a7, a2, 23, 9
	extui	a8, a3, 23, 9
	bltu	a7, a8, .Ladd_shiftx

.Ladd_shifty:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	a3, a6, .Ladd_yexpzero

	/* Replace y sign/exponent with 0x008.  */
	or	a3, a3, a6
	slli	a3, a3, 8
	srli	a3, a3, 8

.Ladd_yexpdiff:
	/* Compute the exponent difference.  */
	sub	a10, a7, a8

	/* Exponent difference > 32 -- just return the bigger value.  */
	bgeui	a10, 32, 1f

	/* Shift y right by the exponent difference.  Any bits that are
	   shifted out of y are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, a3, a9
	srl	a3, a3

	/* Do the addition.  */
	add	a2, a2, a3

	/* Check if the add overflowed into the exponent.  */
	extui	a10, a2, 23, 9
	beq	a10, a7, .Ladd_round
	mov	a8, a7
	j	.Ladd_carry

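/* The alignment arithmetic above, restated as a C sketch (hypothetical
   names; "yfrac" has its sign/exponent replaced by the explicit "1.0"
   at bit 23, as just done for a3, and "rest" collects the bits shifted
   out of y, like a9):

	#include <stdint.h>

	// Align y to x's exponent and add the mantissas.  Bits shifted
	// out of y are kept in *rest for the rounding step.
	static uint32_t align_and_add (uint32_t xfrac, uint32_t yfrac,
				       unsigned expdiff, uint32_t *rest)
	{
	  if (expdiff >= 32)	// y is too small to affect the sum
	    return xfrac;
	  *rest = expdiff ? yfrac << (32 - expdiff) : 0;
	  return xfrac + (yfrac >> expdiff);
	}
 */
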
.Ladd_yexpzero:
	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0", and increment the apparent exponent
	   because subnormals behave as if they had the minimum (nonzero)
	   exponent.  Test for the case when both exponents are zero.  */
	slli	a3, a3, 9
	srli	a3, a3, 9
	bnone	a2, a6, .Ladd_bothexpzero
	addi	a8, a8, 1
	j	.Ladd_yexpdiff

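/* For a subnormal, the stored exponent field is zero but the effective
   exponent is the minimum normal exponent, 1; the value represented is
   mantissa * 2^(1 - 127 - 23).  That is why the apparent exponent is
   incremented by one above rather than an implicit "1.0" being
   inserted.  */
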
.Ladd_bothexpzero:
	/* Both exponents are zero.  Handle this as a special case.  There
	   is no need to shift or round, and the normal code for handling
	   a carry into the exponent field will not work because it
	   assumes there is an implicit "1.0" that needs to be added.  */
	add	a2, a2, a3
1:	leaf_return

.Ladd_xexpzero:
	/* Same as "yexpzero" except skip handling the case when both
	   exponents are zero.  */
	slli	a2, a2, 9
	srli	a2, a2, 9
	addi	a7, a7, 1
	j	.Ladd_xexpdiff

.Ladd_shiftx:
	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
	   because the exponent difference is always nonzero in this version,
	   the shift sequence can use SLL and skip loading a constant zero.  */
	bnone	a2, a6, .Ladd_xexpzero

	or	a2, a2, a6
	slli	a2, a2, 8
	srli	a2, a2, 8

.Ladd_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Ladd_returny

	ssr	a10
	sll	a9, a2
	srl	a2, a2

	add	a2, a2, a3

	/* Check if the add overflowed into the exponent.  */
	extui	a10, a2, 23, 9
	bne	a10, a8, .Ladd_carry

.Ladd_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	a2, a2, 1

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

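/* The rounding step above is IEEE round-to-nearest-even.  The same
   decision as a C sketch ("mant" is the preliminary result and "rest"
   holds the shifted-out fraction, msb first, like a9):

	#include <stdint.h>

	static uint32_t round_nearest_even (uint32_t mant, uint32_t rest)
	{
	  if (rest & 0x80000000u)	// leftover fraction >= 1/2
	    {
	      mant += 1;
	      if ((rest << 1) == 0)	// exactly 1/2: round to even
		mant &= ~1u;
	    }
	  return mant;
	}
 */
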
.Ladd_returny:
	mov	a2, a3
	leaf_return

.Ladd_carry:
	/* The addition has overflowed into the exponent field, so the
	   value needs to be renormalized.  The mantissa of the result
	   can be recovered by subtracting the original exponent and
	   adding 0x800000 (which is the explicit "1.0" for the
	   mantissa of the non-shifted operand -- the "1.0" for the
	   shifted operand was already added).  The mantissa can then
	   be shifted right by one bit.  The explicit "1.0" of the
	   shifted mantissa then needs to be replaced by the exponent,
	   incremented by one to account for the normalizing shift.
	   It is faster to combine these operations: do the shift first
	   and combine the additions and subtractions.  If x is the
	   original exponent, the result is:
	       shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
	   or:
	       shifted mantissa + ((x + 1) << 22)
	   Note that the exponent is incremented here by leaving the
	   explicit "1.0" of the mantissa in the exponent field.  */

	/* Shift x right by one bit.  Save the lsb.  */
	mov	a10, a2
	srli	a2, a2, 1

	/* See explanation above.  The original exponent is in a8.  */
	addi	a8, a8, 1
	slli	a8, a8, 22
	add	a2, a2, a8

	/* Return an Infinity if the exponent overflowed.  */
	ball	a2, a6, .Ladd_infinity

	/* Same thing as the "round" code except the msb of the leftover
	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
	bbci.l	a10, 0, 1f
	addi	a2, a2, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

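/* Checking the arithmetic in the comment above: (x << 23) - (x << 22)
   equals x << 22, so
       - (x << 22) + (1 << 22) + (x << 23) = (x + 1) << 22,
   which is exactly the combined constant added by the addi/slli/add
   sequence.  */
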
.Ladd_infinity:
	/* Clear the mantissa.  */
	srli	a2, a2, 23
	slli	a2, a2, 23

	/* The sign bit may have been lost in a carry-out.  Put it back.  */
	slli	a8, a8, 1
	or	a2, a2, a8
	leaf_return

.Ladd_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return


/* Subtraction */
__subsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Lsub_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	a3, a6, .Lsub_return_nan_or_inf
	/* Both x and y are either NaN or Inf, so the result is NaN.  */

.Lsub_return_nan:
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
	leaf_return

.Lsub_ynan_or_inf:
	/* Negate y and return it.  */
	slli	a7, a6, 8
	xor	a2, a3, a7

.Lsub_return_nan_or_inf:
	slli	a7, a2, 9
	bnez	a7, .Lsub_return_nan
	leaf_return

.Lsub_opposite_signs:
	/* Operand signs differ.  Do an addition.  */
	slli	a7, a6, 8
	xor	a3, a3, a7
	j	.Ladd_same_sign

	.align	4
	.global	__subsf3
	.type	__subsf3, @function
__subsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Check if the two operands have the same sign.  */
	xor	a7, a2, a3
	bltz	a7, .Lsub_opposite_signs

.Lsub_same_sign:
	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
	ball	a2, a6, .Lsub_xnan_or_inf
	ball	a3, a6, .Lsub_ynan_or_inf

	/* Compare the operands.  In contrast to addition, the entire
	   value matters here.  */
	extui	a7, a2, 23, 8
	extui	a8, a3, 23, 8
	bltu	a2, a3, .Lsub_xsmaller

.Lsub_ysmaller:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	a3, a6, .Lsub_yexpzero

	/* Replace y sign/exponent with 0x008.  */
	or	a3, a3, a6
	slli	a3, a3, 8
	srli	a3, a3, 8

.Lsub_yexpdiff:
	/* Compute the exponent difference.  */
	sub	a10, a7, a8

	/* Exponent difference > 32 -- just return the bigger value.  */
	bgeui	a10, 32, 1f

	/* Shift y right by the exponent difference.  Any bits that are
	   shifted out of y are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, a3, a9
	srl	a3, a3

	sub	a2, a2, a3

	/* Subtract the leftover bits in a9 from zero and propagate any
	   borrow from a2.  */
	neg	a9, a9
	addi	a10, a2, -1
	movnez	a2, a10, a9

	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, a2, 23, 8
	beq	a10, a7, .Lsub_round
	j	.Lsub_borrow

.Lsub_yexpzero:
	/* Return zero if the inputs are equal.  (For the non-subnormal
	   case, subtracting the "1.0" will cause a borrow from the exponent
	   and this case can be detected when handling the borrow.)  */
	beq	a2, a3, .Lsub_return_zero

	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
	   y's apparent exponent because subnormals behave as if they had
	   the minimum (nonzero) exponent.  */
	slli	a3, a3, 9
	srli	a3, a3, 9
	bnone	a2, a6, .Lsub_yexpdiff
	addi	a8, a8, 1
	j	.Lsub_yexpdiff

.Lsub_returny:
	/* Negate and return y.  */
	slli	a7, a6, 8
	xor	a2, a3, a7
1:	leaf_return

.Lsub_xsmaller:
	/* Same thing as the "ysmaller" code, but with x and y swapped and
	   with y negated.  */
	bnone	a2, a6, .Lsub_xexpzero

	or	a2, a2, a6
	slli	a2, a2, 8
	srli	a2, a2, 8

.Lsub_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Lsub_returny

	ssr	a10
	movi	a9, 0
	src	a9, a2, a9
	srl	a2, a2

	/* Negate y.  */
	slli	a11, a6, 8
	xor	a3, a3, a11

	sub	a2, a3, a2

	neg	a9, a9
	addi	a10, a2, -1
	movnez	a2, a10, a9

	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, a2, 23, 8
	bne	a10, a8, .Lsub_borrow

.Lsub_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	a2, a2, 1

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Lsub_exactlyhalf
1:	leaf_return

.Lsub_xexpzero:
	/* Same as "yexpzero".  */
	beq	a2, a3, .Lsub_return_zero
	slli	a2, a2, 9
	srli	a2, a2, 9
	bnone	a3, a6, .Lsub_xexpdiff
	addi	a7, a7, 1
	j	.Lsub_xexpdiff

.Lsub_return_zero:
	movi	a2, 0
	leaf_return

.Lsub_borrow:
	/* The subtraction has underflowed into the exponent field, so the
	   value needs to be renormalized.  Shift the mantissa left as
	   needed to remove any leading zeros and adjust the exponent
	   accordingly.  If the exponent is not large enough to remove
	   all the leading zeros, the result will be a subnormal value.  */

	slli	a8, a2, 9
	beqz	a8, .Lsub_xzero
	do_nsau	a6, a8, a7, a11
	srli	a8, a8, 9
	bge	a6, a10, .Lsub_subnormal
	addi	a6, a6, 1

.Lsub_normalize_shift:
	/* Shift the mantissa (a8/a9) left by a6.  */
	ssl	a6
	src	a8, a8, a9
	sll	a9, a9

	/* Combine the shifted mantissa with the sign and exponent,
	   decrementing the exponent by a6.  (The exponent has already
	   been decremented by one due to the borrow from the subtraction,
	   but adding the mantissa will increment the exponent by one.)  */
	srli	a2, a2, 23
	sub	a2, a2, a6
	slli	a2, a2, 23
	add	a2, a2, a8
	j	.Lsub_round

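/* In C terms, the renormalization above amounts to counting leading
   zeros (do_nsau maps to Xtensa's NSAU instruction) and shifting left
   by that amount while decrementing the exponent.  A sketch, with the
   guard bits and the subnormal clamp omitted:

	#include <stdint.h>

	// Shift the mantissa left until its leading 1 is back at
	// bit 23, adjusting the exponent to match.
	static uint32_t renormalize (uint32_t mant, int *exp)
	{
	  while (mant != 0 && (mant & 0x800000) == 0)
	    {
	      mant <<= 1;
	      (*exp)--;
	    }
	  return mant;
	}
 */
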
.Lsub_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

.Lsub_xzero:
	/* If there was a borrow from the exponent, and the mantissa and
	   guard digits are all zero, then the inputs were equal and the
	   result should be zero.  */
	beqz	a9, .Lsub_return_zero

	/* Only the guard digit is nonzero.  Shift by min(24, a10).  */
	addi	a11, a10, -24
	movi	a6, 24
	movltz	a6, a10, a11
	j	.Lsub_normalize_shift

.Lsub_subnormal:
	/* The exponent is too small to shift away all the leading zeros.
	   Set a6 to the current exponent (which has already been
	   decremented by the borrow) so that the exponent of the result
	   will be zero.  Do not add 1 to a6 in this case, because: (1)
	   adding the mantissa will not increment the exponent, so there is
	   no need to subtract anything extra from the exponent to
	   compensate, and (2) the effective exponent of a subnormal is 1
	   not 0 so the shift amount must be 1 smaller than normal.  */
	mov	a6, a10
	j	.Lsub_normalize_shift

#endif /* L_addsubsf3 */

#ifdef L_mulsf3

/* Multiplication */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

	.literal_position
__mulsf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
	/* Clear the sign bit of x.  */
	slli	a2, a2, 1
	srli	a2, a2, 1

	/* If x is zero, return zero.  */
	beqz	a2, .Lmul_return_zero

	/* Normalize x.  Adjust the exponent in a8.  */
	do_nsau	a10, a2, a11, a12
	addi	a10, a10, -8
	ssl	a10
	sll	a2, a2
	movi	a8, 1
	sub	a8, a8, a10
	j	.Lmul_xnormalized

.Lmul_yexpzero:
	/* Clear the sign bit of y.  */
	slli	a3, a3, 1
	srli	a3, a3, 1

	/* If y is zero, return zero.  */
	beqz	a3, .Lmul_return_zero

	/* Normalize y.  Adjust the exponent in a9.  */
	do_nsau	a10, a3, a11, a12
	addi	a10, a10, -8
	ssl	a10
	sll	a3, a3
	movi	a9, 1
	sub	a9, a9, a10
	j	.Lmul_ynormalized

.Lmul_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	j	.Lmul_done

.Lmul_xnan_or_inf:
	/* If y is zero, return NaN.  */
	slli	a8, a3, 1
	beqz	a8, .Lmul_return_nan
	/* If y is NaN, return y.  */
	bnall	a3, a6, .Lmul_returnx
	slli	a8, a3, 9
	beqz	a8, .Lmul_returnx

.Lmul_returny:
	mov	a2, a3

.Lmul_returnx:
	slli	a8, a2, 9
	bnez	a8, .Lmul_return_nan
	/* Set the sign bit and return.  */
	extui	a7, a7, 31, 1
	slli	a2, a2, 1
	ssai	1
	src	a2, a7, a2
	j	.Lmul_done

.Lmul_ynan_or_inf:
	/* If x is zero, return NaN.  */
	slli	a8, a2, 1
	bnez	a8, .Lmul_returny
	mov	a2, a3

.Lmul_return_nan:
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
	j	.Lmul_done

	.align	4
	.global	__mulsf3
	.type	__mulsf3, @function
__mulsf3:
#if __XTENSA_CALL0_ABI__
	leaf_entry sp, 32
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	leaf_entry sp, 64
#else
	leaf_entry sp, 32
#endif
	movi	a6, 0x7f800000

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity.  */
	ball	a2, a6, .Lmul_xnan_or_inf
	ball	a3, a6, .Lmul_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a8, .Lmul_xexpzero
.Lmul_xnormalized:
	beqz	a9, .Lmul_yexpzero
.Lmul_ynormalized:

	/* Add the exponents.  */
	add	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */

#if XCHAL_HAVE_MUL32_HIGH

	mull	a6, a2, a3
	muluh	a2, a2, a3

#else

	/* Break the inputs into 16-bit chunks and compute 4 32-bit partial
	   products.  These partial products are:

		0 xl * yl

		1 xl * yh
		2 xh * yl

		3 xh * yh

	   If using the Mul16 or Mul32 multiplier options, these input
	   chunks must be stored in separate registers.  For Mac16, the
	   UMUL.AA.* opcodes can specify that the inputs come from either
	   half of the registers, so there is no need to shift them out
	   ahead of time.  If there is no multiply hardware, the 16-bit
	   chunks can be extracted when setting up the arguments to the
	   separate multiply function.  */

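/* The partial-product scheme just described, written out in C (a
   sketch of the arithmetic only; the assembly below also tracks the
   carry out of pp1 + pp2 explicitly because it works in 32-bit
   registers):

	#include <stdint.h>

	// 32x32 -> 64 bit multiply built from four 16x16 -> 32 bit
	// partial products.
	static uint64_t mul32x32 (uint32_t x, uint32_t y)
	{
	  uint32_t xl = x & 0xffff, xh = x >> 16;
	  uint32_t yl = y & 0xffff, yh = y >> 16;
	  uint64_t pp0 = (uint64_t) xl * yl;
	  uint64_t pp1 = (uint64_t) xl * yh;
	  uint64_t pp2 = (uint64_t) xh * yl;
	  uint64_t pp3 = (uint64_t) xh * yh;
	  return pp0 + ((pp1 + pp2) << 16) + (pp3 << 32);
	}
 */
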
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Calling a separate multiply function will clobber a0 and requires
	   use of a8 as a temporary, so save those values now.  (The function
	   uses a custom ABI so nothing else needs to be saved.)  */
	s32i	a0, sp, 0
	s32i	a8, sp, 4
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into a2.  */
	do_mul(a2, a2, h, a3, h)	/* pp 3 */
	add	a2, a2, a9

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore values saved on the stack during the multiplication.  */
	l32i	a0, sp, 0
	l32i	a8, sp, 4
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */

	/* Shift left by 9 bits, unless there was a carry-out from the
	   multiply, in which case, shift by 8 bits and increment the
	   exponent.  */
	movi	a4, 9
	srli	a5, a2, 24 - 9
	beqz	a5, 1f
	addi	a4, a4, -1
	addi	a8, a8, 1
1:	ssl	a4
	src	a2, a2, a6
	sll	a6, a6

	/* Subtract the extra bias from the exponent sum (plus one to account
	   for the explicit "1.0" of the mantissa that will be added to the
	   exponent in the final result).  */
	movi	a4, 0x80
	sub	a8, a8, a4

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..fd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Lmul_overflow

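/* In terms of biased exponents ea and eb, the product's biased exponent
   is ea + eb - 127.  After the subtraction above, a8 = ea + eb - 0x80,
   i.e., exactly one less than the final exponent field; the difference
   is made up when the mantissa's explicit "1.0" is added into the
   exponent field at .Lmul_rounded below.  */
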
.Lmul_round:
	/* Round.  */
	bgez	a6, .Lmul_rounded
	addi	a2, a2, 1
	slli	a6, a6, 1
	beqz	a6, .Lmul_exactlyhalf

.Lmul_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 23
	add	a2, a2, a8

.Lmul_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

.Lmul_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	j	.Lmul_rounded

.Lmul_overflow:
	bltz	a8, .Lmul_underflow
	/* Return +/- Infinity.  */
	movi	a8, 0xff
	slli	a2, a8, 23
	j	.Lmul_addsign

.Lmul_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	mov	a9, a6
	ssr	a8
	bgeui	a8, 32, .Lmul_flush_to_zero

	/* Shift a2 right.  Any bits that are shifted out of a2 are saved
	   in a6 (combined with the shifted-out bits currently in a6) for
	   rounding the result.  */
	sll	a6, a2
	srl	a2, a2

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero bits shifted out into a6.  */
	beqz	a9, .Lmul_round
	movi	a9, 1
	or	a6, a6, a9
	j	.Lmul_round

.Lmul_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	j	.Lmul_done

#if XCHAL_NO_MUL

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.  */
	.align	4
.Lmul_mulsi3:
	leaf_entry sp, 16
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm
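
/* The macro above is a shift-and-add multiplier that retires four bits
   of src1 per iteration, conditionally accumulating src2, 2*src2,
   4*src2 and 8*src2.  An equivalent C sketch:

	#include <stdint.h>

	static uint32_t mulsi3 (uint32_t src1, uint32_t src2)
	{
	  uint32_t dst = 0;
	  while (src1 != 0)
	    {
	      if (src1 & 1) dst += src2;
	      if (src1 & 2) dst += src2 << 1;
	      if (src1 & 4) dst += src2 << 2;
	      if (src1 & 8) dst += src2 << 3;
	      src1 >>= 4;	// next four bits of the multiplier
	      src2 <<= 4;	// scale the multiplicand to match
	    }
	  return dst;
	}
 */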
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	leaf_return
#endif /* XCHAL_NO_MUL */
#endif /* L_mulsf3 */

#ifdef L_divsf3

/* Division */

#if XCHAL_HAVE_FP_DIV

	.align	4
	.global	__divsf3
	.type	__divsf3, @function
__divsf3:
	leaf_entry sp, 16

	wfr	f1, a2	/* dividend */
	wfr	f2, a3	/* divisor */

	div0.s	f3, f2
	nexp01.s	f4, f2
	const.s	f5, 1
	maddn.s	f5, f4, f3
	mov.s	f6, f3
	mov.s	f7, f2
	nexp01.s	f2, f1
	maddn.s	f6, f5, f6
	const.s	f5, 1
	const.s	f0, 0
	neg.s	f8, f2
	maddn.s	f5, f4, f6
	maddn.s	f0, f8, f3
	mkdadj.s	f7, f1
	maddn.s	f6, f5, f6
	maddn.s	f8, f4, f0
	const.s	f3, 1
	maddn.s	f3, f4, f6
	maddn.s	f0, f8, f6
	neg.s	f2, f2
	maddn.s	f6, f3, f6
	maddn.s	f2, f4, f0
	addexpm.s	f0, f7
	addexp.s	f6, f7
	divn.s	f0, f2, f6

	rfr	a2, f0

	leaf_return

#else

	.literal_position
__divsf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
	/* Clear the sign bit of y.  */
	slli	a3, a3, 1
	srli	a3, a3, 1

	/* Check for division by zero.  */
	beqz	a3, .Ldiv_yzero

	/* Normalize y.  Adjust the exponent in a9.  */
	do_nsau	a10, a3, a4, a5
	addi	a10, a10, -8
	ssl	a10
	sll	a3, a3
	movi	a9, 1
	sub	a9, a9, a10
	j	.Ldiv_ynormalized

.Ldiv_yzero:
	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
	slli	a4, a2, 1
	srli	a4, a4, 1
	srli	a2, a7, 31
	slli	a2, a2, 31
	or	a2, a2, a6
	bnez	a4, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Ldiv_xexpzero:
	/* Clear the sign bit of x.  */
	slli	a2, a2, 1
	srli	a2, a2, 1

	/* If x is zero, return zero.  */
	beqz	a2, .Ldiv_return_zero

	/* Normalize x.  Adjust the exponent in a8.  */
	do_nsau	a10, a2, a4, a5
	addi	a10, a10, -8
	ssl	a10
	sll	a2, a2
	movi	a8, 1
	sub	a8, a8, a10
	j	.Ldiv_xnormalized

.Ldiv_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return

.Ldiv_xnan_or_inf:
	/* Set the sign bit of the result.  */
	srli	a7, a3, 31
	slli	a7, a7, 31
	xor	a2, a2, a7
	/* If y is NaN or Inf, return NaN.  */
	ball	a3, a6, .Ldiv_return_nan
	slli	a7, a2, 9
	bnez	a7, .Ldiv_return_nan
	leaf_return

.Ldiv_ynan_or_inf:
	/* If y is Infinity, return zero.  */
	slli	a8, a3, 9
	beqz	a8, .Ldiv_return_zero
	/* y is NaN; return it.  */
	mov	a2, a3

.Ldiv_return_nan:
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
	leaf_return

	.align	4
	.global	__divsf3
	.type	__divsf3, @function
__divsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity.  */
	ball	a2, a6, .Ldiv_xnan_or_inf
	ball	a3, a6, .Ldiv_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
	beqz	a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

	/* Subtract the exponents.  */
	sub	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* The first digit of the mantissa division must be a one.
	   Shift x (and adjust the exponent) as needed to make this true.  */
	bltu	a3, a2, 1f
	slli	a2, a2, 1
	addi	a8, a8, -1
1:
	/* Do the first subtraction and shift.  */
	sub	a2, a2, a3
	slli	a2, a2, 1

	/* Put the quotient into a10.  */
	movi	a10, 1

	/* Divide one bit at a time for 23 bits.  */
	movi	a9, 23
#if XCHAL_HAVE_LOOPS
	loop	a9, .Ldiv_loopend
#endif
.Ldiv_loop:
	/* Shift the quotient << 1.  */
	slli	a10, a10, 1

	/* Is this digit a 0 or 1?  */
	bltu	a2, a3, 1f

	/* Output a 1 and subtract.  */
	addi	a10, a10, 1
	sub	a2, a2, a3

	/* Shift the dividend << 1.  */
1:	slli	a2, a2, 1

#if !XCHAL_HAVE_LOOPS
	addi	a9, a9, -1
	bnez	a9, .Ldiv_loop
#endif
.Ldiv_loopend:

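/* The loop above is classic restoring division, one quotient bit per
   iteration.  As a C sketch (inputs pre-aligned as above so the first
   quotient bit is known to be 1):

	#include <stdint.h>

	// Returns the 24-bit quotient; *rem2 receives the remainder
	// shifted left by one, as used by .Ldiv_round below.
	static uint32_t divide_mantissa (uint32_t x, uint32_t y,
					 uint32_t *rem2)
	{
	  uint32_t q = 1;	// first digit is known to be 1
	  x = (x - y) << 1;	// first subtraction and shift
	  for (int i = 0; i < 23; i++)
	    {
	      q <<= 1;
	      if (x >= y)	// output a 1 and subtract
		{
		  q += 1;
		  x -= y;
		}
	      x <<= 1;
	    }
	  *rem2 = x;
	  return q;
	}
 */
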
	/* Add the exponent bias (less one to account for the explicit "1.0"
	   of the mantissa that will be added to the exponent in the final
	   result).  */
	addi	a8, a8, 0x7e

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..fd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Ldiv_overflow

.Ldiv_round:
	/* Round.  The remainder (<< 1) is in a2.  */
	bltu	a2, a3, .Ldiv_rounded
	addi	a10, a10, 1
	beq	a2, a3, .Ldiv_exactlyhalf

.Ldiv_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 23
	add	a2, a10, a8

.Ldiv_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7
	leaf_return

.Ldiv_overflow:
	bltz	a8, .Ldiv_underflow
	/* Return +/- Infinity.  */
	addi	a8, a4, 1	/* 0xff */
	slli	a2, a8, 23
	j	.Ldiv_addsign

.Ldiv_exactlyhalf:
	/* Remainder is exactly half the divisor.  Round even.  */
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	ssr	a8
	bgeui	a8, 32, .Ldiv_flush_to_zero

	/* Shift a10 right.  Any bits that are shifted out of a10 are
	   saved in a6 for rounding the result.  */
	sll	a6, a10
	srl	a10, a10

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero remainder (in a2) into a6.  */
	beqz	a2, 1f
	movi	a9, 1
	or	a6, a6, a9

	/* Round a10 based on the bits shifted out into a6.  */
1:	bgez	a6, .Ldiv_rounded
	addi	a10, a10, 1
	slli	a6, a6, 1
	bnez	a6, .Ldiv_rounded
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return

#endif /* XCHAL_HAVE_FP_DIV */

#endif /* L_divsf3 */

#ifdef L_cmpsf2

/* Equal and Not Equal */

	.align	4
	.global	__eqsf2
	.global	__nesf2
	.set	__nesf2, __eqsf2
	.type	__eqsf2, @function
__eqsf2:
	leaf_entry sp, 16
	bne	a2, a3, 4f

	/* The values are equal but NaN != NaN.  Check the exponent.  */
	movi	a6, 0x7f800000
	ball	a2, a6, 3f

	/* Equal.  */
	movi	a2, 0
	leaf_return

	/* Not equal.  */
2:	movi	a2, 1
	leaf_return

	/* Check if the mantissas are nonzero.  */
3:	slli	a7, a2, 9
	j	5f

	/* Check if x and y are zero with different signs.  */
4:	or	a7, a2, a3
	slli	a7, a7, 1
	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   of x when exponent(x) = 0x7f8 and x == y.  */
5:	movi	a2, 0
	movi	a3, 1
	movnez	a2, a3, a7
	leaf_return

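/* These routines follow the usual libgcc soft-float comparison
   conventions: __eqsf2 returns zero iff the operands compare equal,
   and the ordered comparisons return a three-way result that is
   compared against zero.  Illustrative C usage (not part of this
   file):

	extern int __eqsf2 (float, float);	// a == b  <=>  result == 0
	extern int __ltsf2 (float, float);	// a <  b  <=>  result <  0
	extern int __lesf2 (float, float);	// a <= b  <=>  result <= 0
	extern int __gtsf2 (float, float);	// a >  b  <=>  result >  0
	extern int __gesf2 (float, float);	// a >= b  <=>  result >= 0

	static int flt_lt (float a, float b) { return __ltsf2 (a, b) < 0; }

   The NaN return values below are chosen so that every ordered
   comparison involving a NaN comes out false.  */
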

/* Greater Than */

	.align	4
	.global	__gtsf2
	.type	__gtsf2, @function
__gtsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return


/* Less Than or Equal */

	.align	4
	.global	__lesf2
	.type	__lesf2, @function
__lesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

.Lle_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Lle_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Lle_xneg

	/* Check if x <= y.  */
	bltu	a3, a2, 5f
4:	movi	a2, 0
	leaf_return

.Lle_xneg:
	/* Check if y <= x.  */
	bgeu	a2, a3, 4b
5:	movi	a2, 1
	leaf_return

.Lle_diff_signs:
	bltz	a2, 4b

	/* Check if both x and y are zero.  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 1
	movi	a3, 0
	moveqz	a2, a3, a7
	leaf_return


/* Greater Than or Equal */

	.align	4
	.global	__gesf2
	.type	__gesf2, @function
__gesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, -1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, -1
	leaf_return


/* Less Than */

	.align	4
	.global	__ltsf2
	.type	__ltsf2, @function
__ltsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return

.Llt_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Llt_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Llt_xneg

	/* Check if x < y.  */
	bgeu	a2, a3, 5f
4:	movi	a2, -1
	leaf_return

.Llt_xneg:
	/* Check if y < x.  */
	bltu	a3, a2, 4b
5:	movi	a2, 0
	leaf_return

.Llt_diff_signs:
	bgez	a2, 5b

	/* x is negative and y is positive, so x < y unless both
	   are zero; check whether either is nonzero.  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7
	leaf_return


/* Unordered */

	.align	4
	.global	__unordsf2
	.type	__unordsf2, @function
__unordsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 3f
1:	ball	a3, a6, 4f
2:	movi	a2, 0
	leaf_return

3:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

4:	slli	a7, a3, 9
	beqz	a7, 2b
	movi	a2, 1
	leaf_return

#endif /* L_cmpsf2 */

#ifdef L_fixsfsi

	.align	4
	.global	__fixsfsi
	.type	__fixsfsi, @function
__fixsfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 32, .Lfixsfsi_maxint
	blti	a4, 1, .Lfixsfsi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixsfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfsi_maxint

	/* Translate NaN to +maxint.  */
	movi	a2, 0

.Lfixsfsi_maxint:
	slli	a4, a6, 8	/* 0x80000000 */
	addi	a5, a4, -1	/* 0x7fffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixsfsi_zero:
	movi	a2, 0
	leaf_return

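/* The conversion above as a C sketch (hypothetical helper; like the
   assembly, it truncates toward zero, saturates out-of-range values,
   and maps NaN to +maxint):

	#include <stdint.h>

	static int32_t fixsfsi (uint32_t bits)
	{
	  int exp = (int) ((bits >> 23) & 0xff) - 0x7e;
	  if ((bits & 0x7f800000) == 0x7f800000 && (bits << 9) != 0)
	    bits = 0;			// NaN: treat as positive
	  if (exp >= 32)		// too big (or Inf): saturate
	    return (int32_t) bits < 0 ? INT32_MIN : INT32_MAX;
	  if (exp < 1)			// |x| < 1 truncates to zero
	    return 0;
	  uint32_t frac = (bits | 0x800000) << 8;   // explicit "1.0"
	  uint32_t mag = frac >> (32 - exp);
	  return (int32_t) bits < 0 ? -(int32_t) mag : (int32_t) mag;
	}
 */
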
#endif /* L_fixsfsi */

#ifdef L_fixsfdi

	.align	4
	.global	__fixsfdi
	.type	__fixsfdi, @function
__fixsfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 64, .Lfixsfdi_maxint
	blti	a4, 1, .Lfixsfdi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixsfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixsfdi_shifted:
	/* Negate the result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixsfdi_smallshift:
	movi	xl, 0
	sll	xl, xh
	srl	xh, xh
	j	.Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfdi_maxint

	/* Translate NaN to +maxint.  */
	movi	a2, 0

.Lfixsfdi_maxint:
	slli	a7, a6, 8	/* 0x80000000 */
	bgez	a2, 1f
	mov	xh, a7
	movi	xl, 0
	leaf_return

1:	addi	xh, a7, -1	/* 0x7fffffff */
	movi	xl, -1
	leaf_return

.Lfixsfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

#endif /* L_fixsfdi */

#ifdef L_fixunssfsi

	.align	4
	.global	__fixunssfsi
	.type	__fixunssfsi, @function
__fixunssfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 32, .Lfixunssfsi_maxint
	bltz	a4, .Lfixunssfsi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 32, .Lfixunssfsi_bigexp
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixunssfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfsi_maxint

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunssfsi_maxint:
	slli	a4, a6, 8	/* 0x80000000 */
	movi	a5, -1		/* 0xffffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixunssfsi_zero:
	movi	a2, 0
	leaf_return

.Lfixunssfsi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	a2, 1f
	mov	a2, a5		/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 8
	leaf_return

#endif /* L_fixunssfsi */

#ifdef L_fixunssfdi

	.align	4
	.global	__fixunssfdi
	.type	__fixunssfdi, @function
__fixunssfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 64, .Lfixunssfdi_maxint
	bltz	a4, .Lfixunssfdi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 64, .Lfixunssfdi_bigexp
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunssfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixunssfdi_shifted:
	/* Negate the result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixunssfdi_smallshift:
	movi	xl, 0
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfdi_maxint

	/* Translate NaN to 0xffffffff....  */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunssfdi_maxint:
	bgez	a2, 1b
2:	slli	xh, a6, 8	/* 0x80000000 */
	movi	xl, 0
	leaf_return

.Lfixunssfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunssfdi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	a7, 2b
	movi	xl, 0
	leaf_return		/* no shift needed */

#endif /* L_fixunssfdi */

#ifdef L_floatsisf

	.align	4
	.global	__floatunsisf
	.type	__floatunsisf, @function
__floatunsisf:
	leaf_entry sp, 16
	beqz	a2, .Lfloatsisf_return

	/* Set the sign to zero and jump to the floatsisf code.  */
	movi	a7, 0
	j	.Lfloatsisf_normalize

	.align	4
	.global	__floatsisf
	.type	__floatsisf, @function
__floatsisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	beqz	a2, .Lfloatsisf_return

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsisf_normalize:
	/* Normalize with the first 1 bit in the msb.  */
	do_nsau	a4, a2, a5, a6
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position, with rounding bits in a6.  */
	srli	a2, a5, 8
	slli	a6, a5, (32 - 8)

	/* Set the exponent.  */
	movi	a5, 0x9d	/* 0x7e + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, a2, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, .Lfloatsisf_return
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
	leaf_return

.Lfloatsisf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

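/* The same int-to-float algorithm as a C sketch (hypothetical helper;
   __builtin_clz plays the role of do_nsau):

	#include <stdint.h>

	static uint32_t floatsisf (int32_t i)
	{
	  if (i == 0)
	    return 0;
	  uint32_t sign = (uint32_t) i & 0x80000000u;
	  uint32_t mag = i < 0 ? 0u - (uint32_t) i : (uint32_t) i;
	  int shift = __builtin_clz (mag);
	  uint32_t norm = mag << shift;		// leading 1 at bit 31
	  uint32_t rest = norm << 24;		// shifted-out fraction
	  uint32_t r = sign | (((uint32_t) (0x9d - shift) << 23)
			       + (norm >> 8));
	  if (rest & 0x80000000u)		// >= 1/2: round up
	    {
	      r += 1;				// may carry into exponent
	      if ((rest << 1) == 0)		// exactly 1/2: to even
		r &= ~1u;
	    }
	  return r;
	}
 */
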
#endif /* L_floatsisf */

#ifdef L_floatdisf

	.align	4
	.global	__floatundisf
	.type	__floatundisf, @function
__floatundisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Set the sign to zero and jump to the floatdisf code.  */
	movi	a7, 0
	j	.Lfloatdisf_normalize

	.align	4
	.global	__floatdisf
	.type	__floatdisf, @function
__floatdisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Save the sign.  */
	extui	a7, xh, 31, 1

	/* Get the absolute value.  */
	bgez	xh, .Lfloatdisf_normalize
	neg	xl, xl
	neg	xh, xh
	beqz	xl, .Lfloatdisf_normalize
	addi	xh, xh, -1

.Lfloatdisf_normalize:
	/* Normalize with the first 1 bit in the msb of xh.  */
	beqz	xh, .Lfloatdisf_bigshift
	do_nsau	a4, xh, a5, a6
	ssl	a4
	src	xh, xh, xl
	sll	xl, xl

.Lfloatdisf_shifted:
	/* Shift the mantissa into position, with rounding bits in a6.  */
	ssai	8
	sll	a5, xl
	src	a6, xh, xl
	srl	xh, xh
	beqz	a5, 1f
	movi	a5, 1
	or	a6, a6, a5
1:
	/* Set the exponent.  */
	movi	a5, 0xbd	/* 0x7e + 63 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, xh, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, 2f
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatdisf_exactlyhalf
2:	leaf_return

.Lfloatdisf_bigshift:
	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
	do_nsau	a4, xl, a5, a6
	ssl	a4
	sll	xh, xl
	movi	xl, 0
	addi	a4, a4, 32
	j	.Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

#endif /* L_floatdisf */

#if XCHAL_HAVE_FP_SQRT
#ifdef L_sqrtf
/* Square root */

	.align	4
	.global	__ieee754_sqrtf
	.type	__ieee754_sqrtf, @function
__ieee754_sqrtf:
	leaf_entry sp, 16

	wfr	f1, a2

	sqrt0.s	f2, f1
	const.s	f3, 0
	maddn.s	f3, f2, f2
	nexp01.s	f4, f1
	const.s	f0, 3
	addexp.s	f4, f0
	maddn.s	f0, f3, f4
	nexp01.s	f3, f1
	neg.s	f5, f3
	maddn.s	f2, f0, f2
	const.s	f0, 0
	const.s	f6, 0
	const.s	f7, 0
	maddn.s	f0, f5, f2
	maddn.s	f6, f2, f4
	const.s	f4, 3
	maddn.s	f7, f4, f2
	maddn.s	f3, f0, f0
	maddn.s	f4, f6, f2
	neg.s	f2, f7
	maddn.s	f0, f3, f2
	maddn.s	f7, f4, f7
	mksadj.s	f2, f1
	nexp01.s	f1, f1
	maddn.s	f1, f0, f0
	neg.s	f3, f7
	addexpm.s	f0, f2
	addexp.s	f3, f2
	divn.s	f0, f1, f3

	rfr	a2, f0

	leaf_return

#endif /* L_sqrtf */
#endif /* XCHAL_HAVE_FP_SQRT */

#if XCHAL_HAVE_FP_RECIP
#ifdef L_recipsf2
/* Reciprocal */

	.align	4
	.global	__recipsf2
	.type	__recipsf2, @function
__recipsf2:
	leaf_entry sp, 16

	wfr	f1, a2

	recip0.s	f0, f1
	const.s	f2, 1
	msub.s	f2, f1, f0
	maddn.s	f0, f0, f2
	const.s	f2, 1
	msub.s	f2, f1, f0
	maddn.s	f0, f0, f2

	rfr	a2, f0

	leaf_return

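/* recip0.s produces a low-precision initial estimate; each const.s /
   msub.s / maddn.s triple then performs one Newton-Raphson step,
   x' = x + x*(1 - a*x), roughly doubling the number of accurate bits.
   The same iteration as a C sketch of the math (not of the exact
   instruction semantics):

	// Refine an initial reciprocal estimate x0 of 1/a.
	static float refine_recip (float a, float x0)
	{
	  float x = x0;
	  for (int i = 0; i < 2; i++)	// two steps, as above
	    {
	      float e = 1.0f - a * x;	// current error term
	      x = x + x * e;		// Newton-Raphson update
	    }
	  return x;
	}
 */
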
#endif /* L_recipsf2 */
#endif /* XCHAL_HAVE_FP_RECIP */

#if XCHAL_HAVE_FP_RSQRT
#ifdef L_rsqrtsf2
/* Reciprocal square root */

	.align	4
	.global	__rsqrtsf2
	.type	__rsqrtsf2, @function
__rsqrtsf2:
	leaf_entry sp, 16

	wfr	f1, a2

	rsqrt0.s	f0, f1
	mul.s	f2, f1, f0
	const.s	f3, 3
	mul.s	f4, f3, f0
	const.s	f5, 1
	msub.s	f5, f2, f0
	maddn.s	f0, f4, f5
	mul.s	f2, f1, f0
	mul.s	f1, f3, f0
	const.s	f3, 1
	msub.s	f3, f2, f0
	maddn.s	f0, f1, f3

	rfr	a2, f0

	leaf_return

#endif /* L_rsqrtsf2 */
#endif /* XCHAL_HAVE_FP_RSQRT */