1/* IEEE-754 double-precision functions for Xtensa
2 Copyright (C) 2006-2023 Free Software Foundation, Inc.
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26#ifdef __XTENSA_EB__
27#define xh a2
28#define xl a3
29#define yh a4
30#define yl a5
31#else
32#define xh a3
33#define xl a2
34#define yh a5
35#define yl a4
36#endif
37
38/* Warning! The branch displacements for some Xtensa branch instructions
39 are quite small, and this code has been carefully laid out to keep
40 branch targets in range. If you change anything, be sure to check that
41 the assembler is not relaxing anything to branch over a jump. */
42
43#ifdef L_negdf2
44
45 .align 4
46 .global __negdf2
47 .type __negdf2, @function
48__negdf2:
49 leaf_entry sp, 16
50 movi a4, 0x80000000
51 xor xh, xh, a4
52 leaf_return
53
54#endif /* L_negdf2 */
55
56#ifdef L_addsubdf3
57
58 .literal_position
59 /* Addition */
60__adddf3_aux:
61
62 /* Handle NaNs and Infinities. (This code is placed before the
63 start of the function just to keep it in range of the limited
64 branch displacements.) */
65
66.Ladd_xnan_or_inf:
67 /* If y is neither Infinity nor NaN, return x. */
68 bnall yh, a6, .Ladd_return_nan_or_inf
69 /* If x is a NaN, return it. Otherwise, return y. */
70 slli a7, xh, 12
71 or a7, a7, xl
72 bnez a7, .Ladd_return_nan
73
74.Ladd_ynan_or_inf:
75 /* Return y. */
76 mov xh, yh
77 mov xl, yl
78
79.Ladd_return_nan_or_inf:
80 slli a7, xh, 12
81 or a7, a7, xl
82 bnez a7, .Ladd_return_nan
83 leaf_return
84
85.Ladd_return_nan:
86 movi a4, 0x80000 /* make it a quiet NaN */
87 or xh, xh, a4
88 leaf_return
89
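	/* Note on the constant above: 0x80000 in the high word is bit 51 of
	   the 52-bit significand, i.e. its most significant bit, which is
	   the conventional IEEE-754 "quiet" flag, so ORing it in turns any
	   NaN into a quiet NaN without touching sign or exponent. */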
90.Ladd_opposite_signs:
91 /* Operand signs differ. Do a subtraction. */
92 slli a7, a6, 11
93 xor yh, yh, a7
94 j .Lsub_same_sign
95
96 .align 4
97 .global __adddf3
98 .type __adddf3, @function
99__adddf3:
100 leaf_entry sp, 16
101 movi a6, 0x7ff00000
102
103 /* Check if the two operands have the same sign. */
104 xor a7, xh, yh
105 bltz a7, .Ladd_opposite_signs
106
107.Ladd_same_sign:
108 /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
109 ball xh, a6, .Ladd_xnan_or_inf
110 ball yh, a6, .Ladd_ynan_or_inf
111
112 /* Compare the exponents. The smaller operand will be shifted
113 right by the exponent difference and added to the larger
114 one. */
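	/* Illustrative C sketch of the alignment step (a rough model, not
	   the exact register-level code): m is the smaller operand's
	   mantissa with the implicit 1 inserted, d is the exponent
	   difference, and the bits shifted out are kept for rounding.

	     #include <stdint.h>

	     static uint64_t align_mantissa (uint64_t m, unsigned d, uint64_t *rest)
	     {
	       if (d == 0)  { *rest = 0; return m; }
	       if (d >= 64) { *rest = (m != 0); return 0; }
	       *rest = m << (64 - d);   // lost bits, left-aligned
	       return m >> d;
	     }

	   For d >= 64 the lost bits collapse into a single sticky value;
	   for 1 <= d <= 63 they are returned left-aligned so the rounding
	   code can still tell "below", "exactly", and "above" one half. */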
115 extui a7, xh, 20, 12
116 extui a8, yh, 20, 12
117 bltu a7, a8, .Ladd_shiftx
118
119.Ladd_shifty:
120 /* Check if the smaller (or equal) exponent is zero. */
121 bnone yh, a6, .Ladd_yexpzero
122
123 /* Replace yh sign/exponent with 0x001. */
124 or yh, yh, a6
125 slli yh, yh, 11
126 srli yh, yh, 11
127
128.Ladd_yexpdiff:
129 /* Compute the exponent difference. Optimize for difference < 32. */
130 sub a10, a7, a8
131 bgeui a10, 32, .Ladd_bigshifty
132
133 /* Shift yh/yl right by the exponent difference. Any bits that are
134 shifted out of yl are saved in a9 for rounding the result. */
135 ssr a10
136 movi a9, 0
137 src a9, yl, a9
138 src yl, yh, yl
139 srl yh, yh
140
141.Ladd_addy:
142 /* Do the 64-bit addition. */
143 add xl, xl, yl
144 add xh, xh, yh
145 bgeu xl, yl, 1f
146 addi xh, xh, 1
1471:
148 /* Check if the add overflowed into the exponent. */
149 extui a10, xh, 20, 12
150 beq a10, a7, .Ladd_round
151 mov a8, a7
152 j .Ladd_carry
153
154.Ladd_yexpzero:
155 /* y is a subnormal value. Replace its sign/exponent with zero,
156 i.e., no implicit "1.0", and increment the apparent exponent
157 because subnormals behave as if they had the minimum (nonzero)
158 exponent. Test for the case when both exponents are zero. */
159 slli yh, yh, 12
160 srli yh, yh, 12
161 bnone xh, a6, .Ladd_bothexpzero
162 addi a8, a8, 1
163 j .Ladd_yexpdiff
164
165.Ladd_bothexpzero:
166 /* Both exponents are zero. Handle this as a special case. There
167 is no need to shift or round, and the normal code for handling
168 a carry into the exponent field will not work because it
169 assumes there is an implicit "1.0" that needs to be added. */
170 add xl, xl, yl
171 add xh, xh, yh
172 bgeu xl, yl, 1f
173 addi xh, xh, 1
1741: leaf_return
175
176.Ladd_bigshifty:
177 /* Exponent difference >= 64 -- just return the bigger value. */
178 bgeui a10, 64, 1b
179
180 /* Shift yh/yl right by the exponent difference. Any bits that are
181 shifted out are saved in a9 for rounding the result. */
182 ssr a10
183 sll a11, yl /* lost bits shifted out of yl */
184 src a9, yh, yl
185 srl yl, yh
186 movi yh, 0
187 beqz a11, .Ladd_addy
188 or a9, a9, a10 /* any positive, nonzero value will work */
189 j .Ladd_addy
190
191.Ladd_xexpzero:
192 /* Same as "yexpzero" except skip handling the case when both
193 exponents are zero. */
194 slli xh, xh, 12
195 srli xh, xh, 12
196 addi a7, a7, 1
197 j .Ladd_xexpdiff
198
199.Ladd_shiftx:
200 /* Same thing as the "shifty" code, but with x and y swapped. Also,
201 because the exponent difference is always nonzero in this version,
202 the shift sequence can use SLL and skip loading a constant zero. */
203 bnone xh, a6, .Ladd_xexpzero
204
205 or xh, xh, a6
206 slli xh, xh, 11
207 srli xh, xh, 11
208
209.Ladd_xexpdiff:
210 sub a10, a8, a7
211 bgeui a10, 32, .Ladd_bigshiftx
212
213 ssr a10
214 sll a9, xl
215 src xl, xh, xl
216 srl xh, xh
217
218.Ladd_addx:
219 add xl, xl, yl
220 add xh, xh, yh
221 bgeu xl, yl, 1f
222 addi xh, xh, 1
2231:
224 /* Check if the add overflowed into the exponent. */
225 extui a10, xh, 20, 12
226 bne a10, a8, .Ladd_carry
227
228.Ladd_round:
229 /* Round up if the leftover fraction is >= 1/2. */
230 bgez a9, 1f
231 addi xl, xl, 1
232 beqz xl, .Ladd_roundcarry
233
234 /* Check if the leftover fraction is exactly 1/2. */
235 slli a9, a9, 1
236 beqz a9, .Ladd_exactlyhalf
2371: leaf_return
238
239.Ladd_bigshiftx:
240 /* Mostly the same thing as "bigshifty".... */
241 bgeui a10, 64, .Ladd_returny
242
243 ssr a10
244 sll a11, xl
245 src a9, xh, xl
246 srl xl, xh
247 movi xh, 0
248 beqz a11, .Ladd_addx
249 or a9, a9, a10
250 j .Ladd_addx
251
252.Ladd_returny:
253 mov xh, yh
254 mov xl, yl
255 leaf_return
256
257.Ladd_carry:
258 /* The addition has overflowed into the exponent field, so the
259 value needs to be renormalized. The mantissa of the result
260 can be recovered by subtracting the original exponent and
261 adding 0x100000 (which is the explicit "1.0" for the
262 mantissa of the non-shifted operand -- the "1.0" for the
263 shifted operand was already added). The mantissa can then
264 be shifted right by one bit. The explicit "1.0" of the
265 shifted mantissa then needs to be replaced by the exponent,
266 incremented by one to account for the normalizing shift.
267 It is faster to combine these operations: do the shift first
268 and combine the additions and subtractions. If x is the
269 original exponent, the result is:
270 shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
271 or:
272 shifted mantissa + ((x + 1) << 19)
273 Note that the exponent is incremented here by leaving the
274 explicit "1.0" of the mantissa in the exponent field. */
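	/* (The identity used above follows from (x << 20) - (x << 19) = (x << 19):
	      - (x << 19) + (1 << 19) + (x << 20) = (x << 19) + (1 << 19)
	                                          = ((x + 1) << 19).) */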
275
276 /* Shift xh/xl right by one bit. Save the lsb of xl. */
277 mov a10, xl
278 ssai 1
279 src xl, xh, xl
280 srl xh, xh
281
282 /* See explanation above. The original exponent is in a8. */
283 addi a8, a8, 1
284 slli a8, a8, 19
285 add xh, xh, a8
286
287 /* Return an Infinity if the exponent overflowed. */
288 ball xh, a6, .Ladd_infinity
289
290 /* Same thing as the "round" code except the msb of the leftover
291 fraction is bit 0 of a10, with the rest of the fraction in a9. */
292 bbci.l a10, 0, 1f
293 addi xl, xl, 1
294 beqz xl, .Ladd_roundcarry
295 beqz a9, .Ladd_exactlyhalf
2961: leaf_return
297
298.Ladd_infinity:
299 /* Clear the mantissa. */
300 movi xl, 0
301 srli xh, xh, 20
302 slli xh, xh, 20
303
304 /* The sign bit may have been lost in a carry-out. Put it back. */
305 slli a8, a8, 1
306 or xh, xh, a8
307 leaf_return
308
309.Ladd_exactlyhalf:
310 /* Round down to the nearest even value. */
311 srli xl, xl, 1
312 slli xl, xl, 1
313 leaf_return
314
315.Ladd_roundcarry:
316 /* xl is always zero when the rounding increment overflows, so
317 there's no need to round it to an even value. */
318 addi xh, xh, 1
319 /* Overflow to the exponent is OK. */
320 leaf_return
321
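	/* Illustrative C sketch of the rounding used above (round to
	   nearest, ties to even).  "frac" is the left-aligned leftover
	   fraction, so its top bit is the half-ulp position; a carry out of
	   the mantissa is handled separately (see .Ladd_roundcarry).

	     #include <stdint.h>

	     static uint64_t round_nearest_even (uint64_t mant, uint64_t frac)
	     {
	       if (frac >> 63)               // leftover >= 1/2: round up
	         {
	           mant++;
	           if ((frac << 1) == 0)     // exactly 1/2: tie, force lsb to 0
	             mant &= ~(uint64_t)1;
	         }
	       return mant;
	     }
	*/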
322
323 /* Subtraction */
324__subdf3_aux:
325
326 /* Handle NaNs and Infinities. (This code is placed before the
327 start of the function just to keep it in range of the limited
328 branch displacements.) */
329
330.Lsub_xnan_or_inf:
331 /* If y is neither Infinity nor NaN, return x. */
332 bnall yh, a6, .Lsub_return_nan_or_inf
333
334.Lsub_return_nan:
335 /* Both x and y are either NaN or Inf, so the result is NaN. */
336 movi a4, 0x80000 /* make it a quiet NaN */
337 or xh, xh, a4
338 leaf_return
339
340.Lsub_ynan_or_inf:
341 /* Negate y and return it. */
342 slli a7, a6, 11
343 xor xh, yh, a7
344 mov xl, yl
345
346.Lsub_return_nan_or_inf:
347 slli a7, xh, 12
348 or a7, a7, xl
349 bnez a7, .Lsub_return_nan
350 leaf_return
351
352.Lsub_opposite_signs:
353 /* Operand signs differ. Do an addition. */
354 slli a7, a6, 11
355 xor yh, yh, a7
356 j .Ladd_same_sign
357
358 .align 4
359 .global __subdf3
360 .type __subdf3, @function
361__subdf3:
362 leaf_entry sp, 16
363 movi a6, 0x7ff00000
364
365 /* Check if the two operands have the same sign. */
366 xor a7, xh, yh
367 bltz a7, .Lsub_opposite_signs
368
369.Lsub_same_sign:
370 /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
371 ball xh, a6, .Lsub_xnan_or_inf
372 ball yh, a6, .Lsub_ynan_or_inf
373
374 /* Compare the operands. In contrast to addition, the entire
375 value matters here. */
376 extui a7, xh, 20, 11
377 extui a8, yh, 20, 11
378 bltu xh, yh, .Lsub_xsmaller
379 beq xh, yh, .Lsub_compare_low
380
381.Lsub_ysmaller:
382 /* Check if the smaller (or equal) exponent is zero. */
383 bnone yh, a6, .Lsub_yexpzero
384
385 /* Replace yh sign/exponent with 0x001. */
386 or yh, yh, a6
387 slli yh, yh, 11
388 srli yh, yh, 11
389
390.Lsub_yexpdiff:
391 /* Compute the exponent difference. Optimize for difference < 32. */
392 sub a10, a7, a8
393 bgeui a10, 32, .Lsub_bigshifty
394
395 /* Shift yh/yl right by the exponent difference. Any bits that are
396 shifted out of yl are saved in a9 for rounding the result. */
397 ssr a10
398 movi a9, 0
399 src a9, yl, a9
400 src yl, yh, yl
401 srl yh, yh
402
403.Lsub_suby:
404 /* Do the 64-bit subtraction. */
405 sub xh, xh, yh
406 bgeu xl, yl, 1f
407 addi xh, xh, -1
4081: sub xl, xl, yl
409
410 /* Subtract the leftover bits in a9 from zero and propagate any
411 borrow from xh/xl. */
412 neg a9, a9
413 beqz a9, 1f
414 addi a5, xh, -1
415 moveqz xh, a5, xl
416 addi xl, xl, -1
4171:
418 /* Check if the subtract underflowed into the exponent. */
419 extui a10, xh, 20, 11
420 beq a10, a7, .Lsub_round
421 j .Lsub_borrow
422
423.Lsub_compare_low:
424 /* The high words are equal. Compare the low words. */
425 bltu xl, yl, .Lsub_xsmaller
426 bltu yl, xl, .Lsub_ysmaller
427 /* The operands are equal. Return 0.0. */
428 movi xh, 0
429 movi xl, 0
4301: leaf_return
431
432.Lsub_yexpzero:
433 /* y is a subnormal value. Replace its sign/exponent with zero,
434 i.e., no implicit "1.0". Unless x is also a subnormal, increment
435 y's apparent exponent because subnormals behave as if they had
436 the minimum (nonzero) exponent. */
437 slli yh, yh, 12
438 srli yh, yh, 12
439 bnone xh, a6, .Lsub_yexpdiff
440 addi a8, a8, 1
441 j .Lsub_yexpdiff
442
443.Lsub_bigshifty:
444 /* Exponent difference >= 64 -- just return the bigger value. */
445 bgeui a10, 64, 1b
446
447 /* Shift yh/yl right by the exponent difference. Any bits that are
448 shifted out are saved in a9 for rounding the result. */
449 ssr a10
450 sll a11, yl /* lost bits shifted out of yl */
451 src a9, yh, yl
452 srl yl, yh
453 movi yh, 0
454 beqz a11, .Lsub_suby
455 or a9, a9, a10 /* any positive, nonzero value will work */
456 j .Lsub_suby
457
458.Lsub_xsmaller:
459 /* Same thing as the "ysmaller" code, but with x and y swapped and
460 with y negated. */
461 bnone xh, a6, .Lsub_xexpzero
462
463 or xh, xh, a6
464 slli xh, xh, 11
465 srli xh, xh, 11
466
467.Lsub_xexpdiff:
468 sub a10, a8, a7
469 bgeui a10, 32, .Lsub_bigshiftx
470
471 ssr a10
472 movi a9, 0
473 src a9, xl, a9
474 src xl, xh, xl
475 srl xh, xh
476
477 /* Negate y. */
478 slli a11, a6, 11
479 xor yh, yh, a11
480
481.Lsub_subx:
482 sub xl, yl, xl
483 sub xh, yh, xh
484 bgeu yl, xl, 1f
485 addi xh, xh, -1
4861:
487 /* Subtract the leftover bits in a9 from zero and propagate any
488 borrow from xh/xl. */
489 neg a9, a9
490 beqz a9, 1f
491 addi a5, xh, -1
492 moveqz xh, a5, xl
493 addi xl, xl, -1
4941:
495 /* Check if the subtract underflowed into the exponent. */
496 extui a10, xh, 20, 11
497 bne a10, a8, .Lsub_borrow
498
499.Lsub_round:
500 /* Round up if the leftover fraction is >= 1/2. */
501 bgez a9, 1f
502 addi xl, xl, 1
503 beqz xl, .Lsub_roundcarry
504
505 /* Check if the leftover fraction is exactly 1/2. */
506 slli a9, a9, 1
507 beqz a9, .Lsub_exactlyhalf
5081: leaf_return
509
510.Lsub_xexpzero:
511 /* Same as "yexpzero". */
512 slli xh, xh, 12
513 srli xh, xh, 12
514 bnone yh, a6, .Lsub_xexpdiff
515 addi a7, a7, 1
516 j .Lsub_xexpdiff
517
518.Lsub_bigshiftx:
519 /* Mostly the same thing as "bigshifty", but with the sign bit of the
520 shifted value set so that the subsequent subtraction flips the
521 sign of y. */
522 bgeui a10, 64, .Lsub_returny
523
524 ssr a10
525 sll a11, xl
526 src a9, xh, xl
527 srl xl, xh
528 slli xh, a6, 11 /* set sign bit of xh */
529 beqz a11, .Lsub_subx
530 or a9, a9, a10
531 j .Lsub_subx
532
533.Lsub_returny:
534 /* Negate and return y. */
535 slli a7, a6, 11
536 xor xh, yh, a7
537 mov xl, yl
538 leaf_return
539
540.Lsub_borrow:
541 /* The subtraction has underflowed into the exponent field, so the
542 value needs to be renormalized. Shift the mantissa left as
543 needed to remove any leading zeros and adjust the exponent
544 accordingly. If the exponent is not large enough to remove
545 all the leading zeros, the result will be a subnormal value. */
546
547 slli a8, xh, 12
548 beqz a8, .Lsub_xhzero
549 do_nsau a6, a8, a7, a11
550 srli a8, a8, 12
551 bge a6, a10, .Lsub_subnormal
552 addi a6, a6, 1
553
554.Lsub_shift_lt32:
555 /* Shift the mantissa (a8/xl/a9) left by a6. */
556 ssl a6
557 src a8, a8, xl
558 src xl, xl, a9
559 sll a9, a9
560
561 /* Combine the shifted mantissa with the sign and exponent,
562 decrementing the exponent by a6. (The exponent has already
563 been decremented by one due to the borrow from the subtraction,
564 but adding the mantissa will increment the exponent by one.) */
565 srli xh, xh, 20
566 sub xh, xh, a6
567 slli xh, xh, 20
568 add xh, xh, a8
569 j .Lsub_round
570
571.Lsub_exactlyhalf:
572 /* Round down to the nearest even value. */
573 srli xl, xl, 1
574 slli xl, xl, 1
575 leaf_return
576
577.Lsub_roundcarry:
578 /* xl is always zero when the rounding increment overflows, so
579 there's no need to round it to an even value. */
580 addi xh, xh, 1
581 /* Overflow to the exponent is OK. */
582 leaf_return
583
584.Lsub_xhzero:
585 /* When normalizing the result, all the mantissa bits in the high
586 word are zero. Shift by "20 + (leading zero count of xl) + 1". */
587 do_nsau a6, xl, a7, a11
588 addi a6, a6, 21
589 blt a10, a6, .Lsub_subnormal
590
591.Lsub_normalize_shift:
592 bltui a6, 32, .Lsub_shift_lt32
593
594 ssl a6
595 src a8, xl, a9
596 sll xl, a9
597 movi a9, 0
598
599 srli xh, xh, 20
600 sub xh, xh, a6
601 slli xh, xh, 20
602 add xh, xh, a8
603 j .Lsub_round
604
605.Lsub_subnormal:
606 /* The exponent is too small to shift away all the leading zeros.
607 Set a6 to the current exponent (which has already been
608 decremented by the borrow) so that the exponent of the result
609 will be zero. Do not add 1 to a6 in this case, because: (1)
610 adding the mantissa will not increment the exponent, so there is
611 no need to subtract anything extra from the exponent to
612 compensate, and (2) the effective exponent of a subnormal is 1
613 not 0 so the shift amount must be 1 smaller than normal. */
614 mov a6, a10
615 j .Lsub_normalize_shift
616
617#endif /* L_addsubdf3 */
618
619#ifdef L_muldf3
620
621 /* Multiplication */
622#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
623#define XCHAL_NO_MUL 1
624#endif
625
626 .literal_position
627__muldf3_aux:
628
629 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
630 (This code is placed before the start of the function just to
631 keep it in range of the limited branch displacements.) */
632
633.Lmul_xexpzero:
634 /* Clear the sign bit of x. */
635 slli xh, xh, 1
636 srli xh, xh, 1
637
638 /* If x is zero, return zero. */
639 or a10, xh, xl
640 beqz a10, .Lmul_return_zero
641
642 /* Normalize x. Adjust the exponent in a8. */
643 beqz xh, .Lmul_xh_zero
644 do_nsau a10, xh, a11, a12
645 addi a10, a10, -11
646 ssl a10
647 src xh, xh, xl
648 sll xl, xl
649 movi a8, 1
650 sub a8, a8, a10
651 j .Lmul_xnormalized
652.Lmul_xh_zero:
653 do_nsau a10, xl, a11, a12
654 addi a10, a10, -11
655 movi a8, -31
656 sub a8, a8, a10
657 ssl a10
658 bltz a10, .Lmul_xl_srl
659 sll xh, xl
660 movi xl, 0
661 j .Lmul_xnormalized
662.Lmul_xl_srl:
663 srl xh, xl
664 sll xl, xl
665 j .Lmul_xnormalized
666
667.Lmul_yexpzero:
668 /* Clear the sign bit of y. */
669 slli yh, yh, 1
670 srli yh, yh, 1
671
672 /* If y is zero, return zero. */
673 or a10, yh, yl
674 beqz a10, .Lmul_return_zero
675
676 /* Normalize y. Adjust the exponent in a9. */
677 beqz yh, .Lmul_yh_zero
678 do_nsau a10, yh, a11, a12
679 addi a10, a10, -11
680 ssl a10
681 src yh, yh, yl
682 sll yl, yl
683 movi a9, 1
684 sub a9, a9, a10
685 j .Lmul_ynormalized
686.Lmul_yh_zero:
687 do_nsau a10, yl, a11, a12
688 addi a10, a10, -11
689 movi a9, -31
690 sub a9, a9, a10
691 ssl a10
692 bltz a10, .Lmul_yl_srl
693 sll yh, yl
694 movi yl, 0
695 j .Lmul_ynormalized
696.Lmul_yl_srl:
697 srl yh, yl
698 sll yl, yl
699 j .Lmul_ynormalized
700
701.Lmul_return_zero:
702 /* Return zero with the appropriate sign bit. */
703 srli xh, a7, 31
704 slli xh, xh, 31
705 movi xl, 0
706 j .Lmul_done
707
708.Lmul_xnan_or_inf:
709 /* If y is zero, return NaN. */
710 bnez yl, 1f
711 slli a8, yh, 1
712 beqz a8, .Lmul_return_nan
7131:
714 /* If y is NaN, return y. */
715 bnall yh, a6, .Lmul_returnx
716 slli a8, yh, 12
717 or a8, a8, yl
718 beqz a8, .Lmul_returnx
719
720.Lmul_returny:
721 mov xh, yh
722 mov xl, yl
723
724.Lmul_returnx:
725 slli a8, xh, 12
726 or a8, a8, xl
727 bnez a8, .Lmul_return_nan
728 /* Set the sign bit and return. */
729 extui a7, a7, 31, 1
730 slli xh, xh, 1
731 ssai 1
732 src xh, a7, xh
733 j .Lmul_done
734
735.Lmul_ynan_or_inf:
736 /* If x is zero, return NaN. */
737 bnez xl, .Lmul_returny
738 slli a8, xh, 1
739 bnez a8, .Lmul_returny
740 mov xh, yh
741
742.Lmul_return_nan:
743 movi a4, 0x80000 /* make it a quiet NaN */
744 or xh, xh, a4
745 j .Lmul_done
746
747 .align 4
748 .global __muldf3
749 .type __muldf3, @function
750__muldf3:
751#if __XTENSA_CALL0_ABI__
752 leaf_entry sp, 32
753 addi sp, sp, -32
754 s32i a12, sp, 16
755 s32i a13, sp, 20
756 s32i a14, sp, 24
757 s32i a15, sp, 28
758#elif XCHAL_NO_MUL
759 /* This is not really a leaf function; allocate enough stack space
760 to allow CALL12s to a helper function. */
761 leaf_entry sp, 64
762#else
763 leaf_entry sp, 32
764#endif
765 movi a6, 0x7ff00000
766
767 /* Get the sign of the result. */
768 xor a7, xh, yh
769
770 /* Check for NaN and infinity. */
771 ball xh, a6, .Lmul_xnan_or_inf
772 ball yh, a6, .Lmul_ynan_or_inf
773
774 /* Extract the exponents. */
775 extui a8, xh, 20, 11
776 extui a9, yh, 20, 11
777
778 beqz a8, .Lmul_xexpzero
779.Lmul_xnormalized:
780 beqz a9, .Lmul_yexpzero
781.Lmul_ynormalized:
782
783 /* Add the exponents. */
784 add a8, a8, a9
785
786 /* Replace sign/exponent fields with explicit "1.0". */
787 movi a10, 0x1fffff
788 or xh, xh, a6
789 and xh, xh, a10
790 or yh, yh, a6
791 and yh, yh, a10
792
793 /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6.
794 The least-significant word of the result is thrown away except
795 that if it is nonzero, the lsb of a6 is set to 1. */
796#if XCHAL_HAVE_MUL32_HIGH
797
798 /* Compute a6 with any carry-outs in a10. */
799 movi a10, 0
800 mull a6, xl, yh
801 mull a11, xh, yl
802 add a6, a6, a11
803 bgeu a6, a11, 1f
804 addi a10, a10, 1
8051:
806 muluh a11, xl, yl
807 add a6, a6, a11
808 bgeu a6, a11, 1f
809 addi a10, a10, 1
8101:
811 /* If the low word of the result is nonzero, set the lsb of a6. */
812 mull a11, xl, yl
813 beqz a11, 1f
814 movi a9, 1
815 or a6, a6, a9
8161:
817 /* Compute xl with any carry-outs in a9. */
818 movi a9, 0
819 mull a11, xh, yh
820 add a10, a10, a11
821 bgeu a10, a11, 1f
822 addi a9, a9, 1
8231:
824 muluh a11, xh, yl
825 add a10, a10, a11
826 bgeu a10, a11, 1f
827 addi a9, a9, 1
8281:
829 muluh xl, xl, yh
830 add xl, xl, a10
831 bgeu xl, a10, 1f
832 addi a9, a9, 1
8331:
834 /* Compute xh. */
835 muluh xh, xh, yh
836 add xh, xh, a9
837
838#else /* ! XCHAL_HAVE_MUL32_HIGH */
839
840 /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
841 products. These partial products are:
842
843 0 xll * yll
844
845 1 xll * ylh
846 2 xlh * yll
847
848 3 xll * yhl
849 4 xlh * ylh
850 5 xhl * yll
851
852 6 xll * yhh
853 7 xlh * yhl
854 8 xhl * ylh
855 9 xhh * yll
856
857 10 xlh * yhh
858 11 xhl * yhl
859 12 xhh * ylh
860
861 13 xhl * yhh
862 14 xhh * yhl
863
864 15 xhh * yhh
865
866 where the input chunks are (hh, hl, lh, ll). If using the Mul16
867 or Mul32 multiplier options, these input chunks must be stored in
868 separate registers. For Mac16, the UMUL.AA.* opcodes can specify
869 that the inputs come from either half of the registers, so there
870 is no need to shift them out ahead of time. If there is no
871 multiply hardware, the 16-bit chunks can be extracted when setting
872 up the arguments to the separate multiply function. */
873
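	/* Illustrative C sketch of the partial-product scheme described
	   above: a 64x64 -> 128-bit multiply built from 16-bit chunks.
	   This is a rough model of the accumulation, not the exact
	   register allocation (the real code also folds the lowest word
	   into a single sticky bit).

	     #include <stdint.h>

	     // res[0] = low 64 bits, res[1] = high 64 bits of x * y.
	     static void mul64x64 (uint64_t x, uint64_t y, uint64_t res[2])
	     {
	       uint64_t col[8] = { 0 };          // 16-bit columns of the result
	       for (int i = 0; i < 4; i++)
	         for (int j = 0; j < 4; j++)
	           {
	             uint64_t pp = (uint64_t)((x >> (16 * i)) & 0xffff)
	                           * ((y >> (16 * j)) & 0xffff);
	             col[i + j]     += pp & 0xffff;   // low half of partial product
	             col[i + j + 1] += pp >> 16;      // high half of partial product
	           }
	       uint64_t carry = 0;
	       for (int k = 0; k < 8; k++)            // propagate the column carries
	         {
	           carry += col[k];
	           col[k] = carry & 0xffff;
	           carry >>= 16;
	         }
	       res[0] = col[0] | (col[1] << 16) | (col[2] << 32) | (col[3] << 48);
	       res[1] = col[4] | (col[5] << 16) | (col[6] << 32) | (col[7] << 48);
	     }
	*/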
874 /* Save a7 since it is needed to hold a temporary value. */
875 s32i a7, sp, 4
876#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
877 /* Calling a separate multiply function will clobber a0 and requires
878 use of a8 as a temporary, so save those values now. (The function
879 uses a custom ABI so nothing else needs to be saved.) */
880 s32i a0, sp, 0
881 s32i a8, sp, 8
882#endif
883
884#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
885
886#define xlh a12
887#define ylh a13
888#define xhh a14
889#define yhh a15
890
891 /* Get the high halves of the inputs into registers. */
892 srli xlh, xl, 16
893 srli ylh, yl, 16
894 srli xhh, xh, 16
895 srli yhh, yh, 16
896
897#define xll xl
898#define yll yl
899#define xhl xh
900#define yhl yh
901
902#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
903 /* Clear the high halves of the inputs. This does not matter
904 for MUL16 because the high bits are ignored. */
905 extui xl, xl, 0, 16
906 extui xh, xh, 0, 16
907 extui yl, yl, 0, 16
908 extui yh, yh, 0, 16
909#endif
910#endif /* MUL16 || MUL32 */
911
912
913#if XCHAL_HAVE_MUL16
914
915#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
916 mul16u dst, xreg ## xhalf, yreg ## yhalf
917
918#elif XCHAL_HAVE_MUL32
919
920#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
921 mull dst, xreg ## xhalf, yreg ## yhalf
922
923#elif XCHAL_HAVE_MAC16
924
925/* The preprocessor insists on inserting a space when concatenating after
926 a period in the definition of do_mul below. These macros are a workaround
927 using underscores instead of periods when doing the concatenation. */
928#define umul_aa_ll umul.aa.ll
929#define umul_aa_lh umul.aa.lh
930#define umul_aa_hl umul.aa.hl
931#define umul_aa_hh umul.aa.hh
932
933#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
934 umul_aa_ ## xhalf ## yhalf xreg, yreg; \
935 rsr dst, ACCLO
936
937#else /* no multiply hardware */
938
939#define set_arg_l(dst, src) \
940 extui dst, src, 0, 16
941#define set_arg_h(dst, src) \
942 srli dst, src, 16
943
944#if __XTENSA_CALL0_ABI__
945#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
946 set_arg_ ## xhalf (a13, xreg); \
947 set_arg_ ## yhalf (a14, yreg); \
948 call0 .Lmul_mulsi3; \
949 mov dst, a12
950#else
951#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
952 set_arg_ ## xhalf (a14, xreg); \
953 set_arg_ ## yhalf (a15, yreg); \
954 call12 .Lmul_mulsi3; \
955 mov dst, a14
956#endif /* __XTENSA_CALL0_ABI__ */
957
958#endif /* no multiply hardware */
959
960 /* Add pp1 and pp2 into a10 with carry-out in a9. */
961 do_mul(a10, xl, l, yl, h) /* pp 1 */
962 do_mul(a11, xl, h, yl, l) /* pp 2 */
963 movi a9, 0
964 add a10, a10, a11
965 bgeu a10, a11, 1f
966 addi a9, a9, 1
9671:
968 /* Initialize a6 with a9/a10 shifted into position. Note that
969 this value can be safely incremented without any carry-outs. */
970 ssai 16
971 src a6, a9, a10
972
973 /* Compute the low word into a10. */
974 do_mul(a11, xl, l, yl, l) /* pp 0 */
975 sll a10, a10
976 add a10, a10, a11
977 bgeu a10, a11, 1f
978 addi a6, a6, 1
9791:
980 /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
981 This is good enough to determine the low half of a6, so that any
982 nonzero bits from the low word of the result can be collapsed
983 into a6, freeing up a register. */
984 movi a9, 0
985 do_mul(a11, xl, l, yh, l) /* pp 3 */
986 add a6, a6, a11
987 bgeu a6, a11, 1f
988 addi a9, a9, 1
9891:
990 do_mul(a11, xl, h, yl, h) /* pp 4 */
991 add a6, a6, a11
992 bgeu a6, a11, 1f
993 addi a9, a9, 1
9941:
995 do_mul(a11, xh, l, yl, l) /* pp 5 */
996 add a6, a6, a11
997 bgeu a6, a11, 1f
998 addi a9, a9, 1
9991:
1000 /* Collapse any nonzero bits from the low word into a6. */
1001 beqz a10, 1f
1002 movi a11, 1
1003 or a6, a6, a11
10041:
1005 /* Add pp6-9 into a11 with carry-outs in a10. */
1006 do_mul(a7, xl, l, yh, h) /* pp 6 */
1007 do_mul(a11, xh, h, yl, l) /* pp 9 */
1008 movi a10, 0
1009 add a11, a11, a7
1010 bgeu a11, a7, 1f
1011 addi a10, a10, 1
10121:
1013 do_mul(a7, xl, h, yh, l) /* pp 7 */
1014 add a11, a11, a7
1015 bgeu a11, a7, 1f
1016 addi a10, a10, 1
10171:
1018 do_mul(a7, xh, l, yl, h) /* pp 8 */
1019 add a11, a11, a7
1020 bgeu a11, a7, 1f
1021 addi a10, a10, 1
10221:
1023 /* Shift a10/a11 into position, and add low half of a11 to a6. */
1024 src a10, a10, a11
1025 add a10, a10, a9
1026 sll a11, a11
1027 add a6, a6, a11
1028 bgeu a6, a11, 1f
1029 addi a10, a10, 1
10301:
1031 /* Add pp10-12 into xl with carry-outs in a9. */
1032 movi a9, 0
1033 do_mul(xl, xl, h, yh, h) /* pp 10 */
1034 add xl, xl, a10
1035 bgeu xl, a10, 1f
1036 addi a9, a9, 1
10371:
1038 do_mul(a10, xh, l, yh, l) /* pp 11 */
1039 add xl, xl, a10
1040 bgeu xl, a10, 1f
1041 addi a9, a9, 1
10421:
1043 do_mul(a10, xh, h, yl, h) /* pp 12 */
1044 add xl, xl, a10
1045 bgeu xl, a10, 1f
1046 addi a9, a9, 1
10471:
1048 /* Add pp13-14 into a11 with carry-outs in a10. */
1049 do_mul(a11, xh, l, yh, h) /* pp 13 */
1050 do_mul(a7, xh, h, yh, l) /* pp 14 */
1051 movi a10, 0
1052 add a11, a11, a7
1053 bgeu a11, a7, 1f
1054 addi a10, a10, 1
10551:
1056 /* Shift a10/a11 into position, and add low half of a11 to a6. */
1057 src a10, a10, a11
1058 add a10, a10, a9
1059 sll a11, a11
1060 add xl, xl, a11
1061 bgeu xl, a11, 1f
1062 addi a10, a10, 1
10631:
1064 /* Compute xh. */
1065 do_mul(xh, xh, h, yh, h) /* pp 15 */
1066 add xh, xh, a10
1067
1068 /* Restore values saved on the stack during the multiplication. */
1069 l32i a7, sp, 4
1070#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
1071 l32i a0, sp, 0
1072 l32i a8, sp, 8
1073#endif
1074#endif /* ! XCHAL_HAVE_MUL32_HIGH */
1075
1076 /* Shift left by 12 bits, unless there was a carry-out from the
1077 multiply, in which case, shift by 11 bits and increment the
1078 exponent. Note: It is convenient to use the constant 0x3ff
1079 instead of 0x400 when removing the extra exponent bias (so that
1080 it is easy to construct 0x7fe for the overflow check). Reverse
1081 the logic here to decrement the exponent sum by one unless there
1082 was a carry-out. */
1083 movi a4, 11
1084 srli a5, xh, 21 - 12
1085 bnez a5, 1f
1086 addi a4, a4, 1
1087 addi a8, a8, -1
10881: ssl a4
1089 src xh, xh, xl
1090 src xl, xl, a6
1091 sll a6, a6
1092
1093 /* Subtract the extra bias from the exponent sum (plus one to account
1094 for the explicit "1.0" of the mantissa that will be added to the
1095 exponent in the final result). */
1096 movi a4, 0x3ff
1097 sub a8, a8, a4
1098
1099 /* Check for over/underflow. The value in a8 is one less than the
1100 final exponent, so values in the range 0..7fd are OK here. */
1101 slli a4, a4, 1 /* 0x7fe */
1102 bgeu a8, a4, .Lmul_overflow
1103
1104.Lmul_round:
1105 /* Round. */
1106 bgez a6, .Lmul_rounded
1107 addi xl, xl, 1
1108 beqz xl, .Lmul_roundcarry
1109 slli a6, a6, 1
1110 beqz a6, .Lmul_exactlyhalf
1111
1112.Lmul_rounded:
1113 /* Add the exponent to the mantissa. */
1114 slli a8, a8, 20
1115 add xh, xh, a8
1116
1117.Lmul_addsign:
1118 /* Add the sign bit. */
1119 srli a7, a7, 31
1120 slli a7, a7, 31
1121 or xh, xh, a7
1122
1123.Lmul_done:
1124#if __XTENSA_CALL0_ABI__
1125 l32i a12, sp, 16
1126 l32i a13, sp, 20
1127 l32i a14, sp, 24
1128 l32i a15, sp, 28
1129 addi sp, sp, 32
1130#endif
1131 leaf_return
1132
1133.Lmul_exactlyhalf:
1134 /* Round down to the nearest even value. */
1135 srli xl, xl, 1
1136 slli xl, xl, 1
1137 j .Lmul_rounded
1138
1139.Lmul_roundcarry:
1140 /* xl is always zero when the rounding increment overflows, so
1141 there's no need to round it to an even value. */
1142 addi xh, xh, 1
1143 /* Overflow is OK -- it will be added to the exponent. */
1144 j .Lmul_rounded
1145
1146.Lmul_overflow:
1147 bltz a8, .Lmul_underflow
1148 /* Return +/- Infinity. */
1149 addi a8, a4, 1 /* 0x7ff */
1150 slli xh, a8, 20
1151 movi xl, 0
1152 j .Lmul_addsign
1153
1154.Lmul_underflow:
1155 /* Create a subnormal value, where the exponent field contains zero,
1156 but the effective exponent is 1. The value of a8 is one less than
1157 the actual exponent, so just negate it to get the shift amount. */
1158 neg a8, a8
1159 mov a9, a6
1160 ssr a8
1161 bgeui a8, 32, .Lmul_bigshift
1162
1163 /* Shift xh/xl right. Any bits that are shifted out of xl are saved
1164 in a6 (combined with the shifted-out bits currently in a6) for
1165 rounding the result. */
1166 sll a6, xl
1167 src xl, xh, xl
1168 srl xh, xh
1169 j 1f
1170
1171.Lmul_bigshift:
1172 bgeui a8, 64, .Lmul_flush_to_zero
1173 sll a10, xl /* lost bits shifted out of xl */
1174 src a6, xh, xl
1175 srl xl, xh
1176 movi xh, 0
1177 or a9, a9, a10
1178
1179 /* Set the exponent to zero. */
11801: movi a8, 0
1181
1182 /* Pack any nonzero bits shifted out into a6. */
1183 beqz a9, .Lmul_round
1184 movi a9, 1
1185 or a6, a6, a9
1186 j .Lmul_round
1187
1188.Lmul_flush_to_zero:
1189 /* Return zero with the appropriate sign bit. */
1190 srli xh, a7, 31
1191 slli xh, xh, 31
1192 movi xl, 0
1193 j .Lmul_done
1194
1195#if XCHAL_NO_MUL
1196
1197 /* For Xtensa processors with no multiply hardware, this simplified
1198 version of _mulsi3 is used for multiplying 16-bit chunks of
1199 the floating-point mantissas. When using CALL0, this function
1200 uses a custom ABI: the inputs are passed in a13 and a14, the
1201 result is returned in a12, and a8 and a15 are clobbered. */
1202 .align 4
1203.Lmul_mulsi3:
1204 leaf_entry sp, 16
1205 .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
1206 movi \dst, 0
12071: add \tmp1, \src2, \dst
1208 extui \tmp2, \src1, 0, 1
1209 movnez \dst, \tmp1, \tmp2
1210
1211 do_addx2 \tmp1, \src2, \dst, \tmp1
1212 extui \tmp2, \src1, 1, 1
1213 movnez \dst, \tmp1, \tmp2
1214
1215 do_addx4 \tmp1, \src2, \dst, \tmp1
1216 extui \tmp2, \src1, 2, 1
1217 movnez \dst, \tmp1, \tmp2
1218
1219 do_addx8 \tmp1, \src2, \dst, \tmp1
1220 extui \tmp2, \src1, 3, 1
1221 movnez \dst, \tmp1, \tmp2
1222
1223 srli \src1, \src1, 4
1224 slli \src2, \src2, 4
1225 bnez \src1, 1b
1226 .endm
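	/* Illustrative C sketch of the shift-and-add multiply implemented
	   by the macro above: four bits of the first operand are examined
	   per iteration, with the second operand scaled by addx2/addx4/addx8.

	     #include <stdint.h>

	     static uint32_t mulsi3_sketch (uint32_t a, uint32_t b)
	     {
	       uint32_t dst = 0;
	       do
	         {
	           if (a & 1) dst += b;        // bit 0
	           if (a & 2) dst += b << 1;   // bit 1 (addx2)
	           if (a & 4) dst += b << 2;   // bit 2 (addx4)
	           if (a & 8) dst += b << 3;   // bit 3 (addx8)
	           a >>= 4;
	           b <<= 4;
	         }
	       while (a != 0);
	       return dst;
	     }
	*/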
1227#if __XTENSA_CALL0_ABI__
1228 mul_mulsi3_body a12, a13, a14, a15, a8
1229#else
1230 /* The result will be written into a2, so save that argument in a4. */
1231 mov a4, a2
1232 mul_mulsi3_body a2, a4, a3, a5, a6
1233#endif
1234 leaf_return
1235#endif /* XCHAL_NO_MUL */
1236#endif /* L_muldf3 */
1237
1238#ifdef L_divdf3
1239
1240 /* Division */
1241
1242#if XCHAL_HAVE_DFP_DIV
1243
1244 .text
1245 .align 4
1246 .global __divdf3
1247 .type __divdf3, @function
1248__divdf3:
1249 leaf_entry sp, 16
1250
1251 wfrd f1, xh, xl
1252 wfrd f2, yh, yl
1253
1254 div0.d f3, f2
1255 nexp01.d f4, f2
1256 const.d f0, 1
1257 maddn.d f0, f4, f3
1258 const.d f5, 0
1259 mov.d f7, f2
1260 mkdadj.d f7, f1
1261 maddn.d f3, f0, f3
1262 maddn.d f5, f0, f0
1263 nexp01.d f1, f1
1264 div0.d f2, f2
1265 maddn.d f3, f5, f3
1266 const.d f5, 1
1267 const.d f0, 0
1268 neg.d f6, f1
1269 maddn.d f5, f4, f3
1270 maddn.d f0, f6, f2
1271 maddn.d f3, f5, f3
1272 maddn.d f6, f4, f0
1273 const.d f2, 1
1274 maddn.d f2, f4, f3
1275 maddn.d f0, f6, f3
1276 neg.d f1, f1
1277 maddn.d f3, f2, f3
1278 maddn.d f1, f4, f0
1279 addexpm.d f0, f7
1280 addexp.d f3, f7
1281 divn.d f0, f1, f3
1282
1283 rfr xl, f0
1284 rfrd xh, f0
1285
1286 leaf_return
1287
1288#else
1289
1290 .literal_position
1291
1292__divdf3_aux:
1293
1294 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
1295 (This code is placed before the start of the function just to
1296 keep it in range of the limited branch displacements.) */
1297
1298.Ldiv_yexpzero:
1299 /* Clear the sign bit of y. */
1300 slli yh, yh, 1
1301 srli yh, yh, 1
1302
1303 /* Check for division by zero. */
1304 or a10, yh, yl
1305 beqz a10, .Ldiv_yzero
1306
1307 /* Normalize y. Adjust the exponent in a9. */
1308 beqz yh, .Ldiv_yh_zero
1309 do_nsau a10, yh, a11, a9
1310 addi a10, a10, -11
1311 ssl a10
1312 src yh, yh, yl
1313 sll yl, yl
1314 movi a9, 1
1315 sub a9, a9, a10
1316 j .Ldiv_ynormalized
1317.Ldiv_yh_zero:
1318 do_nsau a10, yl, a11, a9
1319 addi a10, a10, -11
1320 movi a9, -31
1321 sub a9, a9, a10
1322 ssl a10
1323 bltz a10, .Ldiv_yl_srl
1324 sll yh, yl
1325 movi yl, 0
1326 j .Ldiv_ynormalized
1327.Ldiv_yl_srl:
1328 srl yh, yl
1329 sll yl, yl
1330 j .Ldiv_ynormalized
1331
1332.Ldiv_yzero:
1333 /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
1334 slli xh, xh, 1
1335 srli xh, xh, 1
1336 or xl, xl, xh
1337 srli xh, a7, 31
1338 slli xh, xh, 31
1339 or xh, xh, a6
1340 bnez xl, 1f
1341 movi a4, 0x80000 /* make it a quiet NaN */
1342 or xh, xh, a4
13431: movi xl, 0
1344 leaf_return
1345
1346.Ldiv_xexpzero:
1347 /* Clear the sign bit of x. */
1348 slli xh, xh, 1
1349 srli xh, xh, 1
1350
1351 /* If x is zero, return zero. */
1352 or a10, xh, xl
1353 beqz a10, .Ldiv_return_zero
1354
1355 /* Normalize x. Adjust the exponent in a8. */
1356 beqz xh, .Ldiv_xh_zero
1357 do_nsau a10, xh, a11, a8
1358 addi a10, a10, -11
1359 ssl a10
1360 src xh, xh, xl
1361 sll xl, xl
1362 movi a8, 1
1363 sub a8, a8, a10
1364 j .Ldiv_xnormalized
1365.Ldiv_xh_zero:
1366 do_nsau a10, xl, a11, a8
1367 addi a10, a10, -11
1368 movi a8, -31
1369 sub a8, a8, a10
1370 ssl a10
1371 bltz a10, .Ldiv_xl_srl
1372 sll xh, xl
1373 movi xl, 0
1374 j .Ldiv_xnormalized
1375.Ldiv_xl_srl:
1376 srl xh, xl
1377 sll xl, xl
1378 j .Ldiv_xnormalized
1379
1380.Ldiv_return_zero:
1381 /* Return zero with the appropriate sign bit. */
1382 srli xh, a7, 31
1383 slli xh, xh, 31
1384 movi xl, 0
1385 leaf_return
1386
1387.Ldiv_xnan_or_inf:
1388 /* Set the sign bit of the result. */
1389 srli a7, yh, 31
1390 slli a7, a7, 31
1391 xor xh, xh, a7
1392 /* If y is NaN or Inf, return NaN. */
1393 ball yh, a6, .Ldiv_return_nan
1394 slli a8, xh, 12
1395 or a8, a8, xl
1396 bnez a8, .Ldiv_return_nan
1397 leaf_return
1398
1399.Ldiv_ynan_or_inf:
1400 /* If y is Infinity, return zero. */
1401 slli a8, yh, 12
1402 or a8, a8, yl
1403 beqz a8, .Ldiv_return_zero
1404 /* y is NaN; return it. */
1405 mov xh, yh
1406 mov xl, yl
1407
1408.Ldiv_return_nan:
1409 movi a4, 0x80000 /* make it a quiet NaN */
1410 or xh, xh, a4
1411 leaf_return
1412
1413.Ldiv_highequal1:
1414 bltu xl, yl, 2f
1415 j 3f
1416
1417 .align 4
1418 .global __divdf3
1419 .type __divdf3, @function
1420__divdf3:
1421 leaf_entry sp, 16
1422 movi a6, 0x7ff00000
1423
1424 /* Get the sign of the result. */
1425 xor a7, xh, yh
1426
1427 /* Check for NaN and infinity. */
1428 ball xh, a6, .Ldiv_xnan_or_inf
1429 ball yh, a6, .Ldiv_ynan_or_inf
1430
1431 /* Extract the exponents. */
1432 extui a8, xh, 20, 11
1433 extui a9, yh, 20, 11
1434
1435 beqz a9, .Ldiv_yexpzero
1436.Ldiv_ynormalized:
1437 beqz a8, .Ldiv_xexpzero
1438.Ldiv_xnormalized:
1439
1440 /* Subtract the exponents. */
1441 sub a8, a8, a9
1442
1443 /* Replace sign/exponent fields with explicit "1.0". */
1444 movi a10, 0x1fffff
1445 or xh, xh, a6
1446 and xh, xh, a10
1447 or yh, yh, a6
1448 and yh, yh, a10
1449
1450 /* Set SAR for left shift by one. */
1451 ssai (32 - 1)
1452
1453 /* The first digit of the mantissa division must be a one.
1454 Shift x (and adjust the exponent) as needed to make this true. */
1455 bltu yh, xh, 3f
1456 beq yh, xh, .Ldiv_highequal1
14572: src xh, xh, xl
1458 sll xl, xl
1459 addi a8, a8, -1
14603:
1461 /* Do the first subtraction and shift. */
1462 sub xh, xh, yh
1463 bgeu xl, yl, 1f
1464 addi xh, xh, -1
14651: sub xl, xl, yl
1466 src xh, xh, xl
1467 sll xl, xl
1468
1469 /* Put the quotient into a10/a11. */
1470 movi a10, 0
1471 movi a11, 1
1472
1473 /* Divide one bit at a time for 52 bits. */
1474 movi a9, 52
1475#if XCHAL_HAVE_LOOPS
1476 loop a9, .Ldiv_loopend
1477#endif
1478.Ldiv_loop:
1479 /* Shift the quotient << 1. */
1480 src a10, a10, a11
1481 sll a11, a11
1482
1483 /* Is this digit a 0 or 1? */
1484 bltu xh, yh, 3f
1485 beq xh, yh, .Ldiv_highequal2
1486
1487 /* Output a 1 and subtract. */
14882: addi a11, a11, 1
1489 sub xh, xh, yh
1490 bgeu xl, yl, 1f
1491 addi xh, xh, -1
14921: sub xl, xl, yl
1493
1494 /* Shift the dividend << 1. */
14953: src xh, xh, xl
1496 sll xl, xl
1497
1498#if !XCHAL_HAVE_LOOPS
1499 addi a9, a9, -1
1500 bnez a9, .Ldiv_loop
1501#endif
1502.Ldiv_loopend:
1503
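	/* Illustrative C sketch of the quotient loop above: a restoring
	   long division that produces one quotient bit per step.  Here x
	   and y are the aligned mantissas, modelled as single 64-bit values
	   rather than register pairs, with y <= x < 2*y on entry (the code
	   above shifts x to make this true, so the first quotient bit is 1).

	     #include <stdint.h>

	     static uint64_t divide_mantissas (uint64_t x, uint64_t y, uint64_t *rem2)
	     {
	       uint64_t q = 1;               // first quotient bit
	       x -= y;
	       x <<= 1;
	       for (int i = 0; i < 52; i++)
	         {
	           q <<= 1;
	           if (x >= y)               // next quotient bit is 1
	             {
	               q |= 1;
	               x -= y;
	             }
	           x <<= 1;                  // shift the remainder up
	         }
	       *rem2 = x;                    // twice the remainder, used for rounding
	       return q;                     // 53-bit quotient
	     }
	*/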
1504 /* Add the exponent bias (less one to account for the explicit "1.0"
1505 of the mantissa that will be added to the exponent in the final
1506 result). */
1507 movi a9, 0x3fe
1508 add a8, a8, a9
1509
1510 /* Check for over/underflow. The value in a8 is one less than the
1511 final exponent, so values in the range 0..7fd are OK here. */
1512 addmi a9, a9, 0x400 /* 0x7fe */
1513 bgeu a8, a9, .Ldiv_overflow
1514
1515.Ldiv_round:
1516 /* Round. The remainder (<< 1) is in xh/xl. */
1517 bltu xh, yh, .Ldiv_rounded
1518 beq xh, yh, .Ldiv_highequal3
1519.Ldiv_roundup:
1520 addi a11, a11, 1
1521 beqz a11, .Ldiv_roundcarry
1522
1523.Ldiv_rounded:
1524 mov xl, a11
1525 /* Add the exponent to the mantissa. */
1526 slli a8, a8, 20
1527 add xh, a10, a8
1528
1529.Ldiv_addsign:
1530 /* Add the sign bit. */
1531 srli a7, a7, 31
1532 slli a7, a7, 31
1533 or xh, xh, a7
1534 leaf_return
1535
1536.Ldiv_highequal2:
1537 bgeu xl, yl, 2b
1538 j 3b
1539
1540.Ldiv_highequal3:
1541 bltu xl, yl, .Ldiv_rounded
1542 bne xl, yl, .Ldiv_roundup
1543
1544 /* Remainder is exactly half the divisor. Round even. */
1545 addi a11, a11, 1
1546 beqz a11, .Ldiv_roundcarry
1547 srli a11, a11, 1
1548 slli a11, a11, 1
1549 j .Ldiv_rounded
1550
1551.Ldiv_overflow:
1552 bltz a8, .Ldiv_underflow
1553 /* Return +/- Infinity. */
1554 addi a8, a9, 1 /* 0x7ff */
1555 slli xh, a8, 20
1556 movi xl, 0
1557 j .Ldiv_addsign
1558
1559.Ldiv_underflow:
1560 /* Create a subnormal value, where the exponent field contains zero,
1561 but the effective exponent is 1. The value of a8 is one less than
1562 the actual exponent, so just negate it to get the shift amount. */
1563 neg a8, a8
1564 ssr a8
1565 bgeui a8, 32, .Ldiv_bigshift
1566
1567 /* Shift a10/a11 right. Any bits that are shifted out of a11 are
1568 saved in a6 for rounding the result. */
1569 sll a6, a11
1570 src a11, a10, a11
1571 srl a10, a10
1572 j 1f
1573
1574.Ldiv_bigshift:
1575 bgeui a8, 64, .Ldiv_flush_to_zero
1576 sll a9, a11 /* lost bits shifted out of a11 */
1577 src a6, a10, a11
1578 srl a11, a10
1579 movi a10, 0
1580 or xl, xl, a9
1581
1582 /* Set the exponent to zero. */
15831: movi a8, 0
1584
1585 /* Pack any nonzero remainder (in xh/xl) into a6. */
1586 or xh, xh, xl
1587 beqz xh, 1f
1588 movi a9, 1
1589 or a6, a6, a9
1590
1591 /* Round a10/a11 based on the bits shifted out into a6. */
15921: bgez a6, .Ldiv_rounded
1593 addi a11, a11, 1
1594 beqz a11, .Ldiv_roundcarry
1595 slli a6, a6, 1
1596 bnez a6, .Ldiv_rounded
1597 srli a11, a11, 1
1598 slli a11, a11, 1
1599 j .Ldiv_rounded
1600
1601.Ldiv_roundcarry:
1602 /* a11 is always zero when the rounding increment overflows, so
1603 there's no need to round it to an even value. */
1604 addi a10, a10, 1
1605 /* Overflow to the exponent field is OK. */
1606 j .Ldiv_rounded
1607
1608.Ldiv_flush_to_zero:
1609 /* Return zero with the appropriate sign bit. */
1610 srli xh, a7, 31
1611 slli xh, xh, 31
1612 movi xl, 0
1613 leaf_return
1614
1615#endif /* XCHAL_HAVE_DFP_DIV */
1616
1617#endif /* L_divdf3 */
1618
1619#ifdef L_cmpdf2
1620
1621 /* Equal and Not Equal */
1622
1623 .align 4
1624 .global __eqdf2
1625 .global __nedf2
1626 .set __nedf2, __eqdf2
1627 .type __eqdf2, @function
1628__eqdf2:
1629 leaf_entry sp, 16
1630 bne xl, yl, 2f
1631 bne xh, yh, 4f
1632
1633 /* The values are equal but NaN != NaN. Check the exponent. */
1634 movi a6, 0x7ff00000
1635 ball xh, a6, 3f
1636
1637 /* Equal. */
1638 movi a2, 0
1639 leaf_return
1640
1641 /* Not equal. */
16422: movi a2, 1
1643 leaf_return
1644
1645 /* Check if the mantissas are nonzero. */
16463: slli a7, xh, 12
1647 or a7, a7, xl
1648 j 5f
1649
1650 /* Check if x and y are zero with different signs. */
16514: or a7, xh, yh
1652 slli a7, a7, 1
1653 or a7, a7, xl /* xl == yl here */
1654
1655 /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1656 of x when exponent(x) = 0x7ff and x == y. */
16575: movi a2, 0
1658 movi a3, 1
1659 movnez a2, a3, a7
1660 leaf_return
1661
1662
1663 /* Greater Than */
1664
1665 .align 4
1666 .global __gtdf2
1667 .type __gtdf2, @function
1668__gtdf2:
1669 leaf_entry sp, 16
1670 movi a6, 0x7ff00000
1671 ball xh, a6, 2f
16721: bnall yh, a6, .Lle_cmp
1673
1674 /* Check if y is a NaN. */
1675 slli a7, yh, 12
1676 or a7, a7, yl
1677 beqz a7, .Lle_cmp
1678 movi a2, 0
1679 leaf_return
1680
1681 /* Check if x is a NaN. */
16822: slli a7, xh, 12
1683 or a7, a7, xl
1684 beqz a7, 1b
1685 movi a2, 0
1686 leaf_return
1687
1688
1689 /* Less Than or Equal */
1690
1691 .align 4
1692 .global __ledf2
1693 .type __ledf2, @function
1694__ledf2:
1695 leaf_entry sp, 16
1696 movi a6, 0x7ff00000
1697 ball xh, a6, 2f
16981: bnall yh, a6, .Lle_cmp
1699
1700 /* Check if y is a NaN. */
1701 slli a7, yh, 12
1702 or a7, a7, yl
1703 beqz a7, .Lle_cmp
1704 movi a2, 1
1705 leaf_return
1706
1707 /* Check if x is a NaN. */
17082: slli a7, xh, 12
1709 or a7, a7, xl
1710 beqz a7, 1b
1711 movi a2, 1
1712 leaf_return
1713
1714.Lle_cmp:
1715 /* Check if x and y have different signs. */
1716 xor a7, xh, yh
1717 bltz a7, .Lle_diff_signs
1718
1719 /* Check if x is negative. */
1720 bltz xh, .Lle_xneg
1721
1722 /* Check if x <= y. */
1723 bltu xh, yh, 4f
1724 bne xh, yh, 5f
1725 bltu yl, xl, 5f
17264: movi a2, 0
1727 leaf_return
1728
1729.Lle_xneg:
1730 /* Check if y <= x. */
1731 bltu yh, xh, 4b
1732 bne yh, xh, 5f
1733 bgeu xl, yl, 4b
17345: movi a2, 1
1735 leaf_return
1736
1737.Lle_diff_signs:
1738 bltz xh, 4b
1739
1740 /* Check if both x and y are zero. */
1741 or a7, xh, yh
1742 slli a7, a7, 1
1743 or a7, a7, xl
1744 or a7, a7, yl
1745 movi a2, 1
1746 movi a3, 0
1747 moveqz a2, a3, a7
1748 leaf_return
1749
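	/* Illustrative C sketch of the ordered comparison above (NaNs are
	   screened out earlier): apart from the signed-zero case, doubles
	   with the same sign compare like their raw 64-bit bit patterns,
	   with the order reversed when both are negative.  The assembly
	   entry point returns 0 here when the relation holds and 1 when it
	   does not.

	     #include <stdint.h>
	     #include <stdbool.h>

	     static bool le_sketch (uint64_t x, uint64_t y)   // raw IEEE-754 bits
	     {
	       bool xneg = x >> 63, yneg = y >> 63;
	       if (xneg != yneg)
	         return xneg || ((x | y) << 1) == 0;   // signs differ: -0 == +0
	       if (xneg)
	         return y <= x;                        // both negative: order flips
	       return x <= y;
	     }
	*/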
1750
1751 /* Greater Than or Equal */
1752
1753 .align 4
1754 .global __gedf2
1755 .type __gedf2, @function
1756__gedf2:
1757 leaf_entry sp, 16
1758 movi a6, 0x7ff00000
1759 ball xh, a6, 2f
17601: bnall yh, a6, .Llt_cmp
1761
1762 /* Check if y is a NaN. */
1763 slli a7, yh, 12
1764 or a7, a7, yl
1765 beqz a7, .Llt_cmp
1766 movi a2, -1
1767 leaf_return
1768
1769 /* Check if x is a NaN. */
17702: slli a7, xh, 12
1771 or a7, a7, xl
1772 beqz a7, 1b
1773 movi a2, -1
1774 leaf_return
1775
1776
1777 /* Less Than */
1778
1779 .align 4
1780 .global __ltdf2
1781 .type __ltdf2, @function
1782__ltdf2:
1783 leaf_entry sp, 16
1784 movi a6, 0x7ff00000
1785 ball xh, a6, 2f
17861: bnall yh, a6, .Llt_cmp
1787
1788 /* Check if y is a NaN. */
1789 slli a7, yh, 12
1790 or a7, a7, yl
1791 beqz a7, .Llt_cmp
1792 movi a2, 0
1793 leaf_return
1794
1795 /* Check if x is a NaN. */
17962: slli a7, xh, 12
1797 or a7, a7, xl
1798 beqz a7, 1b
1799 movi a2, 0
1800 leaf_return
1801
1802.Llt_cmp:
1803 /* Check if x and y have different signs. */
1804 xor a7, xh, yh
1805 bltz a7, .Llt_diff_signs
1806
1807 /* Check if x is negative. */
1808 bltz xh, .Llt_xneg
1809
1810 /* Check if x < y. */
1811 bltu xh, yh, 4f
1812 bne xh, yh, 5f
1813 bgeu xl, yl, 5f
18144: movi a2, -1
1815 leaf_return
1816
1817.Llt_xneg:
1818 /* Check if y < x. */
1819 bltu yh, xh, 4b
1820 bne yh, xh, 5f
1821 bltu yl, xl, 4b
18225: movi a2, 0
1823 leaf_return
1824
1825.Llt_diff_signs:
1826 bgez xh, 5b
1827
1828 /* Check if both x and y are nonzero. */
1829 or a7, xh, yh
1830 slli a7, a7, 1
1831 or a7, a7, xl
1832 or a7, a7, yl
1833 movi a2, 0
1834 movi a3, -1
1835 movnez a2, a3, a7
1836 leaf_return
1837
1838
1839 /* Unordered */
1840
1841 .align 4
1842 .global __unorddf2
1843 .type __unorddf2, @function
1844__unorddf2:
1845 leaf_entry sp, 16
1846 movi a6, 0x7ff00000
1847 ball xh, a6, 3f
18481: ball yh, a6, 4f
18492: movi a2, 0
1850 leaf_return
1851
18523: slli a7, xh, 12
1853 or a7, a7, xl
1854 beqz a7, 1b
1855 movi a2, 1
1856 leaf_return
1857
18584: slli a7, yh, 12
1859 or a7, a7, yl
1860 beqz a7, 2b
1861 movi a2, 1
1862 leaf_return
1863
1864#endif /* L_cmpdf2 */
1865
1866#ifdef L_fixdfsi
1867
1868 .align 4
1869 .global __fixdfsi
1870 .type __fixdfsi, @function
1871__fixdfsi:
1872 leaf_entry sp, 16
1873
1874 /* Check for NaN and Infinity. */
1875 movi a6, 0x7ff00000
1876 ball xh, a6, .Lfixdfsi_nan_or_inf
1877
1878 /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */
1879 extui a4, xh, 20, 11
1880 extui a5, a6, 19, 10 /* 0x3fe */
1881 sub a4, a4, a5
1882 bgei a4, 32, .Lfixdfsi_maxint
1883 blti a4, 1, .Lfixdfsi_zero
1884
1885 /* Add explicit "1.0" and shift << 11. */
1886 or a7, xh, a6
1887 ssai (32 - 11)
1888 src a5, a7, xl
1889
1890 /* Shift back to the right, based on the exponent. */
1891 ssl a4 /* shift by 32 - a4 */
1892 srl a5, a5
1893
1894 /* Negate the result if sign != 0. */
1895 neg a2, a5
1896 movgez a2, a5, a7
1897 leaf_return
1898
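	/* Illustrative C sketch of the fast path above (double -> int32
	   with truncation); the NaN/Inf, overflow and zero cases are
	   handled by the code below.  xh/xl are the high/low words of the
	   double.

	     #include <stdint.h>

	     static int32_t fixdfsi_sketch (uint32_t xh, uint32_t xl)
	     {
	       int32_t exp = (int32_t)((xh >> 20) & 0x7ff) - 0x3fe;
	       if (exp < 1)
	         return 0;                              // |x| < 1 truncates to 0
	       // 1 <= exp <= 31 on this path.  Build "1.f" with the implicit
	       // one at bit 31, then shift the fraction away.
	       uint32_t frac = (xh & 0xfffff) | 0x100000;
	       uint32_t val  = (frac << 11) | (xl >> 21);
	       val >>= 32 - exp;
	       return (xh >> 31) ? -(int32_t)val : (int32_t)val;
	     }
	*/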
1899.Lfixdfsi_nan_or_inf:
1900 /* Handle Infinity and NaN. */
1901 slli a4, xh, 12
1902 or a4, a4, xl
1903 beqz a4, .Lfixdfsi_maxint
1904
1905 /* Translate NaN to +maxint. */
1906 movi xh, 0
1907
1908.Lfixdfsi_maxint:
1909 slli a4, a6, 11 /* 0x80000000 */
1910 addi a5, a4, -1 /* 0x7fffffff */
1911 movgez a4, a5, xh
1912 mov a2, a4
1913 leaf_return
1914
1915.Lfixdfsi_zero:
1916 movi a2, 0
1917 leaf_return
1918
1919#endif /* L_fixdfsi */
1920
1921#ifdef L_fixdfdi
1922
1923 .align 4
1924 .global __fixdfdi
1925 .type __fixdfdi, @function
1926__fixdfdi:
1927 leaf_entry sp, 16
1928
1929 /* Check for NaN and Infinity. */
1930 movi a6, 0x7ff00000
1931 ball xh, a6, .Lfixdfdi_nan_or_inf
1932
1933 /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */
1934 extui a4, xh, 20, 11
1935 extui a5, a6, 19, 10 /* 0x3fe */
1936 sub a4, a4, a5
1937 bgei a4, 64, .Lfixdfdi_maxint
1938 blti a4, 1, .Lfixdfdi_zero
1939
1940 /* Add explicit "1.0" and shift << 11. */
1941 or a7, xh, a6
1942 ssai (32 - 11)
1943 src xh, a7, xl
1944 sll xl, xl
1945
1946 /* Shift back to the right, based on the exponent. */
1947 ssl a4 /* shift by 64 - a4 */
1948 bgei a4, 32, .Lfixdfdi_smallshift
1949 srl xl, xh
1950 movi xh, 0
1951
1952.Lfixdfdi_shifted:
1953 /* Negate the result if sign != 0. */
1954 bgez a7, 1f
1955 neg xl, xl
1956 neg xh, xh
1957 beqz xl, 1f
1958 addi xh, xh, -1
19591: leaf_return
1960
1961.Lfixdfdi_smallshift:
1962 src xl, xh, xl
1963 srl xh, xh
1964 j .Lfixdfdi_shifted
1965
1966.Lfixdfdi_nan_or_inf:
1967 /* Handle Infinity and NaN. */
1968 slli a4, xh, 12
1969 or a4, a4, xl
1970 beqz a4, .Lfixdfdi_maxint
1971
1972 /* Translate NaN to +maxint. */
1973 movi xh, 0
1974
1975.Lfixdfdi_maxint:
1976 slli a7, a6, 11 /* 0x80000000 */
1977 bgez xh, 1f
1978 mov xh, a7
1979 movi xl, 0
1980 leaf_return
1981
19821: addi xh, a7, -1 /* 0x7fffffff */
1983 movi xl, -1
1984 leaf_return
1985
1986.Lfixdfdi_zero:
1987 movi xh, 0
1988 movi xl, 0
1989 leaf_return
1990
1991#endif /* L_fixdfdi */
1992
1993#ifdef L_fixunsdfsi
1994
1995 .align 4
1996 .global __fixunsdfsi
1997 .type __fixunsdfsi, @function
1998__fixunsdfsi:
1999 leaf_entry sp, 16
2000
2001 /* Check for NaN and Infinity. */
2002 movi a6, 0x7ff00000
2003 ball xh, a6, .Lfixunsdfsi_nan_or_inf
2004
2005 /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */
2006 extui a4, xh, 20, 11
2007 extui a5, a6, 20, 10 /* 0x3ff */
2008 sub a4, a4, a5
2009 bgei a4, 32, .Lfixunsdfsi_maxint
2010 bltz a4, .Lfixunsdfsi_zero
2011
2012 /* Add explicit "1.0" and shift << 11. */
2013 or a7, xh, a6
2014 ssai (32 - 11)
2015 src a5, a7, xl
2016
2017 /* Shift back to the right, based on the exponent. */
2018 addi a4, a4, 1
2019 beqi a4, 32, .Lfixunsdfsi_bigexp
2020 ssl a4 /* shift by 32 - a4 */
2021 srl a5, a5
2022
2023 /* Negate the result if sign != 0. */
2024 neg a2, a5
2025 movgez a2, a5, a7
2026 leaf_return
2027
2028.Lfixunsdfsi_nan_or_inf:
2029 /* Handle Infinity and NaN. */
2030 slli a4, xh, 12
2031 or a4, a4, xl
2032 beqz a4, .Lfixunsdfsi_maxint
2033
2034 /* Translate NaN to 0xffffffff. */
2035 movi a2, -1
2036 leaf_return
2037
2038.Lfixunsdfsi_maxint:
2039 slli a4, a6, 11 /* 0x80000000 */
2040 movi a5, -1 /* 0xffffffff */
2041 movgez a4, a5, xh
2042 mov a2, a4
2043 leaf_return
2044
2045.Lfixunsdfsi_zero:
2046 movi a2, 0
2047 leaf_return
2048
2049.Lfixunsdfsi_bigexp:
2050 /* Handle unsigned maximum exponent case. */
2051 bltz xh, 1f
2052 mov a2, a5 /* no shift needed */
2053 leaf_return
2054
2055 /* Return 0x80000000 if negative. */
20561: slli a2, a6, 11
2057 leaf_return
2058
2059#endif /* L_fixunsdfsi */
2060
2061#ifdef L_fixunsdfdi
2062
2063 .align 4
2064 .global __fixunsdfdi
2065 .type __fixunsdfdi, @function
2066__fixunsdfdi:
2067 leaf_entry sp, 16
2068
2069 /* Check for NaN and Infinity. */
2070 movi a6, 0x7ff00000
2071 ball xh, a6, .Lfixunsdfdi_nan_or_inf
2072
2073 /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */
2074 extui a4, xh, 20, 11
2075 extui a5, a6, 20, 10 /* 0x3ff */
2076 sub a4, a4, a5
2077 bgei a4, 64, .Lfixunsdfdi_maxint
2078 bltz a4, .Lfixunsdfdi_zero
2079
2080 /* Add explicit "1.0" and shift << 11. */
2081 or a7, xh, a6
2082 ssai (32 - 11)
2083 src xh, a7, xl
2084 sll xl, xl
2085
2086 /* Shift back to the right, based on the exponent. */
2087 addi a4, a4, 1
2088 beqi a4, 64, .Lfixunsdfdi_bigexp
2089 ssl a4 /* shift by 64 - a4 */
2090 bgei a4, 32, .Lfixunsdfdi_smallshift
2091 srl xl, xh
2092 movi xh, 0
2093
2094.Lfixunsdfdi_shifted:
2095 /* Negate the result if sign != 0. */
2096 bgez a7, 1f
2097 neg xl, xl
2098 neg xh, xh
2099 beqz xl, 1f
2100 addi xh, xh, -1
21011: leaf_return
2102
2103.Lfixunsdfdi_smallshift:
2104 src xl, xh, xl
2105 srl xh, xh
2106 j .Lfixunsdfdi_shifted
2107
2108.Lfixunsdfdi_nan_or_inf:
2109 /* Handle Infinity and NaN. */
2110 slli a4, xh, 12
2111 or a4, a4, xl
2112 beqz a4, .Lfixunsdfdi_maxint
2113
2114 /* Translate NaN to 0xffffffff.... */
21151: movi xh, -1
2116 movi xl, -1
2117 leaf_return
2118
2119.Lfixunsdfdi_maxint:
2120 bgez xh, 1b
21212: slli xh, a6, 11 /* 0x80000000 */
2122 movi xl, 0
2123 leaf_return
2124
2125.Lfixunsdfdi_zero:
2126 movi xh, 0
2127 movi xl, 0
2128 leaf_return
2129
2130.Lfixunsdfdi_bigexp:
2131 /* Handle unsigned maximum exponent case. */
2132 bltz a7, 2b
2133 leaf_return /* no shift needed */
2134
2135#endif /* L_fixunsdfdi */
2136
2137#ifdef L_floatsidf
2138
2139 .align 4
2140 .global __floatunsidf
2141 .type __floatunsidf, @function
2142__floatunsidf:
2143 leaf_entry sp, 16
2144 beqz a2, .Lfloatsidf_return_zero
2145
2146 /* Set the sign to zero and jump to the floatsidf code. */
2147 movi a7, 0
2148 j .Lfloatsidf_normalize
2149
2150 .align 4
2151 .global __floatsidf
2152 .type __floatsidf, @function
2153__floatsidf:
2154 leaf_entry sp, 16
2155
2156 /* Check for zero. */
2157 beqz a2, .Lfloatsidf_return_zero
2158
2159 /* Save the sign. */
2160 extui a7, a2, 31, 1
2161
2162 /* Get the absolute value. */
2163#if XCHAL_HAVE_ABS
2164 abs a2, a2
2165#else
2166 neg a4, a2
2167 movltz a2, a4, a2
2168#endif
2169
2170.Lfloatsidf_normalize:
2171 /* Normalize with the first 1 bit in the msb. */
2172 do_nsau a4, a2, a5, a6
2173 ssl a4
2174 sll a5, a2
2175
2176 /* Shift the mantissa into position. */
2177 srli xh, a5, 11
2178 slli xl, a5, (32 - 11)
2179
2180 /* Set the exponent. */
2181 movi a5, 0x41d /* 0x3fe + 31 */
2182 sub a5, a5, a4
2183 slli a5, a5, 20
2184 add xh, xh, a5
2185
2186 /* Add the sign and return. */
2187 slli a7, a7, 31
2188 or xh, xh, a7
2189 leaf_return
2190
2191.Lfloatsidf_return_zero:
2192 movi a3, 0
2193 leaf_return
2194
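	/* Illustrative C sketch of __floatsidf above (int32 -> double,
	   always exact); the unsigned entry point is the same with the
	   sign forced to zero.  __builtin_clz plays the role of the
	   nsau-based do_nsau macro.

	     #include <stdint.h>

	     static void floatsidf_sketch (int32_t v, uint32_t *xh, uint32_t *xl)
	     {
	       if (v == 0) { *xh = 0; *xl = 0; return; }
	       uint32_t sign = (uint32_t)v & 0x80000000;
	       uint32_t mag  = sign ? -(uint32_t)v : (uint32_t)v;
	       int nz = __builtin_clz (mag);     // leading zero count
	       uint32_t m = mag << nz;           // normalized: leading 1 at bit 31
	       // Exponent constant 0x41d = 0x3fe + 31; the leading 1 of m,
	       // left at bit 20 of the high word, carries one more into the
	       // exponent field when added.
	       uint32_t exp = 0x41d - (uint32_t)nz;
	       *xh = sign | ((exp << 20) + (m >> 11));
	       *xl = m << 21;
	     }
	*/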
2195#endif /* L_floatsidf */
2196
2197#ifdef L_floatdidf
2198
2199 .align 4
2200 .global __floatundidf
2201 .type __floatundidf, @function
2202__floatundidf:
2203 leaf_entry sp, 16
2204
2205 /* Check for zero. */
2206 or a4, xh, xl
2207 beqz a4, 2f
2208
2209 /* Set the sign to zero and jump to the floatdidf code. */
2210 movi a7, 0
2211 j .Lfloatdidf_normalize
2212
2213 .align 4
2214 .global __floatdidf
2215 .type __floatdidf, @function
2216__floatdidf:
2217 leaf_entry sp, 16
2218
2219 /* Check for zero. */
2220 or a4, xh, xl
2221 beqz a4, 2f
2222
2223 /* Save the sign. */
2224 extui a7, xh, 31, 1
2225
2226 /* Get the absolute value. */
2227 bgez xh, .Lfloatdidf_normalize
2228 neg xl, xl
2229 neg xh, xh
2230 beqz xl, .Lfloatdidf_normalize
2231 addi xh, xh, -1
2232
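/* Editor's note: the three instructions above are the usual two-word
   negation.  A minimal C sketch (illustrative names):

     uint32_t neg_lo = -lo;
     uint32_t neg_hi = -hi - (neg_lo != 0);   // borrow from the low word
*/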
2233 .Lfloatdidf_normalize:
2234 /* Normalize with the first 1 bit in the msb of xh. */
2235 beqz xh, .Lfloatdidf_bigshift
2236 do_nsau a4, xh, a5, a6
2237 ssl a4
2238 src xh, xh, xl
2239 sll xl, xl
2240
2241 .Lfloatdidf_shifted:
2242 /* Shift the mantissa into position, with rounding bits in a6. */
2243 ssai 11
2244 sll a6, xl
2245 src xl, xh, xl
2246 srl xh, xh
2247
2248 /* Set the exponent. */
2249 movi a5, 0x43d /* 0x3fe + 63 */
2250 sub a5, a5, a4
2251 slli a5, a5, 20
2252 add xh, xh, a5
2253
2254 /* Add the sign. */
2255 slli a7, a7, 31
2256 or xh, xh, a7
2257
2258 /* Round up if the leftover fraction is >= 1/2. */
2259 bgez a6, 2f
2260 addi xl, xl, 1
2261 beqz xl, .Lfloatdidf_roundcarry
2262
2263 /* Check if the leftover fraction is exactly 1/2. */
2264 slli a6, a6, 1
2265 beqz a6, .Lfloatdidf_exactlyhalf
2266 2: leaf_return
2267
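/* Editor's note: the rounding above is round-to-nearest, ties-to-even.
   A minimal C sketch, assuming the result mantissa sits in 'mant' and
   the bits shifted out sit in 'leftover' (names are illustrative):

     static uint64_t round_nearest_even (uint64_t mant, uint32_t leftover)
     {
       if (leftover & 0x80000000u) {          // discarded part >= 1/2 ulp
         mant++;                              // carry into the exponent is harmless
         if ((uint32_t)(leftover << 1) == 0)
           mant &= ~(uint64_t)1;              // exact tie: round to even
       }
       return mant;
     }
*/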
2268 .Lfloatdidf_bigshift:
2269 /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
2270 do_nsau a4, xl, a5, a6
2271 ssl a4
2272 sll xh, xl
2273 movi xl, 0
2274 addi a4, a4, 32
2275 j .Lfloatdidf_shifted
2276
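/* Editor's note: combined with the beqz test at .Lfloatdidf_normalize,
   this path makes the normalization count behave like a 64-bit
   count-leading-zeros.  A minimal C sketch (illustrative; the all-zero
   input is excluded by the earlier zero check):

     static int clz64 (uint32_t hi, uint32_t lo)
     {
       return hi ? __builtin_clz (hi) : 32 + __builtin_clz (lo);
     }
*/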
2277 .Lfloatdidf_exactlyhalf:
2278 /* Round down to the nearest even value. */
2279 srli xl, xl, 1
2280 slli xl, xl, 1
2281 leaf_return
2282
2283 .Lfloatdidf_roundcarry:
2284 /* xl is always zero when the rounding increment overflows, so
2285 there's no need to round it to an even value. */
2286 addi xh, xh, 1
2287 /* Overflow to the exponent is OK. */
2288 leaf_return
2289
2290 #endif /* L_floatdidf */
2291
2292 #ifdef L_truncdfsf2
2293
2294 .align 4
2295 .global __truncdfsf2
2296 .type __truncdfsf2, @function
2297 __truncdfsf2:
2298 leaf_entry sp, 16
2299
2300 /* Adjust the exponent bias. */
2301 movi a4, (0x3ff - 0x7f) << 20
2302 sub a5, xh, a4
2303
2304 /* Check for underflow. */
2305 xor a6, xh, a5
2306 bltz a6, .Ltrunc_underflow
2307 extui a6, a5, 20, 11
2308 beqz a6, .Ltrunc_underflow
2309
2310 /* Check for overflow. */
2311 movi a4, 255
2312 bge a6, a4, .Ltrunc_overflow
2313
2314 /* Shift a5/xl << 3 into a5/a4. */
2315 ssai (32 - 3)
2316 src a5, a5, xl
2317 sll a4, xl
2318
2319 .Ltrunc_addsign:
2320 /* Add the sign bit. */
2321 extui a6, xh, 31, 1
2322 slli a6, a6, 31
2323 or a2, a6, a5
2324
2325 /* Round up if the leftover fraction is >= 1/2. */
2326 bgez a4, 1f
2327 addi a2, a2, 1
2328 /* Overflow to the exponent is OK. The answer will be correct. */
2329
2330 /* Check if the leftover fraction is exactly 1/2. */
2331 slli a4, a4, 1
2332 beqz a4, .Ltrunc_exactlyhalf
2333 1: leaf_return
2334
2335 .Ltrunc_exactlyhalf:
2336 /* Round down to the nearest even value. */
2337 srli a2, a2, 1
2338 slli a2, a2, 1
2339 leaf_return
2340
2341 .Ltrunc_overflow:
2342 /* Check if exponent == 0x7ff. */
2343 movi a4, 0x7ff00000
2344 bnall xh, a4, 1f
2345
2346 /* Check if mantissa is nonzero. */
2347 slli a5, xh, 12
2348 or a5, a5, xl
2349 beqz a5, 1f
2350
2351 /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */
2352 srli a4, a4, 1
2353
2354 1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */
2355 /* Add the sign bit. */
2356 extui a6, xh, 31, 1
2357 ssai 1
2358 src a2, a6, a4
2359 leaf_return
2360
2361 .Ltrunc_underflow:
2362 /* Find shift count for a subnormal. Flush to zero if >= 32. */
2363 extui a6, xh, 20, 11
2364 movi a5, 0x3ff - 0x7f
2365 sub a6, a5, a6
2366 addi a6, a6, 1
2367 bgeui a6, 32, 1f
2368
2369 /* Replace the exponent with an explicit "1.0". */
2370 slli a5, a5, 13 /* 0x700000 */
2371 or a5, a5, xh
2372 slli a5, a5, 11
2373 srli a5, a5, 11
2374
2375 /* Shift the mantissa left by 3 bits (into a5/a4). */
2376 ssai (32 - 3)
2377 src a5, a5, xl
2378 sll a4, xl
2379
2380 /* Shift right by a6. */
2381 ssr a6
2382 sll a7, a4
2383 src a4, a5, a4
2384 srl a5, a5
2385 beqz a7, .Ltrunc_addsign
2386 or a4, a4, a6 /* any positive, nonzero value will work */
2387 j .Ltrunc_addsign
2388
2389 /* Return +/- zero. */
2390 1: extui a2, xh, 31, 1
2391 slli a2, a2, 31
2392 leaf_return
2393
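/* Editor's note: a compact C view of the structure above: re-bias the
   11-bit exponent by (0x3ff - 0x7f) and branch on the result; the
   "any positive, nonzero value" OR in the underflow path is a sticky
   bit that keeps the discarded low bits from being mistaken for an
   exact halfway case when rounding.  A minimal sketch (illustrative):

     static int classify_trunc (int exp_d)   // exp_d = biased double exponent
     {
       int exp_s = exp_d - (0x3ff - 0x7f);   // re-bias for single precision
       if (exp_s <= 0)   return -1;          // underflow: subnormal or +/-0
       if (exp_s >= 255) return  1;          // overflow: Inf, or quiet NaN
       return 0;                             // normal: round to 23 fraction bits
     }
*/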
2394 #endif /* L_truncdfsf2 */
2395
2396 #ifdef L_extendsfdf2
2397
2398 .align 4
2399 .global __extendsfdf2
2400 .type __extendsfdf2, @function
2401 __extendsfdf2:
2402 leaf_entry sp, 16
2403
2404 /* Save the sign bit and then shift it off. */
2405 extui a5, a2, 31, 1
2406 slli a5, a5, 31
2407 slli a4, a2, 1
2408
2409 /* Extract and check the exponent. */
2410 extui a6, a2, 23, 8
2411 beqz a6, .Lextend_expzero
2412 addi a6, a6, 1
2413 beqi a6, 256, .Lextend_nan_or_inf
2414
2415 /* Shift >> 3 into a4/xl. */
2416 srli a4, a4, 4
2417 slli xl, a2, (32 - 3)
2418
2419 /* Adjust the exponent bias. */
2420 movi a6, (0x3ff - 0x7f) << 20
2421 add a4, a4, a6
2422
2423 /* Add the sign bit. */
2424 or xh, a4, a5
2425 leaf_return
2426
2427 .Lextend_nan_or_inf:
2428 movi a4, 0x7ff00000
2429
2430 /* Check for NaN. */
2431 slli a7, a2, 9
2432 beqz a7, 1f
2433
2434 slli a6, a6, 11 /* 0x80000 */
2435 or a4, a4, a6
2436
2437 /* Add the sign and return. */
2438 1: or xh, a4, a5
2439 movi xl, 0
2440 leaf_return
2441
2442 .Lextend_expzero:
2443 beqz a4, 1b
2444
2445 /* Normalize it to have 8 zero bits before the first 1 bit. */
2446 do_nsau a7, a4, a2, a3
2447 addi a7, a7, -8
2448 ssl a7
2449 sll a4, a4
2450
2451 /* Shift >> 3 into a4/xl. */
2452 slli xl, a4, (32 - 3)
2453 srli a4, a4, 3
2454
2455 /* Set the exponent. */
2456 movi a6, 0x3fe - 0x7f
2457 sub a6, a6, a7
2458 slli a6, a6, 20
2459 add a4, a4, a6
2460
2461 /* Add the sign and return. */
2462 or xh, a4, a5
2463 leaf_return
2464
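/* Editor's note: widening is exact, so no rounding is needed: the
   exponent is re-biased by (0x3ff - 0x7f) and the 23 fraction bits are
   left-aligned into the 52-bit field, a shift by 29 = 52 - 23 (done
   above as a shift of the high word by 3 after the sign bit has been
   shifted off).  A minimal C sketch for the normal-number case only
   (illustrative names):

     static uint64_t extend_model (uint32_t f)
     {
       uint64_t sign = (uint64_t)(f >> 31) << 63;
       uint64_t exp  = ((f >> 23) & 0xff) + (0x3ff - 0x7f);
       uint64_t frac = (uint64_t)(f & 0x7fffff) << 29;
       return sign | (exp << 52) | frac;      // zero/subnormal/NaN handled separately
     }
*/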
2465 #endif /* L_extendsfdf2 */
2466
2467
2468 #if XCHAL_HAVE_DFP_SQRT
2469 #ifdef L_sqrt
2470
2471 .text
2472 .align 4
2473 .global __ieee754_sqrt
2474 .type __ieee754_sqrt, @function
2475 __ieee754_sqrt:
2476 leaf_entry sp, 16
2477
2478 wfrd f1, xh, xl
2479
2480 sqrt0.d f2, f1
2481 const.d f4, 0
2482 maddn.d f4, f2, f2
2483 nexp01.d f3, f1
2484 const.d f0, 3
2485 addexp.d f3, f0
2486 maddn.d f0, f4, f3
2487 nexp01.d f4, f1
2488 maddn.d f2, f0, f2
2489 const.d f5, 0
2490 maddn.d f5, f2, f3
2491 const.d f0, 3
2492 maddn.d f0, f5, f2
2493 neg.d f6, f4
2494 maddn.d f2, f0, f2
2495 const.d f0, 0
2496 const.d f5, 0
2497 const.d f7, 0
2498 maddn.d f0, f6, f2
2499 maddn.d f5, f2, f3
2500 const.d f3, 3
2501 maddn.d f7, f3, f2
2502 maddn.d f4, f0, f0
2503 maddn.d f3, f5, f2
2504 neg.d f2, f7
2505 maddn.d f0, f4, f2
2506 maddn.d f7, f3, f7
2507 mksadj.d f2, f1
2508 nexp01.d f1, f1
2509 maddn.d f1, f0, f0
2510 neg.d f3, f7
2511 addexpm.d f0, f2
2512 addexp.d f3, f2
2513 divn.d f0, f1, f3
2514
2515 rfr xl, f0
2516 rfrd xh, f0
2517
2518 leaf_return
2519
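/* Editor's note: this block relies on the DFP coprocessor's
   Newton-Raphson support ops (a sqrt0.d seed, maddn.d refinement steps
   and a final divn.d).  The underlying idea, sketched loosely in C and
   not the exact instruction semantics, is to refine a reciprocal square
   root estimate and multiply back by the argument:

     // one refinement step for r ~= 1/sqrt(x), then sqrt(x) ~= x * r
     r = r * (3.0 - x * r * r) * 0.5;
     double root = x * r;
*/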
2520 #endif /* L_sqrt */
2521 #endif /* XCHAL_HAVE_DFP_SQRT */
2522
2523 #if XCHAL_HAVE_DFP_RECIP
2524 #ifdef L_recipdf2
2525 /* Reciprocal */
2526
2527 .align 4
2528 .global __recipdf2
2529 .type __recipdf2, @function
2530 __recipdf2:
2531 leaf_entry sp, 16
2532
2533 wfrd f1, xh, xl
2534
2535 recip0.d f0, f1
2536 const.d f2, 2
2537 msub.d f2, f1, f0
2538 mul.d f3, f1, f0
2539 const.d f4, 2
2540 mul.d f5, f0, f2
2541 msub.d f4, f3, f2
2542 const.d f2, 1
2543 mul.d f0, f5, f4
2544 msub.d f2, f1, f0
2545 maddn.d f0, f0, f2
2546
2547 rfr xl, f0
2548 rfrd xh, f0
2549
2550 leaf_return
2551
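/* Editor's note: the usual Newton-Raphson reciprocal refinement,
   expressed with the coprocessor's fused multiply ops.  One step,
   sketched in C (illustrative only):

     double recip_step (double x, double r)
     {
       return r * (2.0 - x * r);   // roughly doubles the number of correct bits
     }
*/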
2552 #endif /* L_recipdf2 */
2553 #endif /* XCHAL_HAVE_DFP_RECIP */
2554
2555 #if XCHAL_HAVE_DFP_RSQRT
2556 #ifdef L_rsqrtdf2
2557 /* Reciprocal square root */
2558
2559 .align 4
2560 .global __rsqrtdf2
2561 .type __rsqrtdf2, @function
2562 __rsqrtdf2:
2563 leaf_entry sp, 16
2564
2565 wfrd f1, xh, xl
2566
2567 rsqrt0.d f0, f1
2568 mul.d f2, f1, f0
2569 const.d f3, 3
2570 mul.d f4, f3, f0
2571 const.d f5, 1
2572 msub.d f5, f2, f0
2573 maddn.d f0, f4, f5
2574 const.d f2, 1
2575 mul.d f4, f1, f0
2576 mul.d f5, f3, f0
2577 msub.d f2, f4, f0
2578 maddn.d f0, f5, f2
2579 const.d f2, 1
2580 mul.d f1, f1, f0
2581 mul.d f3, f3, f0
2582 msub.d f2, f1, f0
2583 maddn.d f0, f3, f2
2584
2585 rfr xl, f0
2586 rfrd xh, f0
2587
2588 leaf_return
2589
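/* Editor's note: the analogous Newton-Raphson step for the reciprocal
   square root, applied repeatedly to the rsqrt0.d seed.  One step,
   sketched in C (illustrative only):

     double rsqrt_step (double x, double r)
     {
       return r * (3.0 - x * r * r) * 0.5;
     }
*/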
2590 #endif /* L_rsqrtdf2 */
2591 #endif /* XCHAL_HAVE_DFP_RSQRT */