1 /* ieee754-sf.S single-precision floating point support for ARM
3 Copyright (C) 2003-2021 Free Software Foundation, Inc.
4 Contributed by Nicolas Pitre (nico@fluxnic.net)
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 This file is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
28 * The goal of this code is to be as fast as possible. This is
29 * not meant to be easy to understand for the casual reader.
31 * Only the default rounding mode is intended for best performance.
32 * Exceptions aren't supported yet, but that can be added quite easily
33 * if necessary without impacting performance.
35 * In the CFI related comments, 'previousOffset' refers to the previous offset
36 * from sp used to compute the CFA.
@ negsf2 / __aeabi_fneg: negate a single-precision float in r0.
@ IEEE negation is a pure sign-bit toggle, correct for every input
@ class (zero, denormal, normal, INF, NAN) — no flag or exponent work.
42 ARM_FUNC_ALIAS aeabi_fneg negsf2
45 eor r0, r0, #0x80000000 @ flip sign bit
54 #ifdef L_arm_addsubsf3
@ Single-precision add/subtract family (r0 op r1 -> r0).
@ __aeabi_frsub computes (r1 - r0) by negating r0 and falling into
@ the subtract path; subsf3 negates r1 and falls into addsf3.
@ NOTE(review): this listing is sub-sampled (the embedded source line
@ numbers jump), so instructions between the lines shown are elided —
@ verify any change against the full ieee754-sf.S.
56 ARM_FUNC_START aeabi_frsub
59 eor r0, r0, #0x80000000 @ flip sign bit of first arg
63 ARM_FUNC_ALIAS aeabi_fsub subsf3
65 eor r1, r1, #0x80000000 @ flip sign bit of second arg
66 #if defined(__INTERWORKING_STUBS__)
67 b 1f @ Skip Thumb-code prologue
71 ARM_FUNC_ALIAS aeabi_fadd addsf3
73 1: @ Look for zeroes, equal values, INF, or NAN.
76 COND(mov,s,ne) r3, r1, lsl #1
78 COND(mvn,s,ne) ip, r2, asr #24
79 COND(mvn,s,ne) ip, r3, asr #24
82 @ Compute exponent difference. Make largest exponent in r2,
83 @ corresponding arg in r0, and positive exponent difference in r3.
85 rsbs r3, r2, r3, lsr #24
94 @ If exponent difference is too large, return largest argument
95 @ already in r0. We need up to 25 bits to handle proper rounding
101 @ Convert mantissa to signed integer.
103 orr r0, r0, #0x00800000
104 bic r0, r0, #0xff000000
108 orr r1, r1, #0x00800000
109 bic r1, r1, #0xff000000
113 @ If exponent == difference, one or both args were denormalized.
114 @ Since this is not the common case, rescale them off line.
119 @ Compensate for the exponent overlapping the mantissa MSB added later
122 @ Shift and add second arg to first arg in r0.
123 @ Keep leftover bits into r1.
124 shiftop adds r0 r0 r1 asr r3 ip
126 shift1 lsl, r1, r1, r3
128 @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
129 and r3, r0, #0x80000000
131 #if defined(__thumb2__)
133 sbc r0, r0, r0, lsl #1
139 @ Determine how to normalize the result.
146 @ Result needs to be shifted right.
151 @ Make sure we did not bust our exponent.
155 @ Our result is now properly aligned into r0, remaining bits in r1.
156 @ Pack final result together.
157 @ Round with MSB of r1. If halfway between two numbers, round towards
161 adc r0, r0, r2, lsl #23
167 @ Result must be shifted left and exponent adjusted.
173 cmphs r0, #0x00800000
176 @ No rounding necessary since r1 will always be 0 at this point.
179 #if !defined (__ARM_FEATURE_CLZ)
@ Software normalization fallback when the CLZ instruction is absent.
182 moveq r0, r0, lsl #12
202 shift1 lsl, r0, r0, ip
206 @ Final result with sign
207 @ If exponent negative, denormalize result.
209 addge r0, r0, r2, lsl #23
212 #if defined(__thumb2__)
217 orrlt r0, r3, r0, lsr r2
221 @ Fixup and adjust bit position for denormalized arguments.
222 @ Note that r2 must not remain equal to 0.
225 eor r1, r1, #0x00800000
227 eoreq r0, r0, #0x00800000
237 COND(mvn,s,ne) ip, r3, asr #24
243 @ Result is x + 0.0 = x or 0.0 + y = y.
251 @ Result is x - x = 0.
256 @ Result is x + x = 2x.
261 orrcs r0, r0, #0x80000000
263 2: adds r2, r2, #(2 << 24)
265 addcc r0, r0, #(1 << 23)
267 and r3, r0, #0x80000000
269 @ Overflow: return INF.
271 orr r0, r3, #0x7f000000
272 orr r0, r0, #0x00800000
275 @ At least one of r0/r1 is INF/NAN.
276 @ if r0 != INF/NAN: return r1 (which is INF/NAN)
277 @ if r1 != INF/NAN: return r0 (which is INF/NAN)
278 @ if r0 or r1 is NAN: return NAN
279 @ if opposite sign: return NAN
280 @ otherwise return r0 (which is INF or -INF)
285 COND(mvn,s,eq) r3, r3, asr #24
289 COND(mov,s,eq) r3, r1, lsl #9
291 orrne r0, r0, #0x00400000 @ quiet NAN
@ floatunsisf / __aeabi_ui2f: convert unsigned 32-bit int (r0) to float.
301 ARM_FUNC_START floatunsisf
302 ARM_FUNC_ALIAS aeabi_ui2f floatunsisf
@ floatsisf / __aeabi_i2f: convert signed 32-bit int (r0) to float.
@ Sign bit is isolated into r3 and later merged with the biased
@ exponent (127 + 23 accounts for a 23-bit right-aligned mantissa).
308 ARM_FUNC_START floatsisf
309 ARM_FUNC_ALIAS aeabi_i2f floatsisf
311 ands r3, r0, #0x80000000
319 @ Add initial exponent to sign
320 orr r3, r3, #((127 + 23) << 23)
@ floatundisf / __aeabi_ul2f: convert unsigned 64-bit int (ah:al) to float.
334 ARM_FUNC_START floatundisf
335 ARM_FUNC_ALIAS aeabi_ul2f floatundisf
@ floatdisf / __aeabi_l2f: convert signed 64-bit int (ah:al) to float.
@ The initial exponent bias below includes +32 for the high word;
@ it is reduced by (32 << 23) when the value fits in the low word.
345 ARM_FUNC_START floatdisf
346 ARM_FUNC_ALIAS aeabi_l2f floatdisf
352 ands r3, ah, #0x80000000 @ sign bit in r3
354 #if defined(__thumb2__)
356 sbc ah, ah, ah, lsl #1
368 @ Add initial exponent to sign
369 orr r3, r3, #((127 + 23 + 32) << 23)
371 subeq r3, r3, #(32 << 23)
372 2: sub r3, r3, #(1 << 23)
374 #if !defined (__ARM_FEATURE_CLZ)
@ Software count-leading-zeros fallback; presumably a binary-search
@ narrowing of the shift amount — confirm against the elided lines.
379 movhs ip, ip, lsr #16
392 sublo r2, r2, ip, lsr #1
393 subs r2, r2, ip, lsr #3
402 sub r3, r3, r2, lsl #23
405 shiftop add r3 r3 ah lsl r2 ip
406 shift1 lsl, ip, al, r2
409 shiftop adc r0 r3 al lsr r2 r2
415 shift1 lsl, ip, ah, r2
417 orrs al, al, ip, lsl #1
418 shiftop adc r0 r3 ah lsr r2 r2
420 biceq r0, r0, ip, lsr #31
429 #endif /* L_addsubsf3 */
431 #if defined(L_arm_mulsf3) || defined(L_arm_muldivsf3)
433 @ Define multiplication as weak in _arm_mulsf3.o so that it can be overriden
434 @ by the global definition in _arm_muldivsf3.o. This allows a program only
435 @ using multiplication to take the weak definition which does not contain the
436 @ division code. Programs using only division or both division and
437 @ multiplication will pull _arm_muldivsf3.o from which both the multiplication
438 @ and division are taken thanks to the override.
@ mulsf3 / __aeabi_fmul: single-precision multiply, r0 * r1 -> r0.
444 ARM_FUNC_START mulsf3
445 ARM_FUNC_ALIAS aeabi_fmul mulsf3
448 @ Mask out exponents, trap any zero/denormal/INF/NAN.
450 ands r2, ip, r0, lsr #23
452 COND(and,s,ne) r3, ip, r1, lsr #23
458 @ Add exponents together
461 @ Determine final sign.
464 @ Convert mantissa to unsigned integer.
465 @ If power of two, branch to a separate path.
466 @ Make up for final alignment.
469 COND(mov,s,ne) r1, r1, lsl #9
472 orr r0, r3, r0, lsr #5
473 orr r1, r3, r1, lsr #5
475 @ The actual multiplication.
476 @ This code works on architecture versions >= 4
479 @ Put final sign in r0.
480 and r0, ip, #0x80000000
482 @ Adjust result upon the MSB position.
486 orrcc r1, r1, r3, lsr #31
489 @ Add sign to result.
492 @ Apply exponent bias, check for under/overflow.
497 @ Round the result, merge final exponent.
499 adc r0, r0, r2, lsl #23
504 @ Multiplication by 0x1p*: let's shortcut a lot of code.
507 and ip, ip, #0x80000000
510 orr r0, ip, r0, lsr #9
511 orr r0, r0, r1, lsr #9
514 COND(rsb,s,gt) r3, r2, #255
515 orrgt r0, r0, r2, lsl #23
518 @ Under/overflow: fix things up for the code below.
519 orr r0, r0, #0x00800000
527 @ Check if denormalized result is possible, otherwise return signed 0.
530 bicle r0, r0, #0x7fffffff
533 @ Shift value right, round, etc.
536 shift1 lsr, r1, r1, r2
538 shift1 lsl, ip, r0, r2
541 orrs r3, r3, ip, lsl #1
543 biceq r0, r0, ip, lsr #31
546 @ One or both arguments are denormalized.
547 @ Scale them leftwards and preserve sign bit.
550 and ip, r0, #0x80000000
553 tsteq r0, #0x00800000
558 and ip, r1, #0x80000000
561 tsteq r1, #0x00800000
568 @ Isolate the INF and NAN cases away
569 and r3, ip, r1, lsr #23
575 @ Here, one or more arguments are either denormalized or zero.
576 bics ip, r0, #0x80000000
578 COND(bic,s,ne) ip, r1, #0x80000000
581 @ Result is 0, but determine sign anyway.
584 bic r0, r0, #0x7fffffff
587 1: @ One or both args are INF or NAN.
590 teqne r0, #0x80000000
593 teqne r1, #0x80000000
594 beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
598 bne LSYM(Lml_n) @ NAN * <anything> -> NAN
604 bne LSYM(Lml_n) @ <anything> * NAN -> NAN
606 @ Result is INF, but we need to determine its sign.
610 @ Overflow: return INF (sign already in r0).
612 and r0, r0, #0x80000000
613 orr r0, r0, #0x7f000000
614 orr r0, r0, #0x00800000
617 @ Return a quiet NAN.
619 orr r0, r0, #0x7f000000
620 orr r0, r0, #0x00c00000
627 #ifdef L_arm_muldivsf3
@ divsf3 / __aeabi_fdiv: single-precision divide, r0 / r1 -> r0.
@ Uses a shift-and-subtract loop producing quotient bits into r0;
@ special cases share the Lml_* tails from the multiply code above.
629 ARM_FUNC_START divsf3
630 ARM_FUNC_ALIAS aeabi_fdiv divsf3
633 @ Mask out exponents, trap any zero/denormal/INF/NAN.
635 ands r2, ip, r0, lsr #23
637 COND(and,s,ne) r3, ip, r1, lsr #23
643 @ Subtract divisor exponent from dividend's
646 @ Preserve final sign into ip.
649 @ Convert mantissa to unsigned integer.
650 @ Dividend -> r3, divisor -> r1.
655 orr r1, r3, r1, lsr #4
656 orr r3, r3, r0, lsr #4
658 @ Initialize r0 (result) with final sign bit.
659 and r0, ip, #0x80000000
661 @ Ensure result will land to known bit position.
662 @ Apply exponent bias accordingly.
666 adc r2, r2, #(127 - 2)
668 @ The actual division loop.
676 subcs r3, r3, r1, lsr #1
677 orrcs r0, r0, ip, lsr #1
680 subcs r3, r3, r1, lsr #2
681 orrcs r0, r0, ip, lsr #2
684 subcs r3, r3, r1, lsr #3
685 orrcs r0, r0, ip, lsr #3
688 COND(mov,s,ne) ip, ip, lsr #4
691 @ Check exponent for under/overflow.
695 @ Round the result, merge final exponent.
697 adc r0, r0, r2, lsl #23
702 @ Division by 0x1p*: let's shortcut a lot of code.
704 and ip, ip, #0x80000000
705 orr r0, ip, r0, lsr #9
708 COND(rsb,s,gt) r3, r2, #255
709 orrgt r0, r0, r2, lsl #23
712 orr r0, r0, #0x00800000
717 @ One or both arguments are denormalized.
718 @ Scale them leftwards and preserve sign bit.
721 and ip, r0, #0x80000000
724 tsteq r0, #0x00800000
729 and ip, r1, #0x80000000
732 tsteq r1, #0x00800000
738 @ One or both arguments are either INF, NAN, zero or denormalized.
740 and r3, ip, r1, lsr #23
744 bne LSYM(Lml_n) @ NAN / <anything> -> NAN
746 bne LSYM(Lml_i) @ INF / <anything> -> INF
748 b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
752 beq LSYM(Lml_z) @ <anything> / INF -> 0
754 b LSYM(Lml_n) @ <anything> / NAN -> NAN
755 2: @ If both are nonzero, we need to normalize and resume above.
756 bics ip, r0, #0x80000000
758 COND(bic,s,ne) ip, r1, #0x80000000
760 @ One or both arguments are zero.
761 bics r2, r0, #0x80000000
762 bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
763 bics r3, r1, #0x80000000
764 bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
765 b LSYM(Lml_n) @ 0 / 0 -> NAN
771 #endif /* L_muldivsf3 */
772 #endif /* L_arm_mulsf3 || L_arm_muldivsf3 */
@ Three-way single-precision comparison (gtsf2/gesf2, ltsf2/lesf2,
@ cmpsf2/nesf2/eqsf2).  The unordered return code pushed in ip below
@ differs per entry point so each libgcc predicate gets the value the
@ compiler expects for NAN operands.
776 @ The return value in r0 is
778 @ 0 if the operands are equal
779 @ 1 if the first operand is greater than the second, or
780 @ the operands are unordered and the operation is
781 @ CMP, LT, LE, NE, or EQ.
782 @ -1 if the first operand is less than the second, or
783 @ the operands are unordered and the operation is GT
786 @ The Z flag will be set iff the operands are equal.
788 @ The following registers are clobbered by this function:
792 ARM_FUNC_ALIAS gesf2 gtsf2
798 ARM_FUNC_ALIAS lesf2 ltsf2
802 ARM_FUNC_START cmpsf2
803 ARM_FUNC_ALIAS nesf2 cmpsf2
804 ARM_FUNC_ALIAS eqsf2 cmpsf2
805 mov ip, #1 @ how should we specify unordered here?
807 1: str ip, [sp, #-4]!
808 .cfi_adjust_cfa_offset 4 @ CFA is now sp + previousOffset + 4.
809 @ We're not adding CFI for ip as it's pushed into the stack only because
810 @ it may be popped off later as a return value (i.e. we're not preserving
813 @ Trap any INF/NAN first.
818 COND(mvn,s,ne) ip, r3, asr #24
821 @ Save the current CFI state. This is done because the branch is conditional,
822 @ and if we don't take it we'll issue a .cfi_adjust_cfa_offset and return.
823 @ If we do take it, however, the .cfi_adjust_cfa_offset from the non-branch
824 @ code will affect the branch code as well. To avoid this we'll restore
825 @ the current state before executing the branch code.
828 @ Note that 0.0 is equal to -0.0.
830 .cfi_adjust_cfa_offset -4 @ CFA is now sp + previousOffset.
832 orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag
834 teqne r0, r1 @ if not 0 compare sign
836 COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0
840 movhi r0, r1, asr #31
842 mvnlo r0, r1, asr #31
849 @ Restore the previous CFI state (i.e. keep the CFI state as it was
850 @ before the branch).
857 4: mvns ip, r3, asr #24
860 beq 2b @ r1 is not NAN
862 5: ldr r0, [sp], #4 @ return unordered code.
863 .cfi_adjust_cfa_offset -4 @ CFA is now sp + previousOffset.
@ AEABI flag-returning comparisons: result is delivered in the CPSR
@ (Z and C flags) rather than in r0, and r0-r3 must be preserved.
875 ARM_FUNC_START aeabi_cfrcmple
883 ARM_FUNC_START aeabi_cfcmpeq
884 ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
886 @ The status-returning routines are required to preserve all
887 @ registers except ip, lr, and cpsr.
888 6: do_push {r0, r1, r2, r3, lr}
889 .cfi_adjust_cfa_offset 20 @ CFA is at sp + previousOffset + 20
890 .cfi_rel_offset r0, 0 @ Registers are saved from sp to sp + 16
891 .cfi_rel_offset r1, 4
892 .cfi_rel_offset r2, 8
893 .cfi_rel_offset r3, 12
894 .cfi_rel_offset lr, 16
897 @ Set the Z flag correctly, and the C flag unconditionally.
899 @ Clear the C flag if the return value was -1, indicating
900 @ that the first operand was smaller than the second.
903 RETLDM "r0, r1, r2, r3"
906 FUNC_END aeabi_cfcmple
907 FUNC_END aeabi_cfcmpeq
908 FUNC_END aeabi_cfrcmple
@ __aeabi_fcmpeq: r0 := 1 if r0 == r1, else 0 (unordered -> 0).
910 ARM_FUNC_START aeabi_fcmpeq
913 str lr, [sp, #-8]! @ sp -= 8
914 .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
915 .cfi_rel_offset lr, 0 @ lr is at sp
917 ARM_CALL aeabi_cfcmple
919 moveq r0, #1 @ Equal to.
920 movne r0, #0 @ Less than, greater than, or unordered.
924 FUNC_END aeabi_fcmpeq
@ __aeabi_fcmplt: r0 := 1 if r0 < r1, else 0 (unordered -> 0).
926 ARM_FUNC_START aeabi_fcmplt
929 str lr, [sp, #-8]! @ sp -= 8
930 .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
931 .cfi_rel_offset lr, 0 @ lr is at sp
933 ARM_CALL aeabi_cfcmple
935 movcc r0, #1 @ Less than.
936 movcs r0, #0 @ Equal to, greater than, or unordered.
940 FUNC_END aeabi_fcmplt
@ __aeabi_fcmple: r0 := 1 if r0 <= r1, else 0 (unordered -> 0).
942 ARM_FUNC_START aeabi_fcmple
945 str lr, [sp, #-8]! @ sp -= 8
946 .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
947 .cfi_rel_offset lr, 0 @ lr is at sp
949 ARM_CALL aeabi_cfcmple
951 movls r0, #1 @ Less than or equal to.
952 movhi r0, #0 @ Greater than or unordered.
956 FUNC_END aeabi_fcmple
@ __aeabi_fcmpge: r0 := 1 if r0 >= r1, else 0 (unordered -> 0).
@ Implemented by comparing the operands in reverse order (cfrcmple).
958 ARM_FUNC_START aeabi_fcmpge
961 str lr, [sp, #-8]! @ sp -= 8
962 .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
963 .cfi_rel_offset lr, 0 @ lr is at sp
965 ARM_CALL aeabi_cfrcmple
967 movls r0, #1 @ Operand 2 is less than or equal to operand 1.
968 movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
972 FUNC_END aeabi_fcmpge
@ __aeabi_fcmpgt: r0 := 1 if r0 > r1, else 0 (unordered -> 0).
@ Implemented by comparing the operands in reverse order (cfrcmple).
974 ARM_FUNC_START aeabi_fcmpgt
977 str lr, [sp, #-8]! @ sp -= 8
978 .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
979 .cfi_rel_offset lr, 0 @ lr is at sp
981 ARM_CALL aeabi_cfrcmple
983 movcc r0, #1 @ Operand 2 is less than operand 1.
984 movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
985 @ or they are unordered.
989 FUNC_END aeabi_fcmpgt
991 #endif /* L_cmpsf2 */
993 #ifdef L_arm_unordsf2
@ unordsf2 / __aeabi_fcmpun: r0 := 1 if either operand is NAN
@ (arguments unordered), else 0.  Each operand is checked by testing
@ whether its exponent field is all ones.
995 ARM_FUNC_START unordsf2
996 ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
1001 mvns ip, r2, asr #24
1005 1: mvns ip, r3, asr #24
1009 2: mov r0, #0 @ arguments are ordered.
1011 3: mov r0, #1 @ arguments are unordered.
1015 FUNC_END aeabi_fcmpun
1018 #endif /* L_unordsf2 */
1020 #ifdef L_arm_fixsfsi
@ fixsfsi / __aeabi_f2iz: convert float (r0) to signed int, truncating
@ toward zero.  Out-of-range values saturate to INT_MAX/INT_MIN;
@ NAN converts to 0.
1022 ARM_FUNC_START fixsfsi
1023 ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
1026 @ check exponent range.
1028 cmp r2, #(127 << 24)
1029 bcc 1f @ value is too small
1031 subs r2, r3, r2, lsr #24
1032 bls 2f @ value is too large
1036 orr r3, r3, #0x80000000
1037 tst r0, #0x80000000 @ the sign bit
1038 shift1 lsr, r0, r3, r2
1046 2: cmp r2, #(127 + 31 - 0xff)
1050 3: ands r0, r0, #0x80000000 @ the sign bit
1052 moveq r0, #0x7fffffff @ the maximum signed positive si
1055 4: mov r0, #0 @ What should we convert NAN to?
1062 #endif /* L_fixsfsi */
1064 #ifdef L_arm_fixunssfsi
@ fixunssfsi / __aeabi_f2uiz: convert float (r0) to unsigned int,
@ truncating toward zero.  Negative and too-small values yield 0;
@ too-large values saturate to UINT_MAX; NAN converts to 0.
1066 ARM_FUNC_START fixunssfsi
1067 ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
1070 @ check exponent range.
1072 bcs 1f @ value is negative
1073 cmp r2, #(127 << 24)
1074 bcc 1f @ value is too small
1076 subs r2, r3, r2, lsr #24
1077 bmi 2f @ value is too large
1081 orr r3, r3, #0x80000000
1082 shift1 lsr, r0, r3, r2
1088 2: cmp r2, #(127 + 31 - 0xff)
1092 3: mov r0, #0xffffffff @ maximum unsigned si
1095 4: mov r0, #0 @ What should we convert NAN to?
1099 FUNC_END aeabi_f2uiz
1102 #endif /* L_fixunssfsi */