@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright (C) 1995-2020 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/* An executable stack is *not* required for these functions.  */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
#endif /* __ELF__ and __linux__ */

/* Some attributes that are common to all routines in this file.  */
/* Tag_ABI_align_needed: This code does not require 8-byte
   alignment from the caller.  */
/* .eabi_attribute 24, 0  -- default setting.  */
/* Tag_ABI_align_preserved: This code preserves 8-byte
   alignment in any callee.  */
#endif /* __ARM_EABI__ */
/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
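/* A quick illustration of the two-level concatenation (a hypothetical C
   analogue, not part of this file): the indirection through CONCAT1 is
   what forces __USER_LABEL_PREFIX__ to be expanded before ## pastes the
   tokens; a single-level macro would paste the literal macro name
   instead.  */
#if 0
#define PREFIX _			/* stand-in for __USER_LABEL_PREFIX__ */
#define XCONCAT2(a, b) a ## b
#define XCONCAT1(a, b) XCONCAT2 (a, b)
#define XSYM(x) XCONCAT1 (PREFIX, x)
extern int XSYM (udivsi3);		/* declares _udivsi3 */
#endif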
#define __PLT__ /* Not supported in Thumb assembler (for now).  */
#elif defined __vxworks && !defined __PIC__
#define __PLT__ /* Not supported by the kernel loader.  */

#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)

/* Function end macros.  Variants for interworking.  */

/* There are times when we might prefer Thumb1 code even if ARM code is
   permitted, for example, the code might be smaller, or there might be
   interworking problems with switching to ARM state if interworking is
   not enabled.  */
#if (defined(__thumb__) \
     && !defined(__thumb2__) \
     && (!defined(__THUMB_INTERWORK__) \
	 || defined (__OPTIMIZE_SIZE__) \
	 || !__ARM_ARCH_ISA_ARM))
# define __prefer_thumb__

#if !__ARM_ARCH_ISA_ARM && __ARM_ARCH_ISA_THUMB == 1
#define NOT_ISA_TARGET_32BIT 1

/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH > 4) || defined(__ARM_ARCH_4T__)

# define RETc(x) bx##x lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH == 4)

/* Always use bx, not ldr pc.  */
# if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#  define __INTERWORKING__
# endif /* __THUMB__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
# if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#  define __INTERWORKING_STUBS__
# endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH == 4 */

# define RET	mov pc, lr
# define RETc(x) mov##x pc, lr

.macro cfi_pop advance, reg, cfa_offset
	.pushsection .debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
	.byte	0xe		/* DW_CFA_def_cfa_offset */

.macro cfi_push advance, reg, offset, cfa_offset
	.pushsection .debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.byte	(0x80 | \reg)	/* DW_CFA_offset */
	.uleb128 (\offset / -4)
	.byte	0xe		/* DW_CFA_def_cfa_offset */

.macro cfi_start start_label, end_label
	.pushsection .debug_frame
	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
	.4byte	0xffffffff	@ CIE Identifier Tag
	.byte	0x1		@ CIE Version
	.ascii	"\0"		@ CIE Augmentation
	.uleb128 0x1		@ CIE Code Alignment Factor
	.sleb128 -4		@ CIE Data Alignment Factor
	.byte	0xe		@ CIE RA Column
	.byte	0xc		@ DW_CFA_def_cfa
	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
	.4byte	\start_label		@ FDE initial location
	.4byte	\end_label-\start_label	@ FDE address range

.macro cfi_end end_label
	.pushsection .debug_frame

/* Don't pass dirn, it's there just to get token pasting right.  */

.macro RETLDM regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
	ldr\cond	lr, [sp], #8
# if defined(__thumb2__)
	ldm\cond\dirn	sp!, {\regs, lr}

	/* Mark LR as restored.  */
97:	cfi_pop 97b - \unwind, 0xe, 0x0

	/* Caller is responsible for providing IT instruction.  */
	ldr\cond	pc, [sp], #8
# if defined(__thumb2__)
	ldm\cond\dirn	sp!, {\regs, pc}

/* The Unified assembly syntax allows the same code to be assembled for both
   ARM and Thumb-2.  However this is only supported by recent gas, so define
   a set of macros to allow ARM code on older assemblers.  */
#if defined(__thumb2__)
.macro do_it cond, suffix=""
.macro shift1 op, arg0, arg1, arg2
	\op	\arg0, \arg1, \arg2
#define COND(op1, op2, cond) op1 ## op2 ## cond
/* Perform an arithmetic operation with a variable shift operand.  This
   requires two instructions and a scratch register on Thumb-2.  */
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\shiftop \tmp, \src2, \shiftreg
	\name	\dest, \src1, \tmp
.macro do_it cond, suffix=""
.macro shift1 op, arg0, arg1, arg2
	mov	\arg0, \arg1, \op \arg2
#define do_push	stmfd sp!,
#define do_pop	ldmfd sp!,
#define COND(op1, op2, cond) op1 ## cond ## op2
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\name	\dest, \src1, \src2, \shiftop \shiftreg

.macro ARM_LDIV0 name signed
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	b	SYM (__aeabi_idiv0) __PLT__

.macro ARM_LDIV0 name signed
98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.

.macro THUMB_LDIV0 name signed
#ifdef NOT_ISA_TARGET_32BIT
	bl	SYM(__aeabi_idiv0)
	@ We know we are not on armv4t, so pop pc is safe.
#elif defined(__thumb2__)
	.ifc	\signed, unsigned
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	b.w	SYM(__aeabi_idiv0) __PLT__
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	b	SYM(__aeabi_idiv0) __PLT__

.macro THUMB_LDIV0 name signed
98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)

.macro DIV_FUNC_END name signed
	cfi_start __\name, LSYM(Lend_div0)
	THUMB_LDIV0 \name \signed
	ARM_LDIV0 \name \signed
	cfi_end LSYM(Lend_div0)
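/* For reference, the EABI LDIV0 behaviour above, modelled in C (a sketch,
   not part of the library): the quotient is saturated before the tail
   call to __aeabi_idiv0 signals the error.  The function name and the
   32-bit-only scope are illustrative assumptions.  */
#if 0
static int ldiv0_model (int dividend, int is_unsigned)
{
  if (is_unsigned)
    return dividend != 0 ? (int) 0xffffffffu : 0;	/* movne */
  if (dividend > 0)
    return 0x7fffffff;					/* movgt */
  if (dividend < 0)
    return (int) 0x80000000u;				/* movlt */
  return 0;
  /* ...then __aeabi_idiv0 is invoked; the non-EABI variants instead
     call __div0 and return 0.  */
}
#endif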
.macro THUMB_FUNC_START name

/* Function start macros.  Variants for ARM and Thumb.  */

#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
# if defined(__thumb2__)
#define THUMB_SYNTAX .syntax divided

.macro FUNC_START name

.macro ARM_SYM_START name

/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  */

#if defined(__thumb2__)

/* For Thumb-2 we build everything in thumb mode.  */
.macro ARM_FUNC_START name
#define EQUIV .thumb_set

#elif defined(__INTERWORKING_STUBS__)

.macro ARM_FUNC_START name

/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */

#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */

#ifdef NOT_ISA_TARGET_32BIT
#define EQUIV .thumb_set

.macro ARM_FUNC_START name

.macro FUNC_ALIAS new old
#if defined (__thumb__)
	.thumb_set SYM (__\new), SYM (__\old)
	.set	SYM (__\new), SYM (__\old)

#ifndef NOT_ISA_TARGET_32BIT
.macro ARM_FUNC_ALIAS new old
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)

/* Register aliases.  */

work		.req	r4	@ XXXX is this safe ?

/* ------------------------------------------------------------------------ */
/* Bodies of the division and modulo routines.  */
/* ------------------------------------------------------------------------ */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if defined (__ARM_FEATURE_CLZ) && ! defined (__OPTIMIZE_SIZE__)

#if defined (__thumb2__)
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsb	\curbit, \curbit, #31
	add	\curbit, \result, \curbit, lsl #4
	.set	shift, shift - 1
	cmp.w	\dividend, \divisor, lsl #shift
	adc.w	\result, \result, \result
	subcs.w	\dividend, \dividend, \divisor, lsl #shift
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsbs	\curbit, \curbit, #31
	addne	\curbit, \curbit, \curbit, lsl #1
	addne	pc, pc, \curbit, lsl #2
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	adc	\result, \result, \result
	subcs	\dividend, \dividend, \divisor, lsl #shift

#else /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
#if defined (__ARM_FEATURE_CLZ)

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result

#else /* !defined (__ARM_FEATURE_CLZ) */

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibble in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1

#endif /* !defined (__ARM_FEATURE_CLZ) */

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result, \result, \curbit
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result, \result, \curbit, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result, \result, \curbit, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result, \result, \curbit, lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit, \curbit, lsr #4	@ No, any more bits to do?
	movne	\divisor, \divisor, lsr #4

#endif /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
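/* For reference, a minimal C model of the shift-and-subtract algorithm
   that ARM_DIV_BODY implements (a sketch only: it ignores the 4-bit
   unrolling and the CLZ-based dispatch above, and like the asm it
   assumes the callers have already rejected a zero divisor).  */
#if 0
static unsigned udiv_model (unsigned dividend, unsigned divisor)
{
  unsigned curbit = 1, result = 0;

  /* Scale the divisor up to the dividend.  */
  while (divisor < dividend && !(divisor & 0x80000000u))
    {
      divisor <<= 1;
      curbit <<= 1;
    }
  /* Subtract back down, accumulating quotient bits.  */
  do
    {
      if (dividend >= divisor)
	{
	  dividend -= divisor;
	  result |= curbit;
	}
      divisor >>= 1;
      curbit >>= 1;
    }
  while (curbit);
  return result;
}
#endif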
/* ------------------------------------------------------------------------ */
.macro ARM_DIV2_ORDER divisor, order

#if defined (__ARM_FEATURE_CLZ)
	rsb	\order, \order, #31

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1
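/* ARM_DIV2_ORDER computes the bit position of a power-of-two divisor (it
   is only reached on the power-of-two fast path of its callers).  A C
   sketch of the non-CLZ binary search above: */
#if 0
static unsigned div2_order_model (unsigned divisor)	/* divisor == 1 << n */
{
  unsigned order = 0;
  if (divisor >= 1u << 16) { divisor >>= 16; order += 16; }
  if (divisor >= 1u << 8)  { divisor >>= 8;  order += 8; }
  if (divisor >= 1u << 4)  { divisor >>= 4;  order += 4; }
  if (divisor > 1u << 2)			/* divisor == 8 */
    order += 3;
  else						/* divisor is 1, 2 or 4 */
    order += divisor >> 1;
  return order;
}
#endif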
/* ------------------------------------------------------------------------ */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if defined(__ARM_FEATURE_CLZ) && ! defined (__OPTIMIZE_SIZE__)

	clz	\spare, \dividend
	sub	\order, \order, \spare
	rsbs	\order, \order, #31
	addne	pc, pc, \order, lsl #3
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	subcs	\dividend, \dividend, \divisor, lsl #shift

#else /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
#if defined (__ARM_FEATURE_CLZ)

	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else /* !defined (__ARM_FEATURE_CLZ) */

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1

#endif /* !defined (__ARM_FEATURE_CLZ) */

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
	subs	\order, \order, #3	@ yes, 3 is intended here

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4

	@ Either 1, 2 or 3 comparisons/subtractions are left.
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor

#endif /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
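/* A matching C model for ARM_MOD_BODY (again a sketch: the real macro
   unrolls four compare/subtract steps per iteration and adjusts the loop
   count accordingly, and a zero divisor is assumed to have been rejected
   by the caller).  */
#if 0
static unsigned umod_model (unsigned dividend, unsigned divisor)
{
  unsigned order = 0;

  /* Scale the divisor up to the dividend, counting the shifts.  */
  while (divisor < dividend && !(divisor & 0x80000000u))
    {
      divisor <<= 1;
      order++;
    }
  /* Subtract back down; after order + 1 steps only the remainder
     is left in the dividend.  */
  do
    {
      if (dividend >= divisor)
	dividend -= divisor;
      divisor >>= 1;
    }
  while (order--);
  return dividend;
}
#endif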
/* ------------------------------------------------------------------------ */
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, dividend

	@ Set work to 0x80000000

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, dividend

	@ Test for possible subtractions ...
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done, we can fix them up
	cmp	dividend, divisor
	sub	dividend, dividend, divisor
	lsr	work, divisor, #1
	sub	dividend, dividend, work
	lsr	work, divisor, #2
	sub	dividend, dividend, work
	lsr	work, divisor, #3
	sub	dividend, dividend, work

	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be ok,
	@ since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	sub	dividend, dividend, divisor
	orr	result, result, curbit
	lsr	work, divisor, #1
	sub	dividend, dividend, work
	lsr	work, divisor, #2
	sub	dividend, dividend, work
	lsr	work, divisor, #3
	sub	dividend, dividend, work
	cmp	dividend, #0			@ Early termination?
	lsr	curbit, #4			@ No, any more bits to do?

	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	beq	LSYM(Lgot_result)

	@ If we terminated early, because dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather than relying upon the TSTs to prevent the additions) since
	@ the bit in ip could be in the top two bits which might then match
	@ with one of the smaller RORs.
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #3
	lsr	work, divisor, #2
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #1
/* If performance is preferred, the following functions are provided.  */
#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)

/* Branch to div(n), and jump to label if curbit is lower than the divisor.  */
.macro BranchToDiv n, label
	lsr	curbit, dividend, \n

/* Body of div(n).  Shift the divisor left by n bits and compare it with
   the dividend.  Update the dividend with the subtraction result.  */
	lsr	curbit, dividend, \n
	lsl	curbit, divisor, \n
	sub	dividend, dividend, curbit
1:	adc	result, result

/* The body of division with positive divisor.  Unless the divisor is very
   big, shift it up in multiples of four bits, since this is the amount of
   unwinding in the main division loop.  Continue shifting until the divisor
   is larger than the dividend.  */
.macro THUMB1_Div_Positive
	BranchToDiv #1, LSYM(Lthumb1_div1)
	BranchToDiv #4, LSYM(Lthumb1_div4)
	BranchToDiv #8, LSYM(Lthumb1_div8)
	BranchToDiv #12, LSYM(Lthumb1_div12)
	BranchToDiv #16, LSYM(Lthumb1_div16)
LSYM(Lthumb1_div_large_positive):
	lsl	divisor, divisor, #8
	lsr	curbit, dividend, #16
	lsl	divisor, divisor, #8
	beq	LSYM(Ldivbyzero_waypoint)
1:	lsr	curbit, dividend, #12
	blo	LSYM(Lthumb1_div12)
	b	LSYM(Lthumb1_div16)
LSYM(Lthumb1_div_loop):
	lsr	divisor, divisor, #8
	bcs	LSYM(Lthumb1_div_loop)
	sub	divisor, dividend, divisor
	cpy	divisor, dividend
1:	adc	result, result
LSYM(Ldivbyzero_waypoint):

/* The body of division with negative divisor.  Similar to
   THUMB1_Div_Positive except that the shift steps are in multiples
   of six bits.  */
.macro THUMB1_Div_Negative
	lsr	result, divisor, #31
1:	asr	curbit, dividend, #32
	neg	dividend, dividend
2:	eor	curbit, result
	BranchToDiv #4, LSYM(Lthumb1_div_negative4)
	BranchToDiv #8, LSYM(Lthumb1_div_negative8)
LSYM(Lthumb1_div_large):
	lsl	divisor, divisor, #6
	lsr	curbit, dividend, #8
	blo	LSYM(Lthumb1_div_negative8)
	lsl	divisor, divisor, #6
	asr	result, result, #6
	blo	LSYM(Lthumb1_div_negative8)
	lsl	divisor, divisor, #6
	asr	result, result, #6
	blo	LSYM(Lthumb1_div_negative8)
	lsl	divisor, divisor, #6
	beq	LSYM(Ldivbyzero_negative)
	asr	result, result, #6
	b	LSYM(Lthumb1_div_negative8)
LSYM(Lthumb1_div_negative_loop):
	lsr	divisor, divisor, #6
LSYM(Lthumb1_div_negative8):
LSYM(Lthumb1_div_negative4):
	bcs	LSYM(Lthumb1_div_negative_loop)
	sub	divisor, dividend, divisor
	cpy	divisor, dividend
	asr	curbit, curbit, #1
	cpy	dividend, result
	neg	dividend, dividend
	neg	divisor, divisor
LSYM(Ldivbyzero_negative):
	asr	curbit, curbit, #1
	neg	dividend, dividend
#endif /* ARM Thumb version.  */
1043 #endif /* ARM Thumb version. */
1045 /* ------------------------------------------------------------------------ */
1046 /* Start of the Real Functions */
1047 /* ------------------------------------------------------------------------ */
1050 #if defined(__prefer_thumb__)
1053 FUNC_ALIAS aeabi_uidiv udivsi3
1054 #if defined(__OPTIMIZE_SIZE__)
1058 LSYM(udivsi3_skip_div0_test):
1063 cmp dividend, divisor
1064 blo LSYM(Lgot_result)
1066 THUMB_DIV_MOD_BODY 0
1072 /* Implementation of aeabi_uidiv for ARMv6m. This version is only
1073 used in ARMv6-M when we need an efficient implementation. */
1075 LSYM(udivsi3_skip_div0_test):
1078 #endif /* __OPTIMIZE_SIZE__ */
1080 #elif defined(__ARM_ARCH_EXT_IDIV__)
1082 ARM_FUNC_START udivsi3
1083 ARM_FUNC_ALIAS aeabi_uidiv udivsi3
1091 #else /* ARM version/Thumb-2. */
1093 ARM_FUNC_START udivsi3
1094 ARM_FUNC_ALIAS aeabi_uidiv udivsi3
1096 /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
1097 check for division-by-zero a second time. */
1098 LSYM(udivsi3_skip_div0_test):
1108 ARM_DIV_BODY r0, r1, r2, r3
1118 12: ARM_DIV2_ORDER r1, r2
1123 #endif /* ARM version */
1125 DIV_FUNC_END udivsi3 unsigned
1127 #if defined(__prefer_thumb__)
1128 FUNC_START aeabi_uidivmod
1131 # if defined(__OPTIMIZE_SIZE__)
1133 bl LSYM(udivsi3_skip_div0_test)
1139 /* Both the quotient and remainder are calculated simultaneously
1140 in THUMB1_Div_Positive. There is no need to calculate the
1141 remainder again here. */
1142 b LSYM(udivsi3_skip_div0_test)
1144 # endif /* __OPTIMIZE_SIZE__ */
1146 #elif defined(__ARM_ARCH_EXT_IDIV__)
1147 ARM_FUNC_START aeabi_uidivmod
1155 ARM_FUNC_START aeabi_uidivmod
1158 stmfd sp!, { r0, r1, lr }
1159 bl LSYM(udivsi3_skip_div0_test)
1160 ldmfd sp!, { r1, r2, lr }
1165 FUNC_END aeabi_uidivmod
1167 #endif /* L_udivsi3 */
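/* The __aeabi_uidivmod wrapper above returns the quotient in r0 and the
   remainder in r1: it saves the operands, calls the division, and then
   recovers the remainder with a multiply and subtract.  In C terms (the
   struct is a hypothetical stand-in for the two-register return): */
#if 0
typedef struct { unsigned quot; unsigned rem; } uidivmod_model_t;

static uidivmod_model_t uidivmod_model (unsigned num, unsigned den)
{
  uidivmod_model_t r;
  r.quot = num / den;			/* what __udivsi3 computes */
  r.rem = num - r.quot * den;		/* remainder without a second divide */
  return r;
}
#endif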
/* ------------------------------------------------------------------------ */

#if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1

	ARM_FUNC_START umodsi3

#elif defined(__thumb__)
	cmp	dividend, divisor
	THUMB_DIV_MOD_BODY 1

#else /* ARM version.  */
	subs	r2, r1, #1		@ compare divisor with 1
	cmpne	r0, r1			@ compare dividend with divisor
	tsthi	r1, r2			@ see if divisor is power of 2
	ARM_MOD_BODY r0, r1, r2, r3

#endif /* ARM version.  */

	DIV_FUNC_END umodsi3 unsigned
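/* The three-instruction test above (subs/cmpne/tsthi) is a power-of-two
   check: d is a power of two exactly when (d & (d - 1)) == 0, in which
   case n % d reduces to a single AND.  A C sketch of the fast path: */
#if 0
static unsigned umod_pow2_model (unsigned n, unsigned d)
{
  if (d != 0 && (d & (d - 1)) == 0)	/* tst d, d-1 after subs d-1 */
    return n & (d - 1);
  return n % d;				/* otherwise: ARM_MOD_BODY */
}
#endif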
#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */

#if defined(__prefer_thumb__)

	FUNC_ALIAS aeabi_idiv divsi3
#if defined(__OPTIMIZE_SIZE__)

LSYM(divsi3_skip_div0_test):
	eor	work, divisor		@ Save the sign of the result.
	neg	divisor, divisor	@ Loops below use unsigned.
	neg	dividend, dividend
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

/* Implementation of aeabi_idiv for ARMv6m.  This version is only
   used in ARMv6-M when we need an efficient implementation.  */
LSYM(divsi3_skip_div0_test):
	cpy	curbit, dividend
	bmi	LSYM(Lthumb1_div_negative)
LSYM(Lthumb1_div_positive):
LSYM(Lthumb1_div_negative):
#endif /* __OPTIMIZE_SIZE__ */

#elif defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

#else /* ARM/Thumb-2 version.  */

	ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

LSYM(divsi3_skip_div0_test):
	eor	ip, r0, r1		@ save the sign of the result.
	rsbmi	r1, r1, #0		@ loops below use unsigned.
	subs	r2, r1, #1		@ division by 1 or -1 ?
	rsbmi	r3, r0, #0		@ positive dividend value
	tst	r1, r2			@ divisor is power of 2 ?
	ARM_DIV_BODY r3, r1, r0, r2
10:	teq	ip, r0			@ same sign ?
	moveq	r0, ip, asr #31
12:	ARM_DIV2_ORDER r1, r2

#endif /* ARM version */

	DIV_FUNC_END divsi3 signed
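/* Sign handling in the ARM/Thumb-2 divsi3 above, in C terms (a sketch):
   the XOR of the operands (kept in ip) carries the sign of the quotient,
   and the division itself runs on absolute values.  */
#if 0
static int sdiv_model (int num, int den)
{
  int sign = num ^ den;				/* eor ip, r0, r1 */
  unsigned un = num < 0 ? 0u - (unsigned) num : (unsigned) num;
  unsigned ud = den < 0 ? 0u - (unsigned) den : (unsigned) den;
  unsigned uq = un / ud;			/* the unsigned loop above */
  return sign < 0 ? -(int) uq : (int) uq;
}
#endif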
#if defined(__prefer_thumb__)
FUNC_START aeabi_idivmod
# if defined(__OPTIMIZE_SIZE__)
	bl	LSYM(divsi3_skip_div0_test)
	/* Both the quotient and remainder are calculated simultaneously
	   in THUMB1_Div_Positive and THUMB1_Div_Negative.  There is no
	   need to calculate the remainder again here.  */
	b	LSYM(divsi3_skip_div0_test)
# endif /* __OPTIMIZE_SIZE__ */

#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_idivmod
ARM_FUNC_START aeabi_idivmod
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(divsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }
	FUNC_END aeabi_idivmod

#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */

#if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1

	ARM_FUNC_START modsi3

#elif defined(__thumb__)
	neg	divisor, divisor	@ Loops below use unsigned.
	@ Need to save the sign of the dividend, unfortunately, we need
	@ work later on.  Must do this after saving the original value of
	@ the work register, because we will pop this value off first.
	neg	dividend, dividend
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)
	THUMB_DIV_MOD_BODY 1
	neg	dividend, dividend

#else /* ARM version.  */
	rsbmi	r1, r1, #0		@ loops below use unsigned.
	movs	ip, r0			@ preserve sign of dividend
	rsbmi	r0, r0, #0		@ if negative make positive
	subs	r2, r1, #1		@ compare divisor with 1
	cmpne	r0, r1			@ compare dividend with divisor
	tsthi	r1, r2			@ see if divisor is power of 2
	ARM_MOD_BODY r0, r1, r2, r3

#endif /* ARM version */

	DIV_FUNC_END modsi3 signed

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */

	FUNC_START aeabi_idiv0
	FUNC_START aeabi_ldiv0
	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0

#endif /* L_divmodsi_tools */
/* ------------------------------------------------------------------------ */

@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls

/* Constant taken from <asm/signal.h>.  */

	cfi_start __aeabi_ldiv0, LSYM(Lend_aeabi_ldiv0)
ARM_FUNC_START aeabi_idiv0
ARM_FUNC_START aeabi_ldiv0
98:	cfi_push 98b - __aeabi_ldiv0, 0xe, -0x4, 0x8
	cfi_start __div0, LSYM(Lend_div0)
98:	cfi_push 98b - __div0, 0xe, -0x4, 0x8
	bl	SYM(raise) __PLT__
	RETLDM	r1 unwind=98b
	cfi_end LSYM(Lend_aeabi_ldiv0)
	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
	cfi_end LSYM(Lend_div0)

#endif /* L_dvmd_lnx */
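/* What the GNU/Linux handler above amounts to in C (a sketch: SIGFPE is
   the constant taken from <asm/signal.h>; the asm additionally preserves
   r1 across the call so the upper half of a 64-bit result survives): */
#if 0
#include <signal.h>

static int div0_lnx_model (void)
{
  return raise (SIGFPE);	/* mov r0, #SIGFPE; bl raise */
}
#endif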
#ifdef L_clear_cache
#if defined __ARM_EABI__ && defined __linux__
@ EABI GNU/Linux call to cacheflush syscall.
ARM_FUNC_START clear_cache
#if __ARM_ARCH >= 7 || defined(__ARM_ARCH_6T2__)
	FUNC_END clear_cache
#error "This is only for ARM EABI GNU/Linux"
#endif /* L_clear_cache */

#ifdef L_speculation_barrier
FUNC_START speculation_barrier
#elif defined __ARM_EABI__ && defined __linux__
	/* We don't have a speculation barrier directly for this
	   platform/architecture variant.  But we can use a kernel
	   clear_cache service routine which will emit such instructions
	   if run on a later version of the architecture.  We don't
	   really want to flush the cache, but we must give it a valid
	   address, so just clear pc..pc+1.  */
#if defined __thumb__ && !defined __thumb2__
#ifdef __ARM_ARCH_6T2__
	add	r0, pc, #0	/* ADR.  */
#endif /* Thumb1 only */
#warning "No speculation barrier defined for this platform"
FUNC_END speculation_barrier

/* ------------------------------------------------------------------------ */
/* Dword shift operations.  */
/* All the following Dword shift variants rely on the fact that
	shft xxx, Reg
   is in fact done as
	shft xxx, (Reg & 255)
   so for Reg value in (32...63) and (-1...-31) we will get zero (in the
   case of logical shifts) or the sign (for asr).  */
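/* A C model of the decomposition used by the shifts below, written with
   explicit cases because C, unlike the ARM register shifts described
   above, leaves shifts of 32 or more undefined (lshrdi3 shown as a
   sketch; lasr and llsl are analogous): */
#if 0
static unsigned long long lshrdi3_model (unsigned long long x, unsigned n)
{
  unsigned al = (unsigned) x, ah = (unsigned) (x >> 32);

  if (n == 0)
    return x;
  if (n < 32)
    {
      al = (al >> n) | (ah << (32 - n));	/* the movmi/orrmi pair */
      ah >>= n;
    }
  else
    {
      al = ah >> (n - 32);			/* movpl al, ah, lsr r3 */
      ah = 0;
    }
  return ((unsigned long long) ah << 32) | al;
}
#endif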
/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */

	FUNC_ALIAS aeabi_llsr lshrdi3
	movmi	al, al, lsr r2
	movpl	al, ah, lsr r3
	orrmi	al, al, ah, lsl ip

	FUNC_ALIAS aeabi_lasr ashrdi3
	@ If r2 is negative at this point the following step would OR
	@ the sign bit into all of AL.  That's not what we want...
	movmi	al, al, lsr r2
	movpl	al, ah, asr r3
	orrmi	al, al, ah, lsl ip

	FUNC_ALIAS aeabi_llsl ashldi3
	movmi	ah, ah, lsl r2
	movpl	ah, al, lsl r3
	orrmi	ah, ah, al, lsr ip

#endif /* __symbian__ */

#ifdef NOT_ISA_TARGET_32BIT
	cmp	r0, r3 /* 0x10000 */
	cmp	r0, r3 /* #0x100 */
	cmp	r0, r3 /* #0x10 */
	.byte	4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0

ARM_FUNC_START clzsi2
# if defined (__ARM_FEATURE_CLZ)
	movcs	r0, r0, lsr #16
	movcs	r0, r0, lsr #8
	movcs	r0, r0, lsr #4
	.byte	4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
# endif /* !defined (__ARM_FEATURE_CLZ) */
#endif /* L_clzsi2 */
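/* The non-CLZ __clzsi2 above is a binary search: three compare-and-shift
   steps narrow the argument to a nibble, and the 16-byte table finishes
   the job.  A C sketch (with this table the result for 0 comes out as
   32): */
#if 0
static int clz_model (unsigned x)
{
  static const unsigned char tab[16] =
    { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
  int n = 28;

  if (x >= 1u << 16) { x >>= 16; n -= 16; }
  if (x >= 1u << 8)  { x >>= 8;  n -= 8; }
  if (x >= 1u << 4)  { x >>= 4;  n -= 4; }
  return n + tab[x];
}
#endif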
#if !defined (__ARM_FEATURE_CLZ)
# ifdef NOT_ISA_TARGET_32BIT
ARM_FUNC_START clzdi2
# ifdef NOT_ISA_TARGET_32BIT
#else /* defined (__ARM_FEATURE_CLZ) */
ARM_FUNC_START clzdi2
#endif /* L_clzdi2 */

#ifdef NOT_ISA_TARGET_32BIT
	cmp	r0, r3 /* 0x10000 */
	cmp	r0, r3 /* #0x100 */
	cmp	r0, r3 /* #0x10 */
	.byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31

ARM_FUNC_START ctzsi2
# if defined (__ARM_FEATURE_CLZ)
	movcs	r0, r0, lsr #16
	movcs	r0, r0, lsr #8
	movcs	r0, r0, lsr #4
	.byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
# endif /* !defined (__ARM_FEATURE_CLZ) */
#endif /* L_ctzsi2 */
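/* __ctzsi2 reuses the same machinery: isolating the lowest set bit with
   x & -x turns counting trailing zeros into locating a single bit, which
   is why the table above is simply 31 minus the clz table.  A C sketch
   (the result for 0 is not meaningful, as in the asm): */
#if 0
static int ctz_model (unsigned x)
{
  x &= 0u - x;			/* isolate the lowest set bit */
  return 31 - clz_model (x);	/* clz_model: see the sketch above */
}
#endif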
/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
    || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \

#if defined L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of the function to be called is loaded into a register and
   then one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */

.macro call_via register
THUMB_FUNC_START _call_via_\register
	SIZE	(_call_via_\register)

#endif /* L_call_via_rX */

/* Don't bother with the old interworking routines for Thumb-2.  */
/* ??? Maybe only omit these on "m" variants.  */
#if !defined(__thumb2__) && __ARM_ARCH_ISA_ARM

#if defined L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of the function to be called is loaded into a register and then one of
   these labels is called via a BL instruction.  This puts the return
   address into the link register with the bottom bit set, and the code
   here switches to the correct mode before executing the function.
   Unfortunately the target code cannot be relied upon to return via a BX
   instruction, so instead we have to store the return address on the
   stack and allow the called function to return here instead.  Upon
   return we recover the real return address and use a BX to get back to
   Thumb mode.

   There are three variations of this code.  The first,
   _interwork_call_via_rN(), will push the return address onto the
   stack and pop it in _arm_return().  It should only be used if all
   arguments are passed in registers.

   The second, _interwork_r7_call_via_rN(), instead stores the return
   address at [r7, #-4].  It is the caller's responsibility to ensure
   that this address is valid and contains no useful data.

   The third, _interwork_r11_call_via_rN(), works in the same way but
   uses r11 instead of r7.  It is useful if the caller does not really
   need a frame pointer.  */
LSYM(Lstart_arm_return):
	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
	cfi_push	0, 0xe, -0x8, 0x8
	nop	@ This nop is for the benefit of debuggers, so that
		@ backtraces will use the correct unwind information.
	RETLDM	unwind=LSYM(Lstart_arm_return)
	cfi_end	LSYM(Lend_arm_return)

	.globl _arm_return_r7
	.globl _arm_return_r11

.macro interwork_with_frame frame, register, name, return
	THUMB_FUNC_START \name
	streq	lr, [\frame, #-4]
	adreq	lr, _arm_return_\frame

.macro interwork register
	THUMB_FUNC_START _interwork_call_via_\register
	.globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
	streq	lr, [sp, #-8]!
	adreq	lr, _arm_return
	SIZE	(_interwork_call_via_\register)

	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
	interwork_with_frame r11,\register,_interwork_r11_call_via_\register

/* The LR case has to be handled a little differently...  */
THUMB_FUNC_START _interwork_call_via_lr
	stmeqdb	r13!, {lr, pc}
	adreq	lr, _arm_return
	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */
#endif /* !__thumb2__ */

/* Functions to support compact pic switch tables in thumb1 state.
   All these routines take an index into the table in r0.  The
   table is at LR & ~1 (but this must be rounded up in the case
   of 32-bit entries).  They are only permitted to clobber r12
   and r14 and r0 must be preserved on exit.  */
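/* A C-level model of the signed-byte (sqi) variant that follows: the
   switch's branch table lies immediately after the bl that called this
   helper, so lr (with its Thumb bit masked off) is the table base, and
   each byte entry is a halfword-scaled branch offset.  The types are
   illustrative: */
#if 0
static unsigned long thumb1_case_sqi_model (unsigned index, unsigned long lr)
{
  const signed char *table = (const signed char *) (lr & ~1ul);
  return lr + 2 * (long) table[index];	/* new return address for bx */
}
#endif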
#ifdef L_thumb1_case_sqi
THUMB_FUNC_START __gnu_thumb1_case_sqi
	SIZE (__gnu_thumb1_case_sqi)

#ifdef L_thumb1_case_uqi
THUMB_FUNC_START __gnu_thumb1_case_uqi
	SIZE (__gnu_thumb1_case_uqi)

#ifdef L_thumb1_case_shi
THUMB_FUNC_START __gnu_thumb1_case_shi
	SIZE (__gnu_thumb1_case_shi)

#ifdef L_thumb1_case_uhi
THUMB_FUNC_START __gnu_thumb1_case_uhi
	SIZE (__gnu_thumb1_case_uhi)

#ifdef L_thumb1_case_si
THUMB_FUNC_START __gnu_thumb1_case_si
	adds.n	r1, r1, #2	/* Align to word.  */
	mov	pc, lr		/* We know we were called from thumb code.  */
	SIZE (__gnu_thumb1_case_si)

#endif /* Arch supports thumb.  */

.macro CFI_START_FUNCTION

.macro CFI_END_FUNCTION

/* The condition here must match the one in gcc/config/arm/elf.h and
   libgcc/config/arm/t-elf.  */
#ifndef NOT_ISA_TARGET_32BIT
#include "ieee754-df.S"
#include "ieee754-sf.S"
#else /* NOT_ISA_TARGET_32BIT */
#include "bpabi-v6m.S"
#endif /* NOT_ISA_TARGET_32BIT */
#endif /* !__symbian__ */