1 @ libgcc routines for ARM cpu.
2 @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
3
4 /* Copyright (C) 1995-2020 Free Software Foundation, Inc.
5
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 This file is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
19
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
24
25 /* An executable stack is *not* required for these functions. */
26 #if defined(__ELF__) && defined(__linux__)
27 .section .note.GNU-stack,"",%progbits
28 .previous
29 #endif /* __ELF__ and __linux__ */
30
31 #ifdef __ARM_EABI__
32 /* Some attributes that are common to all routines in this file. */
33 /* Tag_ABI_align_needed: This code does not require 8-byte
34 alignment from the caller. */
35 /* .eabi_attribute 24, 0 -- default setting. */
36 /* Tag_ABI_align_preserved: This code preserves 8-byte
37 alignment in any callee. */
38 .eabi_attribute 25, 1
39 #endif /* __ARM_EABI__ */
40 /* ------------------------------------------------------------------------ */
41
42 /* We need to know what prefix to add to function names. */
43
44 #ifndef __USER_LABEL_PREFIX__
45 #error __USER_LABEL_PREFIX__ not defined
46 #endif
47
48 /* ANSI concatenation macros. */
49
50 #define CONCAT1(a, b) CONCAT2(a, b)
51 #define CONCAT2(a, b) a ## b
52
53 /* Use the right prefix for global labels. */
54
55 #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
56
57 #ifdef __ELF__
58 #ifdef __thumb__
59 #define __PLT__ /* Not supported in Thumb assembler (for now). */
60 #elif defined __vxworks && !defined __PIC__
61 #define __PLT__ /* Not supported by the kernel loader. */
62 #else
63 #define __PLT__ (PLT)
64 #endif
65 #define TYPE(x) .type SYM(x),function
66 #define SIZE(x) .size SYM(x), . - SYM(x)
67 #define LSYM(x) .x
68 #else
69 #define __PLT__
70 #define TYPE(x)
71 #define SIZE(x)
72 #define LSYM(x) x
73 #endif
74
75 /* Function end macros. Variants for interworking. */
76
77 /* There are times when we might prefer Thumb1 code even if ARM code is
78 permitted, for example, the code might be smaller, or there might be
79 interworking problems with switching to ARM state if interworking is
80 disabled. */
81 #if (defined(__thumb__) \
82 && !defined(__thumb2__) \
83 && (!defined(__THUMB_INTERWORK__) \
84 || defined (__OPTIMIZE_SIZE__) \
85 || !__ARM_ARCH_ISA_ARM))
86 # define __prefer_thumb__
87 #endif
88
89 #if !__ARM_ARCH_ISA_ARM && __ARM_ARCH_ISA_THUMB == 1
90 #define NOT_ISA_TARGET_32BIT 1
91 #endif
92
93 /* How to return from a function call depends on the architecture variant. */
94
95 #if (__ARM_ARCH > 4) || defined(__ARM_ARCH_4T__)
96
97 # define RET bx lr
98 # define RETc(x) bx##x lr
99
100 /* Special precautions for interworking on armv4t. */
101 # if (__ARM_ARCH == 4)
102
103 /* Always use bx, not ldr pc. */
104 # if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
105 # define __INTERWORKING__
106 # endif /* __THUMB__ || __THUMB_INTERWORK__ */
107
108 /* Include thumb stub before arm mode code. */
109 # if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
110 # define __INTERWORKING_STUBS__
111 # endif /* __thumb__ && !__THUMB_INTERWORK__ */
112
113 #endif /* __ARM_ARCH == 4 */
114
115 #else
116
117 # define RET mov pc, lr
118 # define RETc(x) mov##x pc, lr
119
120 #endif
121
122 .macro cfi_pop advance, reg, cfa_offset
123 #ifdef __ELF__
124 .pushsection .debug_frame
125 .byte 0x4 /* DW_CFA_advance_loc4 */
126 .4byte \advance
127 .byte (0xc0 | \reg) /* DW_CFA_restore */
128 .byte 0xe /* DW_CFA_def_cfa_offset */
129 .uleb128 \cfa_offset
130 .popsection
131 #endif
132 .endm
133 .macro cfi_push advance, reg, offset, cfa_offset
134 #ifdef __ELF__
135 .pushsection .debug_frame
136 .byte 0x4 /* DW_CFA_advance_loc4 */
137 .4byte \advance
138 .byte (0x80 | \reg) /* DW_CFA_offset */
139 .uleb128 (\offset / -4)
140 .byte 0xe /* DW_CFA_def_cfa_offset */
141 .uleb128 \cfa_offset
142 .popsection
143 #endif
144 .endm
145 .macro cfi_start start_label, end_label
146 #ifdef __ELF__
147 .pushsection .debug_frame
148 LSYM(Lstart_frame):
149 .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
150 LSYM(Lstart_cie):
151 .4byte 0xffffffff @ CIE Identifier Tag
152 .byte 0x1 @ CIE Version
153 .ascii "\0" @ CIE Augmentation
154 .uleb128 0x1 @ CIE Code Alignment Factor
155 .sleb128 -4 @ CIE Data Alignment Factor
156 .byte 0xe @ CIE RA Column
157 .byte 0xc @ DW_CFA_def_cfa
158 .uleb128 0xd
159 .uleb128 0x0
160
161 .align 2
162 LSYM(Lend_cie):
163 .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length
164 LSYM(Lstart_fde):
165 .4byte LSYM(Lstart_frame) @ FDE CIE offset
166 .4byte \start_label @ FDE initial location
167 .4byte \end_label-\start_label @ FDE address range
168 .popsection
169 #endif
170 .endm
171 .macro cfi_end end_label
172 #ifdef __ELF__
173 .pushsection .debug_frame
174 .align 2
175 LSYM(Lend_fde):
176 .popsection
177 \end_label:
178 #endif
179 .endm
180
181 /* Don't pass dirn, it's there just to get token pasting right. */
182
183 .macro RETLDM regs=, cond=, unwind=, dirn=ia
184 #if defined (__INTERWORKING__)
185 .ifc "\regs",""
186 ldr\cond lr, [sp], #8
187 .else
188 # if defined(__thumb2__)
189 pop\cond {\regs, lr}
190 # else
191 ldm\cond\dirn sp!, {\regs, lr}
192 # endif
193 .endif
194 .ifnc "\unwind", ""
195 /* Mark LR as restored. */
196 97: cfi_pop 97b - \unwind, 0xe, 0x0
197 .endif
198 bx\cond lr
199 #else
200 /* Caller is responsible for providing IT instruction. */
201 .ifc "\regs",""
202 ldr\cond pc, [sp], #8
203 .else
204 # if defined(__thumb2__)
205 pop\cond {\regs, pc}
206 # else
207 ldm\cond\dirn sp!, {\regs, pc}
208 # endif
209 .endif
210 #endif
211 .endm
212
213 /* The Unified assembly syntax allows the same code to be assembled for both
214 ARM and Thumb-2. However this is only supported by recent gas, so define
215 a set of macros to allow ARM code on older assemblers. */
216 #if defined(__thumb2__)
217 .macro do_it cond, suffix=""
218 it\suffix \cond
219 .endm
220 .macro shift1 op, arg0, arg1, arg2
221 \op \arg0, \arg1, \arg2
222 .endm
223 #define do_push push
224 #define do_pop pop
225 #define COND(op1, op2, cond) op1 ## op2 ## cond
226 /* Perform an arithmetic operation with a variable shift operand. This
227 requires two instructions and a scratch register on Thumb-2. */
228 .macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
229 \shiftop \tmp, \src2, \shiftreg
230 \name \dest, \src1, \tmp
231 .endm
232 #else
233 .macro do_it cond, suffix=""
234 .endm
235 .macro shift1 op, arg0, arg1, arg2
236 mov \arg0, \arg1, \op \arg2
237 .endm
238 #define do_push stmfd sp!,
239 #define do_pop ldmfd sp!,
240 #define COND(op1, op2, cond) op1 ## cond ## op2
241 .macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
242 \name \dest, \src1, \src2, \shiftop \shiftreg
243 .endm
244 #endif
245
246 #ifdef __ARM_EABI__
247 .macro ARM_LDIV0 name signed
248 cmp r0, #0
249 .ifc \signed, unsigned
250 movne r0, #0xffffffff
251 .else
252 movgt r0, #0x7fffffff
253 movlt r0, #0x80000000
254 .endif
255 b SYM (__aeabi_idiv0) __PLT__
256 .endm
257 #else
258 .macro ARM_LDIV0 name signed
259 str lr, [sp, #-8]!
260 98: cfi_push 98b - __\name, 0xe, -0x8, 0x8
261 bl SYM (__div0) __PLT__
262 mov r0, #0 @ About as wrong as it could be.
263 RETLDM unwind=98b
264 .endm
265 #endif
266
267
268 #ifdef __ARM_EABI__
269 .macro THUMB_LDIV0 name signed
270 #ifdef NOT_ISA_TARGET_32BIT
271
272 push {r0, lr}
273 mov r0, #0
274 bl SYM(__aeabi_idiv0)
275 @ We know we are not on armv4t, so pop pc is safe.
276 pop {r1, pc}
277
278 #elif defined(__thumb2__)
279 .syntax unified
280 .ifc \signed, unsigned
281 cbz r0, 1f
282 mov r0, #0xffffffff
283 1:
284 .else
285 cmp r0, #0
286 do_it gt
287 movgt r0, #0x7fffffff
288 do_it lt
289 movlt r0, #0x80000000
290 .endif
291 b.w SYM(__aeabi_idiv0) __PLT__
292 #else
293 .align 2
294 bx pc
295 nop
296 .arm
297 cmp r0, #0
298 .ifc \signed, unsigned
299 movne r0, #0xffffffff
300 .else
301 movgt r0, #0x7fffffff
302 movlt r0, #0x80000000
303 .endif
304 b SYM(__aeabi_idiv0) __PLT__
305 .thumb
306 #endif
307 .endm
308 #else
309 .macro THUMB_LDIV0 name signed
310 push { r1, lr }
311 98: cfi_push 98b - __\name, 0xe, -0x4, 0x8
312 bl SYM (__div0)
313 mov r0, #0 @ About as wrong as it could be.
314 #if defined (__INTERWORKING__)
315 pop { r1, r2 }
316 bx r2
317 #else
318 pop { r1, pc }
319 #endif
320 .endm
321 #endif
322
323 .macro FUNC_END name
324 SIZE (__\name)
325 .endm
326
327 .macro DIV_FUNC_END name signed
328 cfi_start __\name, LSYM(Lend_div0)
329 LSYM(Ldiv0):
330 #ifdef __thumb__
331 THUMB_LDIV0 \name \signed
332 #else
333 ARM_LDIV0 \name \signed
334 #endif
335 cfi_end LSYM(Lend_div0)
336 FUNC_END \name
337 .endm
338
339 .macro THUMB_FUNC_START name
340 .globl SYM (\name)
341 TYPE (\name)
342 .thumb_func
343 SYM (\name):
344 .endm
345
346 /* Function start macros. Variants for ARM and Thumb. */
347
348 #ifdef __thumb__
349 #define THUMB_FUNC .thumb_func
350 #define THUMB_CODE .force_thumb
351 # if defined(__thumb2__)
352 #define THUMB_SYNTAX .syntax divided
353 # else
354 #define THUMB_SYNTAX
355 # endif
356 #else
357 #define THUMB_FUNC
358 #define THUMB_CODE
359 #define THUMB_SYNTAX
360 #endif
361
362 .macro FUNC_START name
363 .text
364 .globl SYM (__\name)
365 TYPE (__\name)
366 .align 0
367 THUMB_CODE
368 THUMB_FUNC
369 THUMB_SYNTAX
370 SYM (__\name):
371 .endm
372
373 .macro ARM_SYM_START name
374 TYPE (\name)
375 .align 0
376 SYM (\name):
377 .endm
378
379 .macro SYM_END name
380 SIZE (\name)
381 .endm
382
383 /* Special function that will always be coded in ARM assembly, even
384 in a Thumb-only compilation. */
385
386 #if defined(__thumb2__)
387
388 /* For Thumb-2 we build everything in thumb mode. */
389 .macro ARM_FUNC_START name
390 FUNC_START \name
391 .syntax unified
392 .endm
393 #define EQUIV .thumb_set
394 .macro ARM_CALL name
395 bl __\name
396 .endm
397
398 #elif defined(__INTERWORKING_STUBS__)
399
400 .macro ARM_FUNC_START name
401 FUNC_START \name
402 bx pc
403 nop
404 .arm
405 /* A hook to tell gdb that we've switched to ARM mode. Also used to call
406 directly from other local arm routines. */
407 _L__\name:
408 .endm
409 #define EQUIV .thumb_set
410 /* Branch directly to a function declared with ARM_FUNC_START.
411 Must be called in arm mode. */
412 .macro ARM_CALL name
413 bl _L__\name
414 .endm
415
416 #else /* !(__INTERWORKING_STUBS__ || __thumb2__) */
417
418 #ifdef NOT_ISA_TARGET_32BIT
419 #define EQUIV .thumb_set
420 #else
421 .macro ARM_FUNC_START name
422 .text
423 .globl SYM (__\name)
424 TYPE (__\name)
425 .align 0
426 .arm
427 SYM (__\name):
428 .endm
429 #define EQUIV .set
430 .macro ARM_CALL name
431 bl __\name
432 .endm
433 #endif
434
435 #endif
436
437 .macro FUNC_ALIAS new old
438 .globl SYM (__\new)
439 #if defined (__thumb__)
440 .thumb_set SYM (__\new), SYM (__\old)
441 #else
442 .set SYM (__\new), SYM (__\old)
443 #endif
444 .endm
445
446 #ifndef NOT_ISA_TARGET_32BIT
447 .macro ARM_FUNC_ALIAS new old
448 .globl SYM (__\new)
449 EQUIV SYM (__\new), SYM (__\old)
450 #if defined(__INTERWORKING_STUBS__)
451 .set SYM (_L__\new), SYM (_L__\old)
452 #endif
453 .endm
454 #endif
455
456 #ifdef __ARMEB__
457 #define xxh r0
458 #define xxl r1
459 #define yyh r2
460 #define yyl r3
461 #else
462 #define xxh r1
463 #define xxl r0
464 #define yyh r3
465 #define yyl r2
466 #endif
467
468 #ifdef __ARM_EABI__
469 .macro WEAK name
470 .weak SYM (__\name)
471 .endm
472 #endif
473
474 #ifdef __thumb__
475 /* Register aliases. */
476
477 work .req r4 @ XXXX is this safe ?
478 dividend .req r0
479 divisor .req r1
480 overdone .req r2
481 result .req r2
482 curbit .req r3
483 #endif
484 #if 0
485 ip .req r12
486 sp .req r13
487 lr .req r14
488 pc .req r15
489 #endif
490
491 /* ------------------------------------------------------------------------ */
492 /* Bodies of the division and modulo routines. */
493 /* ------------------------------------------------------------------------ */
494 .macro ARM_DIV_BODY dividend, divisor, result, curbit
495
496 #if defined (__ARM_FEATURE_CLZ) && ! defined (__OPTIMIZE_SIZE__)
497
498 #if defined (__thumb2__)
499 clz \curbit, \dividend
500 clz \result, \divisor
501 sub \curbit, \result, \curbit
502 rsb \curbit, \curbit, #31
503 adr \result, 1f
504 add \curbit, \result, \curbit, lsl #4
505 mov \result, #0
506 mov pc, \curbit
507 .p2align 3
508 1:
509 .set shift, 32
510 .rept 32
511 .set shift, shift - 1
512 cmp.w \dividend, \divisor, lsl #shift
513 nop.n
514 adc.w \result, \result, \result
515 it cs
516 subcs.w \dividend, \dividend, \divisor, lsl #shift
517 .endr
518 #else
519 clz \curbit, \dividend
520 clz \result, \divisor
521 sub \curbit, \result, \curbit
522 rsbs \curbit, \curbit, #31
523 addne \curbit, \curbit, \curbit, lsl #1
524 mov \result, #0
525 addne pc, pc, \curbit, lsl #2
526 nop
527 .set shift, 32
528 .rept 32
529 .set shift, shift - 1
530 cmp \dividend, \divisor, lsl #shift
531 adc \result, \result, \result
532 subcs \dividend, \dividend, \divisor, lsl #shift
533 .endr
534 #endif
535
536 #else /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
537 #if defined (__ARM_FEATURE_CLZ)
538
539 clz \curbit, \divisor
540 clz \result, \dividend
541 sub \result, \curbit, \result
542 mov \curbit, #1
543 mov \divisor, \divisor, lsl \result
544 mov \curbit, \curbit, lsl \result
545 mov \result, #0
546
547 #else /* !defined (__ARM_FEATURE_CLZ) */
548
549 @ Initially shift the divisor left 3 bits if possible,
550 @ set curbit accordingly. This allows for curbit to be located
551 @ at the left end of each 4-bit nibble in the division loop
552 @ to save one loop in most cases.
553 tst \divisor, #0xe0000000
554 moveq \divisor, \divisor, lsl #3
555 moveq \curbit, #8
556 movne \curbit, #1
557
558 @ Unless the divisor is very big, shift it up in multiples of
559 @ four bits, since this is the amount of unwinding in the main
560 @ division loop. Continue shifting until the divisor is
561 @ larger than the dividend.
562 1: cmp \divisor, #0x10000000
563 cmplo \divisor, \dividend
564 movlo \divisor, \divisor, lsl #4
565 movlo \curbit, \curbit, lsl #4
566 blo 1b
567
568 @ For very big divisors, we must shift it a bit at a time, or
569 @ we will be in danger of overflowing.
570 1: cmp \divisor, #0x80000000
571 cmplo \divisor, \dividend
572 movlo \divisor, \divisor, lsl #1
573 movlo \curbit, \curbit, lsl #1
574 blo 1b
575
576 mov \result, #0
577
578 #endif /* !defined (__ARM_FEATURE_CLZ) */
579
580 @ Division loop
581 1: cmp \dividend, \divisor
582 do_it hs, t
583 subhs \dividend, \dividend, \divisor
584 orrhs \result, \result, \curbit
585 cmp \dividend, \divisor, lsr #1
586 do_it hs, t
587 subhs \dividend, \dividend, \divisor, lsr #1
588 orrhs \result, \result, \curbit, lsr #1
589 cmp \dividend, \divisor, lsr #2
590 do_it hs, t
591 subhs \dividend, \dividend, \divisor, lsr #2
592 orrhs \result, \result, \curbit, lsr #2
593 cmp \dividend, \divisor, lsr #3
594 do_it hs, t
595 subhs \dividend, \dividend, \divisor, lsr #3
596 orrhs \result, \result, \curbit, lsr #3
597 cmp \dividend, #0 @ Early termination?
598 do_it ne, t
599 movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
600 movne \divisor, \divisor, lsr #4
601 bne 1b
602
603 #endif /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
604
605 .endm
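/* For reference, a rough C model of what ARM_DIV_BODY computes, kept in a
   comment so it does not affect the assembled code.  It omits the CLZ fast
   path and the 4-bit unrolling; the function name is illustrative only and
   the divisor is assumed to be non-zero (the callers handle division by
   zero separately).

   static unsigned udiv32_model (unsigned dividend, unsigned divisor)
   {
     unsigned curbit = 1, result = 0;

     // Scale the divisor up until it is just above the dividend.
     while (divisor < dividend && !(divisor & 0x80000000))
       {
         divisor <<= 1;
         curbit <<= 1;
       }

     // Subtract-and-shift back down, accumulating quotient bits.
     while (curbit)
       {
         if (dividend >= divisor)
           {
             dividend -= divisor;
             result |= curbit;
           }
         divisor >>= 1;
         curbit >>= 1;
       }

     return result;   // dividend now holds the remainder
   }  */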
606 /* ------------------------------------------------------------------------ */
607 .macro ARM_DIV2_ORDER divisor, order
608
609 #if defined (__ARM_FEATURE_CLZ)
610
611 clz \order, \divisor
612 rsb \order, \order, #31
613
614 #else
615
616 cmp \divisor, #(1 << 16)
617 movhs \divisor, \divisor, lsr #16
618 movhs \order, #16
619 movlo \order, #0
620
621 cmp \divisor, #(1 << 8)
622 movhs \divisor, \divisor, lsr #8
623 addhs \order, \order, #8
624
625 cmp \divisor, #(1 << 4)
626 movhs \divisor, \divisor, lsr #4
627 addhs \order, \order, #4
628
629 cmp \divisor, #(1 << 2)
630 addhi \order, \order, #3
631 addls \order, \order, \divisor, lsr #1
632
633 #endif
634
635 .endm
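/* A C model of ARM_DIV2_ORDER for reference (comment only, illustrative
   name): for a power-of-two divisor it computes order = log2 (divisor),
   so the quotient is simply dividend >> order.  It mirrors the non-CLZ
   compare cascade above.

   static int div2_order_model (unsigned divisor)
   {
     int order = 0;
     if (divisor >= (1u << 16)) { divisor >>= 16; order += 16; }
     if (divisor >= (1u << 8))  { divisor >>= 8;  order += 8; }
     if (divisor >= (1u << 4))  { divisor >>= 4;  order += 4; }
     if (divisor > (1u << 2))   // remaining power of two is 8
       order += 3;
     else                       // remaining power of two is 1, 2 or 4
       order += divisor >> 1;
     return order;
   }  */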
636 /* ------------------------------------------------------------------------ */
637 .macro ARM_MOD_BODY dividend, divisor, order, spare
638
639 #if defined(__ARM_FEATURE_CLZ) && ! defined (__OPTIMIZE_SIZE__)
640
641 clz \order, \divisor
642 clz \spare, \dividend
643 sub \order, \order, \spare
644 rsbs \order, \order, #31
645 addne pc, pc, \order, lsl #3
646 nop
647 .set shift, 32
648 .rept 32
649 .set shift, shift - 1
650 cmp \dividend, \divisor, lsl #shift
651 subcs \dividend, \dividend, \divisor, lsl #shift
652 .endr
653
654 #else /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
655 #if defined (__ARM_FEATURE_CLZ)
656
657 clz \order, \divisor
658 clz \spare, \dividend
659 sub \order, \order, \spare
660 mov \divisor, \divisor, lsl \order
661
662 #else /* !defined (__ARM_FEATURE_CLZ) */
663
664 mov \order, #0
665
666 @ Unless the divisor is very big, shift it up in multiples of
667 @ four bits, since this is the amount of unwinding in the main
668 @ division loop. Continue shifting until the divisor is
669 @ larger than the dividend.
670 1: cmp \divisor, #0x10000000
671 cmplo \divisor, \dividend
672 movlo \divisor, \divisor, lsl #4
673 addlo \order, \order, #4
674 blo 1b
675
676 @ For very big divisors, we must shift it a bit at a time, or
677 @ we will be in danger of overflowing.
678 1: cmp \divisor, #0x80000000
679 cmplo \divisor, \dividend
680 movlo \divisor, \divisor, lsl #1
681 addlo \order, \order, #1
682 blo 1b
683
684 #endif /* !defined (__ARM_FEATURE_CLZ) */
685
686 @ Perform all needed subtractions to keep only the remainder.
687 @ Do comparisons in batches of 4 first.
688 subs \order, \order, #3 @ yes, 3 is intended here
689 blt 2f
690
691 1: cmp \dividend, \divisor
692 subhs \dividend, \dividend, \divisor
693 cmp \dividend, \divisor, lsr #1
694 subhs \dividend, \dividend, \divisor, lsr #1
695 cmp \dividend, \divisor, lsr #2
696 subhs \dividend, \dividend, \divisor, lsr #2
697 cmp \dividend, \divisor, lsr #3
698 subhs \dividend, \dividend, \divisor, lsr #3
699 cmp \dividend, #1
700 mov \divisor, \divisor, lsr #4
701 subges \order, \order, #4
702 bge 1b
703
704 tst \order, #3
705 teqne \dividend, #0
706 beq 5f
707
708 @ Either 1, 2 or 3 comparisons/subtractions are left.
709 2: cmn \order, #2
710 blt 4f
711 beq 3f
712 cmp \dividend, \divisor
713 subhs \dividend, \dividend, \divisor
714 mov \divisor, \divisor, lsr #1
715 3: cmp \dividend, \divisor
716 subhs \dividend, \dividend, \divisor
717 mov \divisor, \divisor, lsr #1
718 4: cmp \dividend, \divisor
719 subhs \dividend, \dividend, \divisor
720 5:
721
722 #endif /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
723
724 .endm
725 /* ------------------------------------------------------------------------ */
726 .macro THUMB_DIV_MOD_BODY modulo
727 @ Load the constant 0x10000000 into our work register.
728 mov work, #1
729 lsl work, #28
730 LSYM(Loop1):
731 @ Unless the divisor is very big, shift it up in multiples of
732 @ four bits, since this is the amount of unwinding in the main
733 @ division loop. Continue shifting until the divisor is
734 @ larger than the dividend.
735 cmp divisor, work
736 bhs LSYM(Lbignum)
737 cmp divisor, dividend
738 bhs LSYM(Lbignum)
739 lsl divisor, #4
740 lsl curbit, #4
741 b LSYM(Loop1)
742 LSYM(Lbignum):
743 @ Set work to 0x80000000
744 lsl work, #3
745 LSYM(Loop2):
746 @ For very big divisors, we must shift it a bit at a time, or
747 @ we will be in danger of overflowing.
748 cmp divisor, work
749 bhs LSYM(Loop3)
750 cmp divisor, dividend
751 bhs LSYM(Loop3)
752 lsl divisor, #1
753 lsl curbit, #1
754 b LSYM(Loop2)
755 LSYM(Loop3):
756 @ Test for possible subtractions ...
757 .if \modulo
758 @ ... On the final pass, this may subtract too much from the dividend,
759 @ so keep track of which subtractions are done so that we can fix them
760 @ up afterwards.
761 mov overdone, #0
762 cmp dividend, divisor
763 blo LSYM(Lover1)
764 sub dividend, dividend, divisor
765 LSYM(Lover1):
766 lsr work, divisor, #1
767 cmp dividend, work
768 blo LSYM(Lover2)
769 sub dividend, dividend, work
770 mov ip, curbit
771 mov work, #1
772 ror curbit, work
773 orr overdone, curbit
774 mov curbit, ip
775 LSYM(Lover2):
776 lsr work, divisor, #2
777 cmp dividend, work
778 blo LSYM(Lover3)
779 sub dividend, dividend, work
780 mov ip, curbit
781 mov work, #2
782 ror curbit, work
783 orr overdone, curbit
784 mov curbit, ip
785 LSYM(Lover3):
786 lsr work, divisor, #3
787 cmp dividend, work
788 blo LSYM(Lover4)
789 sub dividend, dividend, work
790 mov ip, curbit
791 mov work, #3
792 ror curbit, work
793 orr overdone, curbit
794 mov curbit, ip
795 LSYM(Lover4):
796 mov ip, curbit
797 .else
798 @ ... and note which bits are done in the result. On the final pass,
799 @ this may subtract too much from the dividend, but the result will be ok,
800 @ since the "bit" will have been shifted out at the bottom.
801 cmp dividend, divisor
802 blo LSYM(Lover1)
803 sub dividend, dividend, divisor
804 orr result, result, curbit
805 LSYM(Lover1):
806 lsr work, divisor, #1
807 cmp dividend, work
808 blo LSYM(Lover2)
809 sub dividend, dividend, work
810 lsr work, curbit, #1
811 orr result, work
812 LSYM(Lover2):
813 lsr work, divisor, #2
814 cmp dividend, work
815 blo LSYM(Lover3)
816 sub dividend, dividend, work
817 lsr work, curbit, #2
818 orr result, work
819 LSYM(Lover3):
820 lsr work, divisor, #3
821 cmp dividend, work
822 blo LSYM(Lover4)
823 sub dividend, dividend, work
824 lsr work, curbit, #3
825 orr result, work
826 LSYM(Lover4):
827 .endif
828
829 cmp dividend, #0 @ Early termination?
830 beq LSYM(Lover5)
831 lsr curbit, #4 @ No, any more bits to do?
832 beq LSYM(Lover5)
833 lsr divisor, #4
834 b LSYM(Loop3)
835 LSYM(Lover5):
836 .if \modulo
837 @ Any subtractions that we should not have done will be recorded in
838 @ the top three bits of "overdone". Exactly which ones were not needed
839 @ is governed by the position of the bit, stored in ip.
840 mov work, #0xe
841 lsl work, #28
842 and overdone, work
843 beq LSYM(Lgot_result)
844
845 @ If we terminated early, because dividend became zero, then the
846 @ bit in ip will not be in the bottom nibble, and we should not
847 @ perform the additions below. We must test for this though
848 @ (rather than relying upon the TSTs to prevent the additions) since
849 @ the bit in ip could be in the top two bits which might then match
850 @ with one of the smaller RORs.
851 mov curbit, ip
852 mov work, #0x7
853 tst curbit, work
854 beq LSYM(Lgot_result)
855
856 mov curbit, ip
857 mov work, #3
858 ror curbit, work
859 tst overdone, curbit
860 beq LSYM(Lover6)
861 lsr work, divisor, #3
862 add dividend, work
863 LSYM(Lover6):
864 mov curbit, ip
865 mov work, #2
866 ror curbit, work
867 tst overdone, curbit
868 beq LSYM(Lover7)
869 lsr work, divisor, #2
870 add dividend, work
871 LSYM(Lover7):
872 mov curbit, ip
873 mov work, #1
874 ror curbit, work
875 tst overdone, curbit
876 beq LSYM(Lgot_result)
877 lsr work, divisor, #1
878 add dividend, work
879 .endif
880 LSYM(Lgot_result):
881 .endm
882
883 /* If performance is preferred over code size, the following functions are provided. */
884 #if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
885
886 /* Branch to div(n), and jump to label if curbit is lower than the divisor. */
887 .macro BranchToDiv n, label
888 lsr curbit, dividend, \n
889 cmp curbit, divisor
890 blo \label
891 .endm
892
893 /* Body of div(n). Shift the divisor left by n bits and compare it with
894 the dividend. Update the dividend with the subtraction result. */
895 .macro DoDiv n
896 lsr curbit, dividend, \n
897 cmp curbit, divisor
898 bcc 1f
899 lsl curbit, divisor, \n
900 sub dividend, dividend, curbit
901
902 1: adc result, result
903 .endm
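/* A C model of a single DoDiv(n) step, for reference (comment only;
   names are illustrative).  Comparing dividend >> n against the divisor
   is equivalent to comparing the dividend against divisor << n, but it
   cannot overflow the shifted divisor.

   static unsigned do_div_step_model (unsigned *dividend, unsigned divisor,
                                      unsigned n, unsigned result)
   {
     unsigned bit = 0;
     if ((*dividend >> n) >= divisor)
       {
         *dividend -= divisor << n;   // fits: it is <= *dividend here
         bit = 1;
       }
     return (result << 1) | bit;      // what "adc result, result" builds up
   }  */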
904
905 /* The body of division with positive divisor. Unless the divisor is very
906 big, shift it up in multiples of four bits, since this is the amount of
907 unwinding in the main division loop. Continue shifting until the divisor
908 is larger than the dividend. */
909 .macro THUMB1_Div_Positive
910 mov result, #0
911 BranchToDiv #1, LSYM(Lthumb1_div1)
912 BranchToDiv #4, LSYM(Lthumb1_div4)
913 BranchToDiv #8, LSYM(Lthumb1_div8)
914 BranchToDiv #12, LSYM(Lthumb1_div12)
915 BranchToDiv #16, LSYM(Lthumb1_div16)
916 LSYM(Lthumb1_div_large_positive):
917 mov result, #0xff
918 lsl divisor, divisor, #8
919 rev result, result
920 lsr curbit, dividend, #16
921 cmp curbit, divisor
922 blo 1f
923 asr result, #8
924 lsl divisor, divisor, #8
925 beq LSYM(Ldivbyzero_waypoint)
926
927 1: lsr curbit, dividend, #12
928 cmp curbit, divisor
929 blo LSYM(Lthumb1_div12)
930 b LSYM(Lthumb1_div16)
931 LSYM(Lthumb1_div_loop):
932 lsr divisor, divisor, #8
933 LSYM(Lthumb1_div16):
934 DoDiv #15
935 DoDiv #14
936 DoDiv #13
937 DoDiv #12
938 LSYM(Lthumb1_div12):
939 DoDiv #11
940 DoDiv #10
941 DoDiv #9
942 DoDiv #8
943 bcs LSYM(Lthumb1_div_loop)
944 LSYM(Lthumb1_div8):
945 DoDiv #7
946 DoDiv #6
947 DoDiv #5
948 LSYM(Lthumb1_div5):
949 DoDiv #4
950 LSYM(Lthumb1_div4):
951 DoDiv #3
952 LSYM(Lthumb1_div3):
953 DoDiv #2
954 LSYM(Lthumb1_div2):
955 DoDiv #1
956 LSYM(Lthumb1_div1):
957 sub divisor, dividend, divisor
958 bcs 1f
959 cpy divisor, dividend
960
961 1: adc result, result
962 cpy dividend, result
963 RET
964
965 LSYM(Ldivbyzero_waypoint):
966 b LSYM(Ldiv0)
967 .endm
968
969 /* The body of division with negative divisor. Similar to
970 THUMB1_Div_Positive except that the shift steps are in multiples
971 of six bits. */
972 .macro THUMB1_Div_Negative
973 lsr result, divisor, #31
974 beq 1f
975 neg divisor, divisor
976
977 1: asr curbit, dividend, #32
978 bcc 2f
979 neg dividend, dividend
980
981 2: eor curbit, result
982 mov result, #0
983 cpy ip, curbit
984 BranchToDiv #4, LSYM(Lthumb1_div_negative4)
985 BranchToDiv #8, LSYM(Lthumb1_div_negative8)
986 LSYM(Lthumb1_div_large):
987 mov result, #0xfc
988 lsl divisor, divisor, #6
989 rev result, result
990 lsr curbit, dividend, #8
991 cmp curbit, divisor
992 blo LSYM(Lthumb1_div_negative8)
993
994 lsl divisor, divisor, #6
995 asr result, result, #6
996 cmp curbit, divisor
997 blo LSYM(Lthumb1_div_negative8)
998
999 lsl divisor, divisor, #6
1000 asr result, result, #6
1001 cmp curbit, divisor
1002 blo LSYM(Lthumb1_div_negative8)
1003
1004 lsl divisor, divisor, #6
1005 beq LSYM(Ldivbyzero_negative)
1006 asr result, result, #6
1007 b LSYM(Lthumb1_div_negative8)
1008 LSYM(Lthumb1_div_negative_loop):
1009 lsr divisor, divisor, #6
1010 LSYM(Lthumb1_div_negative8):
1011 DoDiv #7
1012 DoDiv #6
1013 DoDiv #5
1014 DoDiv #4
1015 LSYM(Lthumb1_div_negative4):
1016 DoDiv #3
1017 DoDiv #2
1018 bcs LSYM(Lthumb1_div_negative_loop)
1019 DoDiv #1
1020 sub divisor, dividend, divisor
1021 bcs 1f
1022 cpy divisor, dividend
1023
1024 1: cpy curbit, ip
1025 adc result, result
1026 asr curbit, curbit, #1
1027 cpy dividend, result
1028 bcc 2f
1029 neg dividend, dividend
1030 cmp curbit, #0
1031
1032 2: bpl 3f
1033 neg divisor, divisor
1034
1035 3: RET
1036
1037 LSYM(Ldivbyzero_negative):
1038 cpy curbit, ip
1039 asr curbit, curbit, #1
1040 bcc LSYM(Ldiv0)
1041 neg dividend, dividend
1042 .endm
1043 #endif /* ARM Thumb version. */
1044
1045 /* ------------------------------------------------------------------------ */
1046 /* Start of the Real Functions */
1047 /* ------------------------------------------------------------------------ */
1048 #ifdef L_udivsi3
1049
1050 #if defined(__prefer_thumb__)
1051
1052 FUNC_START udivsi3
1053 FUNC_ALIAS aeabi_uidiv udivsi3
1054 #if defined(__OPTIMIZE_SIZE__)
1055
1056 cmp divisor, #0
1057 beq LSYM(Ldiv0)
1058 LSYM(udivsi3_skip_div0_test):
1059 mov curbit, #1
1060 mov result, #0
1061
1062 push { work }
1063 cmp dividend, divisor
1064 blo LSYM(Lgot_result)
1065
1066 THUMB_DIV_MOD_BODY 0
1067
1068 mov r0, result
1069 pop { work }
1070 RET
1071
1072 /* Implementation of aeabi_uidiv for ARMv6m. This version is only
1073 used in ARMv6-M when we need an efficient implementation. */
1074 #else
1075 LSYM(udivsi3_skip_div0_test):
1076 THUMB1_Div_Positive
1077
1078 #endif /* __OPTIMIZE_SIZE__ */
1079
1080 #elif defined(__ARM_ARCH_EXT_IDIV__)
1081
1082 ARM_FUNC_START udivsi3
1083 ARM_FUNC_ALIAS aeabi_uidiv udivsi3
1084
1085 cmp r1, #0
1086 beq LSYM(Ldiv0)
1087
1088 udiv r0, r0, r1
1089 RET
1090
1091 #else /* ARM version/Thumb-2. */
1092
1093 ARM_FUNC_START udivsi3
1094 ARM_FUNC_ALIAS aeabi_uidiv udivsi3
1095
1096 /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
1097 check for division-by-zero a second time. */
1098 LSYM(udivsi3_skip_div0_test):
1099 subs r2, r1, #1
1100 do_it eq
1101 RETc(eq)
1102 bcc LSYM(Ldiv0)
1103 cmp r0, r1
1104 bls 11f
1105 tst r1, r2
1106 beq 12f
1107
1108 ARM_DIV_BODY r0, r1, r2, r3
1109
1110 mov r0, r2
1111 RET
1112
1113 11: do_it eq, e
1114 moveq r0, #1
1115 movne r0, #0
1116 RET
1117
1118 12: ARM_DIV2_ORDER r1, r2
1119
1120 mov r0, r0, lsr r2
1121 RET
1122
1123 #endif /* ARM version */
1124
1125 DIV_FUNC_END udivsi3 unsigned
1126
1127 #if defined(__prefer_thumb__)
1128 FUNC_START aeabi_uidivmod
1129 cmp r1, #0
1130 beq LSYM(Ldiv0)
1131 # if defined(__OPTIMIZE_SIZE__)
1132 push {r0, r1, lr}
1133 bl LSYM(udivsi3_skip_div0_test)
1134 pop {r1, r2, r3}
1135 mul r2, r0
1136 sub r1, r1, r2
1137 bx r3
1138 # else
1139 /* Both the quotient and remainder are calculated simultaneously
1140 in THUMB1_Div_Positive. There is no need to calculate the
1141 remainder again here. */
1142 b LSYM(udivsi3_skip_div0_test)
1143 RET
1144 # endif /* __OPTIMIZE_SIZE__ */
1145
1146 #elif defined(__ARM_ARCH_EXT_IDIV__)
1147 ARM_FUNC_START aeabi_uidivmod
1148 cmp r1, #0
1149 beq LSYM(Ldiv0)
1150 mov r2, r0
1151 udiv r0, r0, r1
1152 mls r1, r0, r1, r2
1153 RET
1154 #else
1155 ARM_FUNC_START aeabi_uidivmod
1156 cmp r1, #0
1157 beq LSYM(Ldiv0)
1158 stmfd sp!, { r0, r1, lr }
1159 bl LSYM(udivsi3_skip_div0_test)
1160 ldmfd sp!, { r1, r2, lr }
1161 mul r3, r2, r0
1162 sub r1, r1, r3
1163 RET
1164 #endif
1165 FUNC_END aeabi_uidivmod
1166
1167 #endif /* L_udivsi3 */
1168 /* ------------------------------------------------------------------------ */
1169 #ifdef L_umodsi3
1170
1171 #if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1
1172
1173 ARM_FUNC_START umodsi3
1174
1175 cmp r1, #0
1176 beq LSYM(Ldiv0)
1177 udiv r2, r0, r1
1178 mls r0, r1, r2, r0
1179 RET
1180
1181 #elif defined(__thumb__)
1182
1183 FUNC_START umodsi3
1184
1185 cmp divisor, #0
1186 beq LSYM(Ldiv0)
1187 mov curbit, #1
1188 cmp dividend, divisor
1189 bhs LSYM(Lover10)
1190 RET
1191
1192 LSYM(Lover10):
1193 push { work }
1194
1195 THUMB_DIV_MOD_BODY 1
1196
1197 pop { work }
1198 RET
1199
1200 #else /* ARM version. */
1201
1202 FUNC_START umodsi3
1203
1204 subs r2, r1, #1 @ compare divisor with 1
1205 bcc LSYM(Ldiv0)
1206 cmpne r0, r1 @ compare dividend with divisor
1207 moveq r0, #0
1208 tsthi r1, r2 @ see if divisor is power of 2
1209 andeq r0, r0, r2
1210 RETc(ls)
1211
1212 ARM_MOD_BODY r0, r1, r2, r3
1213
1214 RET
1215
1216 #endif /* ARM version. */
1217
1218 DIV_FUNC_END umodsi3 unsigned
1219
1220 #endif /* L_umodsi3 */
1221 /* ------------------------------------------------------------------------ */
1222 #ifdef L_divsi3
1223
1224 #if defined(__prefer_thumb__)
1225
1226 FUNC_START divsi3
1227 FUNC_ALIAS aeabi_idiv divsi3
1228 #if defined(__OPTIMIZE_SIZE__)
1229
1230 cmp divisor, #0
1231 beq LSYM(Ldiv0)
1232 LSYM(divsi3_skip_div0_test):
1233 push { work }
1234 mov work, dividend
1235 eor work, divisor @ Save the sign of the result.
1236 mov ip, work
1237 mov curbit, #1
1238 mov result, #0
1239 cmp divisor, #0
1240 bpl LSYM(Lover10)
1241 neg divisor, divisor @ Loops below use unsigned.
1242 LSYM(Lover10):
1243 cmp dividend, #0
1244 bpl LSYM(Lover11)
1245 neg dividend, dividend
1246 LSYM(Lover11):
1247 cmp dividend, divisor
1248 blo LSYM(Lgot_result)
1249
1250 THUMB_DIV_MOD_BODY 0
1251
1252 mov r0, result
1253 mov work, ip
1254 cmp work, #0
1255 bpl LSYM(Lover12)
1256 neg r0, r0
1257 LSYM(Lover12):
1258 pop { work }
1259 RET
1260
1261 /* Implementation of aeabi_idiv for ARMv6m. This version is only
1262 used in ARMv6-M when we need an efficient implementation. */
1263 #else
1264 LSYM(divsi3_skip_div0_test):
1265 cpy curbit, dividend
1266 orr curbit, divisor
1267 bmi LSYM(Lthumb1_div_negative)
1268
1269 LSYM(Lthumb1_div_positive):
1270 THUMB1_Div_Positive
1271
1272 LSYM(Lthumb1_div_negative):
1273 THUMB1_Div_Negative
1274
1275 #endif /* __OPTIMIZE_SIZE__ */
1276
1277 #elif defined(__ARM_ARCH_EXT_IDIV__)
1278
1279 ARM_FUNC_START divsi3
1280 ARM_FUNC_ALIAS aeabi_idiv divsi3
1281
1282 cmp r1, #0
1283 beq LSYM(Ldiv0)
1284 sdiv r0, r0, r1
1285 RET
1286
1287 #else /* ARM/Thumb-2 version. */
1288
1289 ARM_FUNC_START divsi3
1290 ARM_FUNC_ALIAS aeabi_idiv divsi3
1291
1292 cmp r1, #0
1293 beq LSYM(Ldiv0)
1294 LSYM(divsi3_skip_div0_test):
1295 eor ip, r0, r1 @ save the sign of the result.
1296 do_it mi
1297 rsbmi r1, r1, #0 @ loops below use unsigned.
1298 subs r2, r1, #1 @ division by 1 or -1 ?
1299 beq 10f
1300 movs r3, r0
1301 do_it mi
1302 rsbmi r3, r0, #0 @ positive dividend value
1303 cmp r3, r1
1304 bls 11f
1305 tst r1, r2 @ divisor is power of 2 ?
1306 beq 12f
1307
1308 ARM_DIV_BODY r3, r1, r0, r2
1309
1310 cmp ip, #0
1311 do_it mi
1312 rsbmi r0, r0, #0
1313 RET
1314
1315 10: teq ip, r0 @ same sign ?
1316 do_it mi
1317 rsbmi r0, r0, #0
1318 RET
1319
1320 11: do_it lo
1321 movlo r0, #0
1322 do_it eq,t
1323 moveq r0, ip, asr #31
1324 orreq r0, r0, #1
1325 RET
1326
1327 12: ARM_DIV2_ORDER r1, r2
1328
1329 cmp ip, #0
1330 mov r0, r3, lsr r2
1331 do_it mi
1332 rsbmi r0, r0, #0
1333 RET
1334
1335 #endif /* ARM version */
1336
1337 DIV_FUNC_END divsi3 signed
1338
1339 #if defined(__prefer_thumb__)
1340 FUNC_START aeabi_idivmod
1341 cmp r1, #0
1342 beq LSYM(Ldiv0)
1343 # if defined(__OPTIMIZE_SIZE__)
1344 push {r0, r1, lr}
1345 bl LSYM(divsi3_skip_div0_test)
1346 pop {r1, r2, r3}
1347 mul r2, r0
1348 sub r1, r1, r2
1349 bx r3
1350 # else
1351 /* Both the quotient and remainder are calculated simultaneously
1352 in THUMB1_Div_Positive and THUMB1_Div_Negative. There is no
1353 need to calculate the remainder again here. */
1354 b LSYM(divsi3_skip_div0_test)
1355 RET
1356 # endif /* __OPTIMIZE_SIZE__ */
1357
1358 #elif defined(__ARM_ARCH_EXT_IDIV__)
1359 ARM_FUNC_START aeabi_idivmod
1360 cmp r1, #0
1361 beq LSYM(Ldiv0)
1362 mov r2, r0
1363 sdiv r0, r0, r1
1364 mls r1, r0, r1, r2
1365 RET
1366 #else
1367 ARM_FUNC_START aeabi_idivmod
1368 cmp r1, #0
1369 beq LSYM(Ldiv0)
1370 stmfd sp!, { r0, r1, lr }
1371 bl LSYM(divsi3_skip_div0_test)
1372 ldmfd sp!, { r1, r2, lr }
1373 mul r3, r2, r0
1374 sub r1, r1, r3
1375 RET
1376 #endif
1377 FUNC_END aeabi_idivmod
1378
1379 #endif /* L_divsi3 */
1380 /* ------------------------------------------------------------------------ */
1381 #ifdef L_modsi3
1382
1383 #if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1
1384
1385 ARM_FUNC_START modsi3
1386
1387 cmp r1, #0
1388 beq LSYM(Ldiv0)
1389
1390 sdiv r2, r0, r1
1391 mls r0, r1, r2, r0
1392 RET
1393
1394 #elif defined(__thumb__)
1395
1396 FUNC_START modsi3
1397
1398 mov curbit, #1
1399 cmp divisor, #0
1400 beq LSYM(Ldiv0)
1401 bpl LSYM(Lover10)
1402 neg divisor, divisor @ Loops below use unsigned.
1403 LSYM(Lover10):
1404 push { work }
1405 @ Need to save the sign of the dividend; unfortunately, we need the
1406 @ work register later on. Must do this after saving the original value of
1407 @ the work register, because we will pop this value off first.
1408 push { dividend }
1409 cmp dividend, #0
1410 bpl LSYM(Lover11)
1411 neg dividend, dividend
1412 LSYM(Lover11):
1413 cmp dividend, divisor
1414 blo LSYM(Lgot_result)
1415
1416 THUMB_DIV_MOD_BODY 1
1417
1418 pop { work }
1419 cmp work, #0
1420 bpl LSYM(Lover12)
1421 neg dividend, dividend
1422 LSYM(Lover12):
1423 pop { work }
1424 RET
1425
1426 #else /* ARM version. */
1427
1428 FUNC_START modsi3
1429
1430 cmp r1, #0
1431 beq LSYM(Ldiv0)
1432 rsbmi r1, r1, #0 @ loops below use unsigned.
1433 movs ip, r0 @ preserve sign of dividend
1434 rsbmi r0, r0, #0 @ if negative make positive
1435 subs r2, r1, #1 @ compare divisor with 1
1436 cmpne r0, r1 @ compare dividend with divisor
1437 moveq r0, #0
1438 tsthi r1, r2 @ see if divisor is power of 2
1439 andeq r0, r0, r2
1440 bls 10f
1441
1442 ARM_MOD_BODY r0, r1, r2, r3
1443
1444 10: cmp ip, #0
1445 rsbmi r0, r0, #0
1446 RET
1447
1448 #endif /* ARM version */
1449
1450 DIV_FUNC_END modsi3 signed
1451
1452 #endif /* L_modsi3 */
1453 /* ------------------------------------------------------------------------ */
1454 #ifdef L_dvmd_tls
1455
1456 #ifdef __ARM_EABI__
1457 WEAK aeabi_idiv0
1458 WEAK aeabi_ldiv0
1459 FUNC_START aeabi_idiv0
1460 FUNC_START aeabi_ldiv0
1461 RET
1462 FUNC_END aeabi_ldiv0
1463 FUNC_END aeabi_idiv0
1464 #else
1465 FUNC_START div0
1466 RET
1467 FUNC_END div0
1468 #endif
1469
1470 #endif /* L_dvmd_tls */
1471 /* ------------------------------------------------------------------------ */
1472 #ifdef L_dvmd_lnx
1473 @ GNU/Linux division-by-zero handler. Used in place of L_dvmd_tls
1474
1475 /* Constant taken from <asm/signal.h>. */
1476 #define SIGFPE 8
1477
1478 #ifdef __ARM_EABI__
1479 cfi_start __aeabi_ldiv0, LSYM(Lend_aeabi_ldiv0)
1480 WEAK aeabi_idiv0
1481 WEAK aeabi_ldiv0
1482 ARM_FUNC_START aeabi_idiv0
1483 ARM_FUNC_START aeabi_ldiv0
1484 do_push {r1, lr}
1485 98: cfi_push 98b - __aeabi_ldiv0, 0xe, -0x4, 0x8
1486 #else
1487 cfi_start __div0, LSYM(Lend_div0)
1488 ARM_FUNC_START div0
1489 do_push {r1, lr}
1490 98: cfi_push 98b - __div0, 0xe, -0x4, 0x8
1491 #endif
1492
1493 mov r0, #SIGFPE
1494 bl SYM(raise) __PLT__
1495 RETLDM r1 unwind=98b
1496
1497 #ifdef __ARM_EABI__
1498 cfi_end LSYM(Lend_aeabi_ldiv0)
1499 FUNC_END aeabi_ldiv0
1500 FUNC_END aeabi_idiv0
1501 #else
1502 cfi_end LSYM(Lend_div0)
1503 FUNC_END div0
1504 #endif
1505
1506 #endif /* L_dvmd_lnx */
1507 #ifdef L_clear_cache
1508 #if defined __ARM_EABI__ && defined __linux__
1509 @ EABI GNU/Linux call to cacheflush syscall.
1510 ARM_FUNC_START clear_cache
1511 do_push {r7}
1512 #if __ARM_ARCH >= 7 || defined(__ARM_ARCH_6T2__)
1513 movw r7, #2
1514 movt r7, #0xf
1515 #else
1516 mov r7, #0xf0000
1517 add r7, r7, #2
1518 #endif
1519 mov r2, #0
1520 swi 0
1521 do_pop {r7}
1522 RET
1523 FUNC_END clear_cache
1524 #else
1525 #error "This is only for ARM EABI GNU/Linux"
1526 #endif
1527 #endif /* L_clear_cache */
1528
1529 #ifdef L_speculation_barrier
1530 FUNC_START speculation_barrier
1531 #if __ARM_ARCH >= 7
1532 isb
1533 dsb sy
1534 #elif defined __ARM_EABI__ && defined __linux__
1535 /* We don't have a speculation barrier directly for this
1536 platform/architecture variant. But we can use a kernel
1537 clear_cache service routine which will emit such instructions
1538 if run on a later version of the architecture. We don't
1539 really want to flush the cache, but we must give it a valid
1540 address, so just clear pc..pc+1. */
1541 #if defined __thumb__ && !defined __thumb2__
1542 push {r7}
1543 mov r7, #0xf
1544 lsl r7, #16
1545 add r7, #2
1546 adr r0, . + 4
1547 add r1, r0, #1
1548 mov r2, #0
1549 svc 0
1550 pop {r7}
1551 #else
1552 do_push {r7}
1553 #ifdef __ARM_ARCH_6T2__
1554 movw r7, #2
1555 movt r7, #0xf
1556 #else
1557 mov r7, #0xf0000
1558 add r7, r7, #2
1559 #endif
1560 add r0, pc, #0 /* ADR. */
1561 add r1, r0, #1
1562 mov r2, #0
1563 svc 0
1564 do_pop {r7}
1565 #endif /* Thumb1 only */
1566 #else
1567 #warning "No speculation barrier defined for this platform"
1568 #endif
1569 RET
1570 FUNC_END speculation_barrier
1571 #endif
1572 /* ------------------------------------------------------------------------ */
1573 /* Dword shift operations. */
1574 /* All the following Dword shift variants rely on the fact that
1575 shft xxx, Reg
1576 is in fact done as
1577 shft xxx, (Reg & 255)
1578 so for Reg values in (32...63) and (-1...-31) we will get zero (in the
1579 case of logical shifts) or the sign (for asr). */
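/* A C model of the 64-bit logical right shift implemented below
   (__lshrdi3 / __aeabi_llsr), for reference only.  'al'/'ah' mirror the
   low/high register halves; the explicit branches stand in for the
   "shift by (Reg & 255)" hardware behaviour described above, since a C
   shift by 32 or more would be undefined.  Valid for counts 0..63.

   static unsigned long long llsr_model (unsigned al, unsigned ah, unsigned n)
   {
     unsigned lo, hi;
     if (n >= 32)
       {
         lo = ah >> (n - 32);     // high word supplies everything
         hi = 0;
       }
     else if (n == 0)
       {
         lo = al;
         hi = ah;
       }
     else
       {
         lo = (al >> n) | (ah << (32 - n));
         hi = ah >> n;
       }
     return ((unsigned long long) hi << 32) | lo;
   }  */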
1580
1581 #ifdef __ARMEB__
1582 #define al r1
1583 #define ah r0
1584 #else
1585 #define al r0
1586 #define ah r1
1587 #endif
1588
1589 /* Prevent __aeabi double-word shifts from being produced on SymbianOS. */
1590 #ifndef __symbian__
1591
1592 #ifdef L_lshrdi3
1593
1594 FUNC_START lshrdi3
1595 FUNC_ALIAS aeabi_llsr lshrdi3
1596
1597 #ifdef __thumb__
1598 lsr al, r2
1599 mov r3, ah
1600 lsr ah, r2
1601 mov ip, r3
1602 sub r2, #32
1603 lsr r3, r2
1604 orr al, r3
1605 neg r2, r2
1606 mov r3, ip
1607 lsl r3, r2
1608 orr al, r3
1609 RET
1610 #else
1611 subs r3, r2, #32
1612 rsb ip, r2, #32
1613 movmi al, al, lsr r2
1614 movpl al, ah, lsr r3
1615 orrmi al, al, ah, lsl ip
1616 mov ah, ah, lsr r2
1617 RET
1618 #endif
1619 FUNC_END aeabi_llsr
1620 FUNC_END lshrdi3
1621
1622 #endif
1623
1624 #ifdef L_ashrdi3
1625
1626 FUNC_START ashrdi3
1627 FUNC_ALIAS aeabi_lasr ashrdi3
1628
1629 #ifdef __thumb__
1630 lsr al, r2
1631 mov r3, ah
1632 asr ah, r2
1633 sub r2, #32
1634 @ If r2 is negative at this point the following step would OR
1635 @ the sign bit into all of AL. That's not what we want...
1636 bmi 1f
1637 mov ip, r3
1638 asr r3, r2
1639 orr al, r3
1640 mov r3, ip
1641 1:
1642 neg r2, r2
1643 lsl r3, r2
1644 orr al, r3
1645 RET
1646 #else
1647 subs r3, r2, #32
1648 rsb ip, r2, #32
1649 movmi al, al, lsr r2
1650 movpl al, ah, asr r3
1651 orrmi al, al, ah, lsl ip
1652 mov ah, ah, asr r2
1653 RET
1654 #endif
1655
1656 FUNC_END aeabi_lasr
1657 FUNC_END ashrdi3
1658
1659 #endif
1660
1661 #ifdef L_ashldi3
1662
1663 FUNC_START ashldi3
1664 FUNC_ALIAS aeabi_llsl ashldi3
1665
1666 #ifdef __thumb__
1667 lsl ah, r2
1668 mov r3, al
1669 lsl al, r2
1670 mov ip, r3
1671 sub r2, #32
1672 lsl r3, r2
1673 orr ah, r3
1674 neg r2, r2
1675 mov r3, ip
1676 lsr r3, r2
1677 orr ah, r3
1678 RET
1679 #else
1680 subs r3, r2, #32
1681 rsb ip, r2, #32
1682 movmi ah, ah, lsl r2
1683 movpl ah, al, lsl r3
1684 orrmi ah, ah, al, lsr ip
1685 mov al, al, lsl r2
1686 RET
1687 #endif
1688 FUNC_END aeabi_llsl
1689 FUNC_END ashldi3
1690
1691 #endif
1692
1693 #endif /* __symbian__ */
1694
1695 #ifdef L_clzsi2
1696 #ifdef NOT_ISA_TARGET_32BIT
1697 FUNC_START clzsi2
1698 mov r1, #28
1699 mov r3, #1
1700 lsl r3, r3, #16
1701 cmp r0, r3 /* 0x10000 */
1702 bcc 2f
1703 lsr r0, r0, #16
1704 sub r1, r1, #16
1705 2: lsr r3, r3, #8
1706 cmp r0, r3 /* #0x100 */
1707 bcc 2f
1708 lsr r0, r0, #8
1709 sub r1, r1, #8
1710 2: lsr r3, r3, #4
1711 cmp r0, r3 /* #0x10 */
1712 bcc 2f
1713 lsr r0, r0, #4
1714 sub r1, r1, #4
1715 2: adr r2, 1f
1716 ldrb r0, [r2, r0]
1717 add r0, r0, r1
1718 bx lr
1719 .align 2
1720 1:
1721 .byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1722 FUNC_END clzsi2
1723 #else
1724 ARM_FUNC_START clzsi2
1725 # if defined (__ARM_FEATURE_CLZ)
1726 clz r0, r0
1727 RET
1728 # else
1729 mov r1, #28
1730 cmp r0, #0x10000
1731 do_it cs, t
1732 movcs r0, r0, lsr #16
1733 subcs r1, r1, #16
1734 cmp r0, #0x100
1735 do_it cs, t
1736 movcs r0, r0, lsr #8
1737 subcs r1, r1, #8
1738 cmp r0, #0x10
1739 do_it cs, t
1740 movcs r0, r0, lsr #4
1741 subcs r1, r1, #4
1742 adr r2, 1f
1743 ldrb r0, [r2, r0]
1744 add r0, r0, r1
1745 RET
1746 .align 2
1747 1:
1748 .byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1749 # endif /* !defined (__ARM_FEATURE_CLZ) */
1750 FUNC_END clzsi2
1751 #endif
1752 #endif /* L_clzsi2 */
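/* A C model of the table-driven __clzsi2 fallback above (used when the
   target has no CLZ instruction), for reference only: narrow the value
   to its top nibble in 16/8/4-bit steps, then finish with the 16-entry
   table.  Like the assembly, it yields 32 for a zero argument.

   static int clzsi2_model (unsigned x)
   {
     static const unsigned char tab[16] =
       { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
     int n = 28;
     if (x >= 0x10000) { x >>= 16; n -= 16; }
     if (x >= 0x100)   { x >>= 8;  n -= 8; }
     if (x >= 0x10)    { x >>= 4;  n -= 4; }
     return n + tab[x];
   }  */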
1753
1754 #ifdef L_clzdi2
1755 #if !defined (__ARM_FEATURE_CLZ)
1756
1757 # ifdef NOT_ISA_TARGET_32BIT
1758 FUNC_START clzdi2
1759 push {r4, lr}
1760 # else
1761 ARM_FUNC_START clzdi2
1762 do_push {r4, lr}
1763 # endif
1764 cmp xxh, #0
1765 bne 1f
1766 # ifdef __ARMEB__
1767 mov r0, xxl
1768 bl __clzsi2
1769 add r0, r0, #32
1770 b 2f
1771 1:
1772 bl __clzsi2
1773 # else
1774 bl __clzsi2
1775 add r0, r0, #32
1776 b 2f
1777 1:
1778 mov r0, xxh
1779 bl __clzsi2
1780 # endif
1781 2:
1782 # ifdef NOT_ISA_TARGET_32BIT
1783 pop {r4, pc}
1784 # else
1785 RETLDM r4
1786 # endif
1787 FUNC_END clzdi2
1788
1789 #else /* defined (__ARM_FEATURE_CLZ) */
1790
1791 ARM_FUNC_START clzdi2
1792 cmp xxh, #0
1793 do_it eq, et
1794 clzeq r0, xxl
1795 clzne r0, xxh
1796 addeq r0, r0, #32
1797 RET
1798 FUNC_END clzdi2
1799
1800 #endif
1801 #endif /* L_clzdi2 */
1802
1803 #ifdef L_ctzsi2
1804 #ifdef NOT_ISA_TARGET_32BIT
1805 FUNC_START ctzsi2
1806 neg r1, r0
1807 and r0, r0, r1
1808 mov r1, #28
1809 mov r3, #1
1810 lsl r3, r3, #16
1811 cmp r0, r3 /* 0x10000 */
1812 bcc 2f
1813 lsr r0, r0, #16
1814 sub r1, r1, #16
1815 2: lsr r3, r3, #8
1816 cmp r0, r3 /* #0x100 */
1817 bcc 2f
1818 lsr r0, r0, #8
1819 sub r1, r1, #8
1820 2: lsr r3, r3, #4
1821 cmp r0, r3 /* #0x10 */
1822 bcc 2f
1823 lsr r0, r0, #4
1824 sub r1, r1, #4
1825 2: adr r2, 1f
1826 ldrb r0, [r2, r0]
1827 sub r0, r0, r1
1828 bx lr
1829 .align 2
1830 1:
1831 .byte 27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
1832 FUNC_END ctzsi2
1833 #else
1834 ARM_FUNC_START ctzsi2
1835 rsb r1, r0, #0
1836 and r0, r0, r1
1837 # if defined (__ARM_FEATURE_CLZ)
1838 clz r0, r0
1839 rsb r0, r0, #31
1840 RET
1841 # else
1842 mov r1, #28
1843 cmp r0, #0x10000
1844 do_it cs, t
1845 movcs r0, r0, lsr #16
1846 subcs r1, r1, #16
1847 cmp r0, #0x100
1848 do_it cs, t
1849 movcs r0, r0, lsr #8
1850 subcs r1, r1, #8
1851 cmp r0, #0x10
1852 do_it cs, t
1853 movcs r0, r0, lsr #4
1854 subcs r1, r1, #4
1855 adr r2, 1f
1856 ldrb r0, [r2, r0]
1857 sub r0, r0, r1
1858 RET
1859 .align 2
1860 1:
1861 .byte 27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
1862 # endif /* !defined (__ARM_FEATURE_CLZ) */
1863 FUNC_END ctzsi2
1864 #endif
1865 #endif /* L_ctzsi2 */
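/* A C model of the table-driven __ctzsi2 fallback above, for reference
   only: isolate the lowest set bit with x & -x, then reuse the CLZ-style
   narrowing with a table holding 31 - clz values.  As with the assembly,
   the result is only meaningful for a non-zero argument.

   static int ctzsi2_model (unsigned x)
   {
     static const unsigned char tab[16] =
       { 27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31 };
     int n = 28;
     x &= -x;                       // keep only the lowest set bit
     if (x >= 0x10000) { x >>= 16; n -= 16; }
     if (x >= 0x100)   { x >>= 8;  n -= 8; }
     if (x >= 0x10)    { x >>= 4;  n -= 4; }
     return tab[x] - n;
   }  */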
1866
1867 /* ------------------------------------------------------------------------ */
1868 /* These next two sections are here despite the fact that they contain Thumb
1869 assembler because their presence allows interworked code to be linked even
1870 when the GCC library is this one. */
1871
1872 /* Do not build the interworking functions when the target architecture does
1873 not support Thumb instructions. (This can be a multilib option). */
1874 #if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
1875 || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
1876 || __ARM_ARCH >= 6
1877
1878 #if defined L_call_via_rX
1879
1880 /* These labels & instructions are used by the Arm/Thumb interworking code.
1881 The address of the function to be called is loaded into a register and then
1882 one of these labels is called via a BL instruction. This puts the
1883 return address into the link register with the bottom bit set, and the
1884 code here switches to the correct mode before executing the function. */
1885
1886 .text
1887 .align 0
1888 .force_thumb
1889
1890 .macro call_via register
1891 THUMB_FUNC_START _call_via_\register
1892
1893 bx \register
1894 nop
1895
1896 SIZE (_call_via_\register)
1897 .endm
1898
1899 call_via r0
1900 call_via r1
1901 call_via r2
1902 call_via r3
1903 call_via r4
1904 call_via r5
1905 call_via r6
1906 call_via r7
1907 call_via r8
1908 call_via r9
1909 call_via sl
1910 call_via fp
1911 call_via ip
1912 call_via sp
1913 call_via lr
1914
1915 #endif /* L_call_via_rX */
1916
1917 /* Don't bother with the old interworking routines for Thumb-2. */
1918 /* ??? Maybe only omit these on "m" variants. */
1919 #if !defined(__thumb2__) && __ARM_ARCH_ISA_ARM
1920
1921 #if defined L_interwork_call_via_rX
1922
1923 /* These labels & instructions are used by the Arm/Thumb interworking code,
1924 when the target address is in an unknown instruction set. The address
1925 of the function to be called is loaded into a register and then one of these
1926 labels is called via a BL instruction. This puts the return address
1927 into the link register with the bottom bit set, and the code here
1928 switches to the correct mode before executing the function. Unfortunately
1929 the target code cannot be relied upon to return via a BX instruction, so
1930 we have to store the return address on the stack and allow the
1931 called function to return here instead. Upon return we recover the real
1932 return address and use a BX to get back to Thumb mode.
1933
1934 There are three variations of this code. The first,
1935 _interwork_call_via_rN(), will push the return address onto the
1936 stack and pop it in _arm_return(). It should only be used if all
1937 arguments are passed in registers.
1938
1939 The second, _interwork_r7_call_via_rN(), instead stores the return
1940 address at [r7, #-4]. It is the caller's responsibility to ensure
1941 that this address is valid and contains no useful data.
1942
1943 The third, _interwork_r11_call_via_rN(), works in the same way but
1944 uses r11 instead of r7. It is useful if the caller does not really
1945 need a frame pointer. */
1946
1947 .text
1948 .align 0
1949
1950 .code 32
1951 .globl _arm_return
1952 LSYM(Lstart_arm_return):
1953 cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
1954 cfi_push 0, 0xe, -0x8, 0x8
1955 nop @ This nop is for the benefit of debuggers, so that
1956 @ backtraces will use the correct unwind information.
1957 _arm_return:
1958 RETLDM unwind=LSYM(Lstart_arm_return)
1959 cfi_end LSYM(Lend_arm_return)
1960
1961 .globl _arm_return_r7
1962 _arm_return_r7:
1963 ldr lr, [r7, #-4]
1964 bx lr
1965
1966 .globl _arm_return_r11
1967 _arm_return_r11:
1968 ldr lr, [r11, #-4]
1969 bx lr
1970
1971 .macro interwork_with_frame frame, register, name, return
1972 .code 16
1973
1974 THUMB_FUNC_START \name
1975
1976 bx pc
1977 nop
1978
1979 .code 32
1980 tst \register, #1
1981 streq lr, [\frame, #-4]
1982 adreq lr, _arm_return_\frame
1983 bx \register
1984
1985 SIZE (\name)
1986 .endm
1987
1988 .macro interwork register
1989 .code 16
1990
1991 THUMB_FUNC_START _interwork_call_via_\register
1992
1993 bx pc
1994 nop
1995
1996 .code 32
1997 .globl LSYM(Lchange_\register)
1998 LSYM(Lchange_\register):
1999 tst \register, #1
2000 streq lr, [sp, #-8]!
2001 adreq lr, _arm_return
2002 bx \register
2003
2004 SIZE (_interwork_call_via_\register)
2005
2006 interwork_with_frame r7,\register,_interwork_r7_call_via_\register
2007 interwork_with_frame r11,\register,_interwork_r11_call_via_\register
2008 .endm
2009
2010 interwork r0
2011 interwork r1
2012 interwork r2
2013 interwork r3
2014 interwork r4
2015 interwork r5
2016 interwork r6
2017 interwork r7
2018 interwork r8
2019 interwork r9
2020 interwork sl
2021 interwork fp
2022 interwork ip
2023 interwork sp
2024
2025 /* The LR case has to be handled a little differently... */
2026 .code 16
2027
2028 THUMB_FUNC_START _interwork_call_via_lr
2029
2030 bx pc
2031 nop
2032
2033 .code 32
2034 .globl .Lchange_lr
2035 .Lchange_lr:
2036 tst lr, #1
2037 stmeqdb r13!, {lr, pc}
2038 mov ip, lr
2039 adreq lr, _arm_return
2040 bx ip
2041
2042 SIZE (_interwork_call_via_lr)
2043
2044 #endif /* L_interwork_call_via_rX */
2045 #endif /* !__thumb2__ */
2046
2047 /* Functions to support compact pic switch tables in thumb1 state.
2048 All these routines take an index into the table in r0. The
2049 table is at LR & ~1 (but this must be rounded up in the case
2050 of 32-bit entries). They are only permitted to clobber r12
2051 and r14 and r0 must be preserved on exit. */
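/* A C model of the dispatch performed by __gnu_thumb1_case_sqi, for
   reference only (hypothetical name and types).  LR points just past the
   BL, i.e. at the byte table, with the Thumb bit set; the routine adds
   twice the selected (signed) entry to LR and returns there, so each
   table entry is a halfword offset from the table base.

   static unsigned long thumb1_case_sqi_model (unsigned long lr,
                                               unsigned long index)
   {
     const signed char *table = (const signed char *) (lr & ~1UL);
     return lr + 2 * (long) table[index];   // adjusted return address
   }  */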
2052 #ifdef L_thumb1_case_sqi
2053
2054 .text
2055 .align 0
2056 .force_thumb
2057 .syntax unified
2058 THUMB_FUNC_START __gnu_thumb1_case_sqi
2059 push {r1}
2060 mov r1, lr
2061 lsrs r1, r1, #1
2062 lsls r1, r1, #1
2063 ldrsb r1, [r1, r0]
2064 lsls r1, r1, #1
2065 add lr, lr, r1
2066 pop {r1}
2067 bx lr
2068 SIZE (__gnu_thumb1_case_sqi)
2069 #endif
2070
2071 #ifdef L_thumb1_case_uqi
2072
2073 .text
2074 .align 0
2075 .force_thumb
2076 .syntax unified
2077 THUMB_FUNC_START __gnu_thumb1_case_uqi
2078 push {r1}
2079 mov r1, lr
2080 lsrs r1, r1, #1
2081 lsls r1, r1, #1
2082 ldrb r1, [r1, r0]
2083 lsls r1, r1, #1
2084 add lr, lr, r1
2085 pop {r1}
2086 bx lr
2087 SIZE (__gnu_thumb1_case_uqi)
2088 #endif
2089
2090 #ifdef L_thumb1_case_shi
2091
2092 .text
2093 .align 0
2094 .force_thumb
2095 .syntax unified
2096 THUMB_FUNC_START __gnu_thumb1_case_shi
2097 push {r0, r1}
2098 mov r1, lr
2099 lsrs r1, r1, #1
2100 lsls r0, r0, #1
2101 lsls r1, r1, #1
2102 ldrsh r1, [r1, r0]
2103 lsls r1, r1, #1
2104 add lr, lr, r1
2105 pop {r0, r1}
2106 bx lr
2107 SIZE (__gnu_thumb1_case_shi)
2108 #endif
2109
2110 #ifdef L_thumb1_case_uhi
2111
2112 .text
2113 .align 0
2114 .force_thumb
2115 .syntax unified
2116 THUMB_FUNC_START __gnu_thumb1_case_uhi
2117 push {r0, r1}
2118 mov r1, lr
2119 lsrs r1, r1, #1
2120 lsls r0, r0, #1
2121 lsls r1, r1, #1
2122 ldrh r1, [r1, r0]
2123 lsls r1, r1, #1
2124 add lr, lr, r1
2125 pop {r0, r1}
2126 bx lr
2127 SIZE (__gnu_thumb1_case_uhi)
2128 #endif
2129
2130 #ifdef L_thumb1_case_si
2131
2132 .text
2133 .align 0
2134 .force_thumb
2135 .syntax unified
2136 THUMB_FUNC_START __gnu_thumb1_case_si
2137 push {r0, r1}
2138 mov r1, lr
2139 adds.n r1, r1, #2 /* Align to word. */
2140 lsrs r1, r1, #2
2141 lsls r0, r0, #2
2142 lsls r1, r1, #2
2143 ldr r0, [r1, r0]
2144 adds r0, r0, r1
2145 mov lr, r0
2146 pop {r0, r1}
2147 mov pc, lr /* We know we were called from thumb code. */
2148 SIZE (__gnu_thumb1_case_si)
2149 #endif
2150
2151 #endif /* Arch supports thumb. */
2152
2153 .macro CFI_START_FUNCTION
2154 .cfi_startproc
2155 .cfi_remember_state
2156 .endm
2157
2158 .macro CFI_END_FUNCTION
2159 .cfi_restore_state
2160 .cfi_endproc
2161 .endm
2162
2163 #ifndef __symbian__
2164 /* The condition here must match the one in gcc/config/arm/elf.h and
2165 libgcc/config/arm/t-elf. */
2166 #ifndef NOT_ISA_TARGET_32BIT
2167 #include "ieee754-df.S"
2168 #include "ieee754-sf.S"
2169 #include "bpabi.S"
2170 #else /* NOT_ISA_TARGET_32BIT */
2171 #include "bpabi-v6m.S"
2172 #endif /* NOT_ISA_TARGET_32BIT */
2173 #endif /* !__symbian__ */