1 /* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998-2021 Free Software Foundation, Inc.
3 Contributed by Denis Chertykov <chertykov@gmail.com>
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #if defined (__AVR_TINY__)
25 #define __zero_reg__ r17
26 #define __tmp_reg__ r16
28 #define __zero_reg__ r1
29 #define __tmp_reg__ r0
32 #if defined (__AVR_HAVE_SPH__)
36 #define __RAMPZ__ 0x3B
39 /* Most of the functions here are called directly from avr.md
40 patterns, instead of using the standard libcall mechanisms.
41 This can make better code because GCC knows exactly which
42 of the call-used registers (not all of them) are clobbered. */
44 /* FIXME: At present, there is no SORT directive in the linker
45 script so that we must not assume that different modules
46 in the same input section like .libgcc.text.mul will be
47 located close together. Therefore, we cannot use
48 RCALL/RJMP to call a function like __udivmodhi4 from
49 __divmodhi4 and have to use lengthy XCALL/XJMP even
50 though they are in the same input section and all same
51 input sections together are small enough to reach every
52 location with a RCALL/RJMP instruction. */
54 #if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
55 #error device not supported
58 .macro mov_l r_dest, r_src
59 #if defined (__AVR_HAVE_MOVW__)
66 .macro mov_h r_dest, r_src
67 #if defined (__AVR_HAVE_MOVW__)
74 .macro wmov r_dest, r_src
75 #if defined (__AVR_HAVE_MOVW__)
79 mov \r_dest+1, \r_src+1
83 #if defined (__AVR_HAVE_JMP_CALL__)
91 #if defined (__AVR_HAVE_EIJMP_EICALL__)
101 .macro do_prologue_saves n_pushed n_frame=0
102 ldi r26, lo8(\n_frame)
103 ldi r27, hi8(\n_frame)
104 ldi r30, lo8(gs(.L_prologue_saves.\@))
105 ldi r31, hi8(gs(.L_prologue_saves.\@))
106 XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
107 .L_prologue_saves.\@:
112 .macro do_epilogue_restores n_pushed n_frame=0
114 #ifdef __AVR_HAVE_SPH__
117 subi r28, lo8(-\n_frame)
118 sbci r29, hi8(-\n_frame)
125 subi r28, lo8(-\n_frame)
127 #endif /* HAVE SPH */
129 XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
132 ;; Support function entry and exit for convenience
134 .macro wsubi r_arg1, i_arg2
135 #if defined (__AVR_TINY__)
136 subi \r_arg1, lo8(\i_arg2)
137 sbci \r_arg1+1, hi8(\i_arg2)
139 sbiw \r_arg1, \i_arg2
143 .macro waddi r_arg1, i_arg2
144 #if defined (__AVR_TINY__)
145 subi \r_arg1, lo8(-\i_arg2)
146 sbci \r_arg1+1, hi8(-\i_arg2)
148 adiw \r_arg1, \i_arg2
171 ;; Skip next instruction, typically a jump target
172 #define skip cpse 16,16
174 ;; Negate a 2-byte value held in consecutive registers
181 ;; Negate a 4-byte value held in consecutive registers
182 ;; Sets the V flag for signed overflow tests if REG >= 16
194 adc \reg, __zero_reg__
195 adc \reg+1, __zero_reg__
196 adc \reg+2, __zero_reg__
197 adc \reg+3, __zero_reg__
201 #define exp_lo(N) hlo8 ((N) << 23)
202 #define exp_hi(N) hhi8 ((N) << 23)
205 .section .text.libgcc.mul, "ax", @progbits
207 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
208 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
209 #if !defined (__AVR_HAVE_MUL__)
210 /*******************************************************
211 Multiplication 8 x 8 without MUL
212 *******************************************************/
213 #if defined (L_mulqi3)
215 #define r_arg2 r22 /* multiplicand */
216 #define r_arg1 r24 /* multiplier */
217 #define r_res __tmp_reg__ /* result */
220 clr r_res ; clear result
224 add r_arg2,r_arg2 ; shift multiplicand
225 breq __mulqi3_exit ; while multiplicand != 0
227 brne __mulqi3_loop ; exit if multiplier = 0
229 mov r_arg1,r_res ; result to return register
237 #endif /* defined (L_mulqi3) */
240 /*******************************************************
241 Widening Multiplication 16 = 8 x 8 without MUL
242 Multiplication 16 x 16 without MUL
243 *******************************************************/
250 ;; Output overlaps input, thus expand result in CC0/1
253 #define CC0 __tmp_reg__
256 #if defined (L_umulqihi3)
257 ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
258 ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
259 ;;; Clobbers: __tmp_reg__, R21..R23
265 #endif /* L_umulqihi3 */
267 #if defined (L_mulqihi3)
268 ;;; R25:R24 = (signed int) R22 * (signed int) R24
269 ;;; (C1:C0) = (signed int) A0 * (signed int) B0
270 ;;; Clobbers: __tmp_reg__, R20..R23
276 ;; The multiplication runs twice as fast if A1 is zero, thus:
279 #ifdef __AVR_HAVE_JMP_CALL__
280 ;; Store B0 * sign of A
285 #else /* have no CALL */
286 ;; Skip sign-extension of A if A >= 0
287 ;; Same size as with the first alternative but avoids errata skip
288 ;; and is faster if A >= 0
294 #endif /* HAVE_JMP_CALL */
295 ;; 1-extend A after the multiplication
299 #endif /* L_mulqihi3 */
301 #if defined (L_mulhi3)
302 ;;; R25:R24 = R23:R22 * R25:R24
303 ;;; (C1:C0) = (A1:A0) * (B1:B0)
304 ;;; Clobbers: __tmp_reg__, R21..R23
312 ;; Bit n of A is 1 --> C += B << n
319 ;; If B == 0 we are ready
323 ;; Carry = n-th bit of A
326 ;; If bit n of A is set, then go add B * 2^n to C
329 ;; Carry = 0 --> The ROR above acts like CP A0, 0
330 ;; Thus, it is sufficient to CPC the high part to test A against 0
332 ;; Only proceed if A != 0
335 ;; Move Result into place
340 #endif /* L_mulhi3 */
373 /*******************************************************
374 Widening Multiplication 32 = 16 x 16 without MUL
375 *******************************************************/
377 #if defined (L_umulhisi3)
387 #endif /* L_umulhisi3 */
389 #if defined (L_mulhisi3)
396 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
403 #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
404 ;; Zero-extend A and __mulsi3 will run at least twice as fast
405 ;; compared to a sign-extended A.
410 ;; If A < 0 then perform the B * 0xffff.... before the
411 ;; very multiplication by initializing the high part of the
412 ;; result CC with -B.
417 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
419 #endif /* L_mulhisi3 */
422 /*******************************************************
423 Multiplication 32 x 32 without MUL
424 *******************************************************/
426 #if defined (L_mulsi3)
428 #if defined (__AVR_TINY__)
429 in r26, __SP_L__ ; safe to use X, as it is CC0/CC1
431 subi r26, lo8(-3) ; Add 3 to point past return address
433 push B0 ; save callee saved regs
435 ld B0, X+ ; load from caller stack
446 DEFUN __mulsi3_helper
451 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
453 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
456 lsl B0 $ rol B1 $ rol B2 $ rol B3
458 3: ;; A >>= 1: Carry = n-th bit of A
459 lsr A3 $ ror A2 $ ror A1 $ ror A0
462 ;; Only continue if A != 0
468 ;; All bits of A are consumed: Copy result to return register C
471 #if defined (__AVR_TINY__)
472 pop B1 ; restore callee saved regs
474 #endif /* defined (__AVR_TINY__) */
478 #endif /* L_mulsi3 */
497 #endif /* !defined (__AVR_HAVE_MUL__) */
498 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
500 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
501 #if defined (__AVR_HAVE_MUL__)
516 /*******************************************************
517 Widening Multiplication 32 = 16 x 16 with MUL
518 *******************************************************/
520 #if defined (L_mulhisi3)
521 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
522 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
523 ;;; Clobbers: __tmp_reg__
532 XJMP __usmulhisi3_tail
534 #endif /* L_mulhisi3 */
536 #if defined (L_usmulhisi3)
537 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
538 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
539 ;;; Clobbers: __tmp_reg__
545 DEFUN __usmulhisi3_tail
552 ENDF __usmulhisi3_tail
553 #endif /* L_usmulhisi3 */
555 #if defined (L_umulhisi3)
556 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
557 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
558 ;;; Clobbers: __tmp_reg__
565 #ifdef __AVR_HAVE_JMP_CALL__
566 ;; This function is used by many other routines, often multiple times.
567 ;; Therefore, if the flash size is not too limited, avoid the RCALL
568 ;; and invest 6 Bytes to speed things up.
583 #endif /* L_umulhisi3 */
585 /*******************************************************
586 Widening Multiplication 32 = 16 x 32 with MUL
587 *******************************************************/
589 #if defined (L_mulshisi3)
590 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
591 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
592 ;;; Clobbers: __tmp_reg__
594 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
595 ;; Some cores have a problem skipping a 2-word instruction
600 #endif /* __AVR_HAVE_JMP_CALL__ */
605 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
606 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
607 ;;; Clobbers: __tmp_reg__
610 ;; One-extend R27:R26 (A1:A0)
615 #endif /* L_mulshisi3 */
617 #if defined (L_muluhisi3)
618 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
619 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
620 ;;; Clobbers: __tmp_reg__
633 #endif /* L_muluhisi3 */
635 /*******************************************************
636 Multiplication 32 x 32 with MUL
637 *******************************************************/
639 #if defined (L_mulsi3)
640 ;;; R25:R22 = R25:R22 * R21:R18
641 ;;; (C3:C0) = C3:C0 * B3:B0
642 ;;; Clobbers: R26, R27, __tmp_reg__
650 ;; A1:A0 now contains the high word of A
661 #endif /* L_mulsi3 */
676 #endif /* __AVR_HAVE_MUL__ */
678 /*******************************************************
679 Multiplication 24 x 24 with MUL
680 *******************************************************/
682 #if defined (L_mulpsi3)
684 ;; A[0..2]: In: Multiplicand; Out: Product
689 ;; B[0..2]: In: Multiplier
694 #if defined (__AVR_HAVE_MUL__)
696 ;; C[0..2]: Expand Result
701 ;; R24:R22 *= R20:R18
702 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
711 mul AA2, B0 $ add C2, r0
712 mul AA0, B2 $ add C2, r0
724 #else /* !HAVE_MUL */
725 ;; C[0..2]: Expand Result
726 #if defined (__AVR_TINY__)
730 #endif /* defined (__AVR_TINY__) */
734 ;; R24:R22 *= R20:R18
735 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
738 #if defined (__AVR_TINY__)
741 subi r26, lo8(-3) ; Add 3 to point past return address
743 push B0 ; save callee saved regs
745 ld B0,X+ ; load from caller stack
748 #endif /* defined (__AVR_TINY__) */
754 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
755 LSR B2 $ ror B1 $ ror B0
757 ;; If the N-th Bit of B[] was set...
760 ;; ...then add A[] * 2^N to the Result C[]
761 ADD C0,A0 $ adc C1,A1 $ adc C2,A2
763 1: ;; Multiply A[] by 2
764 LSL A0 $ rol A1 $ rol A2
766 ;; Loop until B[] is 0
767 subi B0,0 $ sbci B1,0 $ sbci B2,0
770 ;; Copy C[] to the return Register A[]
775 #if defined (__AVR_TINY__)
778 #endif /* (__AVR_TINY__) */
786 #endif /* HAVE_MUL */
796 #endif /* L_mulpsi3 */
798 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
800 ;; A[0..2]: In: Multiplicand
805 ;; BB: In: Multiplier
813 ;; C[] = A[] * sign_extend (BB)
841 #endif /* L_mulsqipsi3 && HAVE_MUL */
843 /*******************************************************
844 Multiplication 64 x 64
845 *******************************************************/
849 ;; A[0..7]: In: Multiplicand
860 ;; B[0..7]: In: Multiplier
871 #if defined (__AVR_HAVE_MUL__)
872 ;; Define C[] for convenience
873 ;; Notice that parts of C[] overlap A[] respective B[]
883 #if defined (L_muldi3)
886 ;; R25:R18 *= R17:R10
887 ;; Ordinary ABI-Function
895 ;; Counting in Words, we have to perform a 4 * 4 Multiplication
898 mul A7,B0 $ $ mov C7,r0
899 mul A0,B7 $ $ add C7,r0
900 mul A6,B1 $ $ add C7,r0
901 mul A6,B0 $ mov C6,r0 $ add C7,r1
902 mul B6,A1 $ $ add C7,r0
903 mul B6,A0 $ add C6,r0 $ adc C7,r1
906 mul A2,B4 $ add C6,r0 $ adc C7,r1
907 mul A3,B4 $ $ add C7,r0
908 mul A2,B5 $ $ add C7,r0
925 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
935 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
945 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
949 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
963 #endif /* L_muldi3 */
965 #if defined (L_muldi3_6)
966 ;; A helper for some 64-bit multiplications with MUL available
978 #endif /* L_muldi3_6 */
989 #else /* !HAVE_MUL */
991 #if defined (L_muldi3)
1005 ;; R25:R18 *= R17:R10
1006 ;; Ordinary ABI-Function
1022 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
1023 ;; where N = 64 - Loop.
1024 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
1025 ;; B[] will have its initial Value again.
1026 LSR B7 $ ror B6 $ ror B5 $ ror B4
1027 ror B3 $ ror B2 $ ror B1 $ ror B0
1029 ;; If the N-th Bit of B[] was set then...
1031 ;; ...finish Rotation...
1034 ;; ...and add A[] * 2^N to the Result C[]
1035 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
1036 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
1038 1: ;; Multiply A[] by 2
1039 LSL A0 $ rol A1 $ rol A2 $ rol A3
1040 rol A4 $ rol A5 $ rol A6 $ rol A7
1045 ;; We expanded the Result in C[]
1046 ;; Copy Result to the Return Register A[]
1070 #endif /* L_muldi3 */
1071 #endif /* HAVE_MUL */
1072 #endif /* if not __AVR_TINY__ */
1092 /*******************************************************
1093 Widening Multiplication 64 = 32 x 32 with MUL
1094 *******************************************************/
1096 #if defined (__AVR_HAVE_MUL__)
1116 #if defined (L_umulsidi3)
1118 ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1120 ;; R18[8] = R22[4] * R18[4]
1122 ;; Ordinary ABI Function, but additionally sets
1123 ;; X = R20[2] = B2[2]
1124 ;; Z = R22[2] = A0[2]
1130 DEFUN __umulsidi3_helper
1131 push 29 $ push 28 ; Y
1133 ;; Counting in Words, we have to perform 4 Multiplications
1137 push 23 $ push 22 ; C0
1141 push 27 $ push 26 ; A0
1142 push 19 $ push 18 ; B2
1144 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1145 ;; B2 C2 -- -- -- B0 A2
1149 ;; Sign-extend A. T holds the sign of A
1151 ;; Subtract B from the high part of the result
1156 0: wmov 18, 28 ;; B0
1160 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1161 ;; B0 C2 -- -- A2 C4 C6
1166 pop 26 $ pop 27 ;; B2
1167 pop 18 $ pop 19 ;; A0
1170 ;; Move result C into place and save A0 in Z
1177 pop 28 $ pop 29 ;; Y
1179 ENDF __umulsidi3_helper
1180 #endif /* L_umulsidi3 */
1183 #if defined (L_mulsidi3)
1185 ;; Signed widening 64 = 32 * 32 Multiplication
1187 ;; R18[8] = R22[4] * R18[4]
1188 ;; Ordinary ABI Function
1191 sbrs B3, 7 ; Enhanced core has no skip bug
1192 XJMP __umulsidi3_helper
1194 ;; B needs sign-extension
1197 XCALL __umulsidi3_helper
1207 #endif /* L_mulsidi3 */
1225 #endif /* HAVE_MUL */
1227 /**********************************************************
1228 Widening Multiplication 64 = 32 x 32 without MUL
1229 **********************************************************/
1230 #ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
1231 #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1262 ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1264 ;; R18[8] = R22[4] * R18[4]
1265 ;; Ordinary ABI Function
1274 ;; Save 10 Registers: R10..R17, R28, R29
1275 do_prologue_saves 10
1278 ;; Move B into place...
1287 ;; Move A into place...
1297 do_epilogue_restores 10
1325 #endif /* L_mulsidi3 && !HAVE_MUL */
1326 #endif /* if not __AVR_TINY__ */
1327 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1330 .section .text.libgcc.div, "ax", @progbits
1332 /*******************************************************
1333 Division 8 / 8 => (result + remainder)
1334 *******************************************************/
1335 #define r_rem r25 /* remainder */
1336 #define r_arg1 r24 /* dividend, quotient */
1337 #define r_arg2 r22 /* divisor */
1338 #define r_cnt r23 /* loop count */
1340 #if defined (L_udivmodqi4)
1342 sub r_rem,r_rem ; clear remainder and carry
1343 ldi r_cnt,9 ; init loop counter
1344 rjmp __udivmodqi4_ep ; jump to entry point
1346 rol r_rem ; shift dividend into remainder
1347 cp r_rem,r_arg2 ; compare remainder & divisor
1348 brcs __udivmodqi4_ep ; remainder <= divisor
1349 sub r_rem,r_arg2 ; restore remainder
1351 rol r_arg1 ; shift dividend (with CARRY)
1352 dec r_cnt ; decrement loop counter
1353 brne __udivmodqi4_loop
1354 com r_arg1 ; complement result
1355 ; because C flag was complemented in loop
1358 #endif /* defined (L_udivmodqi4) */
1360 #if defined (L_divmodqi4)
1362 bst r_arg1,7 ; store sign of dividend
1363 mov __tmp_reg__,r_arg1
1364 eor __tmp_reg__,r_arg2; r0.7 is sign of result
1366 neg r_arg1 ; dividend negative : negate
1368 neg r_arg2 ; divisor negative : negate
1369 XCALL __udivmodqi4 ; do the unsigned div/mod
1371 neg r_rem ; correct remainder sign
1374 neg r_arg1 ; correct result sign
1378 #endif /* defined (L_divmodqi4) */
1386 /*******************************************************
1387 Division 16 / 16 => (result + remainder)
1388 *******************************************************/
1389 #define r_remL r26 /* remainder Low */
1390 #define r_remH r27 /* remainder High */
1392 /* return: remainder */
1393 #define r_arg1L r24 /* dividend Low */
1394 #define r_arg1H r25 /* dividend High */
1396 /* return: quotient */
1397 #define r_arg2L r22 /* divisor Low */
1398 #define r_arg2H r23 /* divisor High */
1400 #define r_cnt r21 /* loop count */
1402 #if defined (L_udivmodhi4)
1405 sub r_remH,r_remH ; clear remainder and carry
1406 ldi r_cnt,17 ; init loop counter
1407 rjmp __udivmodhi4_ep ; jump to entry point
1409 rol r_remL ; shift dividend into remainder
1411 cp r_remL,r_arg2L ; compare remainder & divisor
1413 brcs __udivmodhi4_ep ; remainder < divisor
1414 sub r_remL,r_arg2L ; restore remainder
1417 rol r_arg1L ; shift dividend (with CARRY)
1419 dec r_cnt ; decrement loop counter
1420 brne __udivmodhi4_loop
1423 ; div/mod results to return registers, as for the div() function
1424 mov_l r_arg2L, r_arg1L ; quotient
1425 mov_h r_arg2H, r_arg1H
1426 mov_l r_arg1L, r_remL ; remainder
1427 mov_h r_arg1H, r_remH
1430 #endif /* defined (L_udivmodhi4) */
1432 #if defined (L_divmodhi4)
1436 bst r_arg1H,7 ; store sign of dividend
1437 mov __tmp_reg__,r_arg2H
1439 com __tmp_reg__ ; r0.7 is sign of result
1440 rcall __divmodhi4_neg1 ; dividend negative: negate
1443 rcall __divmodhi4_neg2 ; divisor negative: negate
1444 XCALL __udivmodhi4 ; do the unsigned div/mod
1446 rcall __divmodhi4_neg2 ; correct remainder sign
1447 brtc __divmodhi4_exit
1449 ;; correct dividend/remainder sign
1455 ;; correct divisor/result sign
1462 #endif /* defined (L_divmodhi4) */
1475 /*******************************************************
1476 Division 24 / 24 => (result + remainder)
1477 *******************************************************/
1479 ;; A[0..2]: In: Dividend; Out: Quotient
1484 ;; B[0..2]: In: Divisor; Out: Remainder
1489 ;; C[0..2]: Expand remainder
1490 #define C0 __zero_reg__
1497 #if defined (L_udivmodpsi4)
1498 ;; R24:R22 = R24:R22 udiv R20:R18
1499 ;; R20:R18 = R24:R22 umod R20:R18
1500 ;; Clobbers: R21, R25, R26
1505 ; Clear remainder and carry. C0 is already 0
1508 ; jump to entry point
1509 rjmp __udivmodpsi4_start
1511 ; shift dividend into remainder
1515 ; compare remainder & divisor
1519 brcs __udivmodpsi4_start ; remainder <= divisor
1520 sub C0, B0 ; restore remainder
1523 __udivmodpsi4_start:
1524 ; shift dividend (with CARRY)
1528 ; decrement loop counter
1530 brne __udivmodpsi4_loop
1534 ; div/mod results to return registers
1539 clr __zero_reg__ ; C0
1542 #endif /* defined (L_udivmodpsi4) */
1544 #if defined (L_divmodpsi4)
1545 ;; R24:R22 = R24:R22 div R20:R18
1546 ;; R20:R18 = R24:R22 mod R20:R18
1547 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1550 ; R0.7 will contain the sign of the result:
1551 ; R0.7 = A.sign ^ B.sign
1553 ; T-flag = sign of dividend
1557 ; Adjust dividend's sign
1558 rcall __divmodpsi4_negA
1560 ; Adjust divisor's sign
1562 rcall __divmodpsi4_negB
1564 ; Do the unsigned div/mod
1567 ; Adjust quotient's sign
1569 rcall __divmodpsi4_negA
1571 ; Adjust remainder's sign
1572 brtc __divmodpsi4_end
1575 ; Correct divisor/remainder sign
1583 ; Correct dividend/quotient sign
1594 #endif /* defined (L_divmodpsi4) */
1610 /*******************************************************
1611 Division 32 / 32 => (result + remainder)
1612 *******************************************************/
1613 #define r_remHH r31 /* remainder High */
1616 #define r_remL r26 /* remainder Low */
1618 /* return: remainder */
1619 #define r_arg1HH r25 /* dividend High */
1620 #define r_arg1HL r24
1622 #define r_arg1L r22 /* dividend Low */
1624 /* return: quotient */
1625 #define r_arg2HH r21 /* divisor High */
1626 #define r_arg2HL r20
1628 #define r_arg2L r18 /* divisor Low */
1630 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1632 #if defined (L_udivmodsi4)
1634 ldi r_remL, 33 ; init loop counter
1637 sub r_remH,r_remH ; clear remainder and carry
1638 mov_l r_remHL, r_remL
1639 mov_h r_remHH, r_remH
1640 rjmp __udivmodsi4_ep ; jump to entry point
1642 rol r_remL ; shift dividend into remainder
1646 cp r_remL,r_arg2L ; compare remainder & divisor
1648 cpc r_remHL,r_arg2HL
1649 cpc r_remHH,r_arg2HH
1650 brcs __udivmodsi4_ep ; remainder <= divisor
1651 sub r_remL,r_arg2L ; restore remainder
1653 sbc r_remHL,r_arg2HL
1654 sbc r_remHH,r_arg2HH
1656 rol r_arg1L ; shift dividend (with CARRY)
1660 dec r_cnt ; decrement loop counter
1661 brne __udivmodsi4_loop
1662 ; __zero_reg__ now restored (r_cnt == 0)
1667 ; div/mod results to return registers, as for the ldiv() function
1668 mov_l r_arg2L, r_arg1L ; quotient
1669 mov_h r_arg2H, r_arg1H
1670 mov_l r_arg2HL, r_arg1HL
1671 mov_h r_arg2HH, r_arg1HH
1672 mov_l r_arg1L, r_remL ; remainder
1673 mov_h r_arg1H, r_remH
1674 mov_l r_arg1HL, r_remHL
1675 mov_h r_arg1HH, r_remHH
1678 #endif /* defined (L_udivmodsi4) */
1680 #if defined (L_divmodsi4)
1682 mov __tmp_reg__,r_arg2HH
1683 bst r_arg1HH,7 ; store sign of dividend
1685 com __tmp_reg__ ; r0.7 is sign of result
1686 XCALL __negsi2 ; dividend negative: negate
1689 rcall __divmodsi4_neg2 ; divisor negative: negate
1690 XCALL __udivmodsi4 ; do the unsigned div/mod
1691 sbrc __tmp_reg__, 7 ; correct quotient sign
1692 rcall __divmodsi4_neg2
1693 brtc __divmodsi4_exit ; correct remainder sign
1696 ;; correct divisor/quotient sign
1707 #endif /* defined (L_divmodsi4) */
1709 #if defined (L_negsi2)
1711 ;; (neg:SI (reg:SI 22)))
1712 ;; Sets the V flag for signed overflow tests
1717 #endif /* L_negsi2 */
1733 /* *di routines use registers below R19 and won't work with tiny arch
1736 #if !defined (__AVR_TINY__)
1737 /*******************************************************
1740 *******************************************************/
1742 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1743 ;; at least 16k of Program Memory. For smaller Devices, depend
1744 ;; on MOVW and SP Size. There is a Connexion between SP Size and
1745 ;; Flash Size so that SP Size can be used to test for Flash Size.
1747 #if defined (__AVR_HAVE_JMP_CALL__)
1748 # define SPEED_DIV 8
1749 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1750 # define SPEED_DIV 16
1752 # define SPEED_DIV 0
1755 ;; A[0..7]: In: Dividend;
1756 ;; Out: Quotient (T = 0)
1757 ;; Out: Remainder (T = 1)
1767 ;; B[0..7]: In: Divisor; Out: Clobber
1777 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1787 ;; Holds Signs during Division Routine
1788 #define SS __tmp_reg__
1790 ;; Bit-Counter in Division Routine
1791 #define R_cnt __zero_reg__
1793 ;; Scratch Register for Negation
1796 #if defined (L_udivdi3)
1798 ;; R25:R18 = R24:R18 umod R17:R10
1799 ;; Ordinary ABI-Function
1803 rjmp __udivdi3_umoddi3
1806 ;; R25:R18 = R24:R18 udiv R17:R10
1807 ;; Ordinary ABI-Function
1813 DEFUN __udivdi3_umoddi3
1824 ENDF __udivdi3_umoddi3
1825 #endif /* L_udivdi3 */
1827 #if defined (L_udivmod64)
1829 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1830 ;; No Registers saved/restored; the Callers will take Care.
1831 ;; Preserves B[] and T-flag
1832 ;; T = 0: Compute Quotient in A[]
1833 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1837 ;; Clear Remainder (C6, C7 will follow)
1844 #if SPEED_DIV == 0 || SPEED_DIV == 16
1845 ;; Initialize Loop-Counter
1848 #endif /* SPEED_DIV */
1855 1: ;; Compare shifted Dividend against Divisor
1856 ;; If -- even after Shifting -- it is smaller...
1857 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1858 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1861 ;; ...then we can subtract it. Thus, it is legal to shift left
1862 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1863 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1864 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1865 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1871 ;; Shifted 64 Bits: A7 has traveled to C7
1873 ;; Divisor is greater than Dividend. We have:
1876 ;; Thus, we can return immediately
1879 2: ;; Initialize Bit-Counter with Number of Bits still to be performed
1882 ;; Push of A7 is not needed because C7 is still 0
1886 #elif SPEED_DIV == 16
1888 ;; Compare shifted Dividend against Divisor
1896 ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1897 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1898 wmov C2,A6 $ wmov C0,A4
1899 wmov A6,A2 $ wmov A4,A0
1900 wmov A2,C6 $ wmov A0,C4
1902 ;; Set Bit Counter to 32
1906 #error SPEED_DIV = ?
1907 #endif /* SPEED_DIV */
1909 ;; The very Division + Remainder Routine
1911 3: ;; Left-shift Dividend...
1912 lsl A0 $ rol A1 $ rol A2 $ rol A3
1913 rol A4 $ rol A5 $ rol A6 $ rol A7
1915 ;; ...into Remainder
1916 rol C0 $ rol C1 $ rol C2 $ rol C3
1917 rol C4 $ rol C5 $ rol C6 $ rol C7
1919 ;; Compare Remainder and Divisor
1920 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1921 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1925 ;; Divisor fits into Remainder: Subtract it from Remainder...
1926 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1927 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1929 ;; ...and set according Bit in the upcoming Quotient
1930 ;; The Bit will travel to its final Position
1933 4: ;; This Bit is done
1936 ;; __zero_reg__ is 0 again
1938 ;; T = 0: We are fine with the Quotient in A[]
1939 ;; T = 1: Copy Remainder to A[]
1945 ;; Move the Sign of the Result to SS.7
1951 #endif /* L_udivmod64 */
1954 #if defined (L_divdi3)
1956 ;; R25:R18 = R24:R18 mod R17:R10
1957 ;; Ordinary ABI-Function
1961 rjmp __divdi3_moddi3
1964 ;; R25:R18 = R24:R18 div R17:R10
1965 ;; Ordinary ABI-Function
1971 DEFUN __divdi3_moddi3
1976 ;; Both Signs are 0: the following Complexity is not needed
1977 XJMP __udivdi3_umoddi3
1978 #endif /* SPEED_DIV */
1981 ;; Save 12 Registers: Y, 17...8
1983 do_prologue_saves 12
1985 ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1986 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1989 ;; Adjust Dividend's Sign as needed
1991 ;; Compiling for Speed we know that at least one Sign must be < 0
1992 ;; Thus, if A[] >= 0 then we know B[] < 0
1996 #endif /* SPEED_DIV */
2000 ;; Adjust Divisor's Sign and SS.7 as needed
2007 com B4 $ com B5 $ com B6 $ com B7
2008 $ com B1 $ com B2 $ com B3
2010 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
2011 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
2013 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
2016 ;; Adjust Result's Sign
2017 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2022 #endif /* __AVR_HAVE_JMP_CALL__ */
2025 4: ;; Epilogue: Restore 12 Registers and return
2026 do_epilogue_restores 12
2028 ENDF __divdi3_moddi3
2030 #endif /* L_divdi3 */
2036 .section .text.libgcc, "ax", @progbits
2038 #define TT __tmp_reg__
2040 #if defined (L_adddi3)
2042 ;; (plus:DI (reg:DI 18)
2044 ;; Sets the V flag for signed overflow tests
2045 ;; Sets the C flag for unsigned overflow tests
2047 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
2048 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
2051 #endif /* L_adddi3 */
2053 #if defined (L_adddi3_s8)
2055 ;; (plus:DI (reg:DI 18)
2056 ;; (sign_extend:SI (reg:QI 26))))
2057 ;; Sets the V flag for signed overflow tests
2058 ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
2063 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
2064 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
2067 #endif /* L_adddi3_s8 */
2069 #if defined (L_subdi3)
2071 ;; (minus:DI (reg:DI 18)
2073 ;; Sets the V flag for signed overflow tests
2074 ;; Sets the C flag for unsigned overflow tests
2076 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
2077 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
2080 #endif /* L_subdi3 */
2082 #if defined (L_cmpdi2)
2084 ;; (compare (reg:DI 18)
2087 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
2088 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
2091 #endif /* L_cmpdi2 */
2093 #if defined (L_cmpdi2_s8)
2095 ;; (compare (reg:DI 18)
2096 ;; (sign_extend:SI (reg:QI 26))))
2101 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
2102 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
2105 #endif /* L_cmpdi2_s8 */
2107 #if defined (L_negdi2)
2109 ;; (neg:DI (reg:DI 18)))
2110 ;; Sets the V flag for signed overflow tests
2113 com A4 $ com A5 $ com A6 $ com A7
2114 $ com A1 $ com A2 $ com A3
2116 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
2117 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
2121 #endif /* L_negdi2 */
2152 #endif /* !defined (__AVR_TINY__) */
2155 .section .text.libgcc.prologue, "ax", @progbits
2157 /**********************************
2158 * This is a prologue subroutine
2159 **********************************/
2160 #if !defined (__AVR_TINY__)
2161 #if defined (L_prologue)
2163 ;; This function does not clobber T-flag; 64-bit division relies on it
2164 DEFUN __prologue_saves__
2183 #if !defined (__AVR_HAVE_SPH__)
2188 #elif defined (__AVR_XMEGA__)
2200 in __tmp_reg__,__SREG__
2203 out __SREG__,__tmp_reg__
2205 #endif /* #SP = 8/16 */
2209 ENDF __prologue_saves__
2210 #endif /* defined (L_prologue) */
2213 * This is an epilogue subroutine
2215 #if defined (L_epilogue)
2217 DEFUN __epilogue_restores__
2235 #if !defined (__AVR_HAVE_SPH__)
2240 #elif defined (__AVR_XMEGA__)
2243 adc r29,__zero_reg__
2250 adc r29,__zero_reg__
2251 in __tmp_reg__,__SREG__
2254 out __SREG__,__tmp_reg__
2258 #endif /* #SP = 8/16 */
2260 ENDF __epilogue_restores__
2261 #endif /* defined (L_epilogue) */
2262 #endif /* !defined (__AVR_TINY__) */
2265 .section .fini9,"ax",@progbits
2271 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
2273 .section .fini0,"ax",@progbits
2277 #endif /* defined (L_exit) */
2285 #endif /* defined (L_cleanup) */
2288 .section .text.libgcc, "ax", @progbits
;; __tablejump2__: dispatch through a jump table.  NOTE(review): several
;; interior lines (including the final jump and ENDF) are elided in this
;; view; the per-device variants below select EICALL/ELPM/LPM/LD access
;; to the table depending on the flash-addressing capabilities.
2291 DEFUN __tablejump2__
2294 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2295 ;; Word address of gs() jumptable entry in R24:Z
2298 #elif defined (__AVR_HAVE_ELPM__)
2299 ;; Word address of jumptable entry in Z
2302 out __RAMPZ__, __tmp_reg__      ; set RAMPZ for the extended-flash read
2305 ;; Read word address from jumptable and jump
2307 #if defined (__AVR_HAVE_ELPMX__)
2308 elpm __tmp_reg__, Z+            ; low byte of target, post-increment Z
2310 mov r30, __tmp_reg__
2311 #ifdef __AVR_HAVE_RAMPD__
2312 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2313 out __RAMPZ__, __zero_reg__
2316 #elif defined (__AVR_HAVE_ELPM__)
2323 #elif defined (__AVR_HAVE_LPMX__)
2326 mov r30, __tmp_reg__
2328 #elif defined (__AVR_TINY__)
2329 wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
2331 ld r31, Z ; Use ld instead of lpm to load Z
2332 mov r30, __tmp_reg__
2343 #endif /* L_tablejump2 */
;; __do_copy_data: startup code (.init4) that copies the initialized
;; .data image from program memory to RAM.  X = r27:r26 walks the RAM
;; destination from __data_start; Z = r31:r30 walks the load image.
;; NOTE(review): the loop bodies and the #else joining the TINY and
;; non-TINY variants are elided in this view -- confirm against the
;; complete lib1funcs.S.
2345 #if defined(__AVR_TINY__)
2347 .section .init4,"ax",@progbits
2348 .global __do_copy_data
2350 ldi r18, hi8(__data_end)        ; r18: high byte of end address for the compare
2351 ldi r26, lo8(__data_start)
2352 ldi r27, hi8(__data_start)
;; On TINY cores flash is mapped into the data space at PM_BASE, so a
;; plain LD through Z can read the load image.
2353 ldi r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2354 ldi r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2355 rjmp .L__do_copy_data_start
2356 .L__do_copy_data_loop:
2359 .L__do_copy_data_start:
2360 cpi r26, lo8(__data_end)        ; loop until X reaches __data_end
2362 brne .L__do_copy_data_loop
2366 .section .init4,"ax",@progbits
2367 DEFUN __do_copy_data
2368 #if defined(__AVR_HAVE_ELPMX__)
2369 ldi r17, hi8(__data_end)        ; r17: high byte of end address for the compare
2370 ldi r26, lo8(__data_start)
2371 ldi r27, hi8(__data_start)
2372 ldi r30, lo8(__data_load_start)
2373 ldi r31, hi8(__data_load_start)
2374 ldi r16, hh8(__data_load_start) ; bits 16+ of the load address (RAMPZ)
2376 rjmp .L__do_copy_data_start
2377 .L__do_copy_data_loop:
2380 .L__do_copy_data_start:
2381 cpi r26, lo8(__data_end)
2383 brne .L__do_copy_data_loop
2384 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
2385 ldi r17, hi8(__data_end)
2386 ldi r26, lo8(__data_start)
2387 ldi r27, hi8(__data_start)
2388 ldi r30, lo8(__data_load_start)
2389 ldi r31, hi8(__data_load_start)
;; Pre-biased by -0x10000: presumably incremented back on the first
;; carry out of Z (see the carry label below) -- TODO confirm.
2390 ldi r16, hh8(__data_load_start - 0x10000)
2391 .L__do_copy_data_carry:
2394 rjmp .L__do_copy_data_start
2395 .L__do_copy_data_loop:
2399 brcs .L__do_copy_data_carry     ; Z wrapped: bump the 64K segment
2400 .L__do_copy_data_start:
2401 cpi r26, lo8(__data_end)
2403 brne .L__do_copy_data_loop
2404 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2405 ldi r17, hi8(__data_end)
2406 ldi r26, lo8(__data_start)
2407 ldi r27, hi8(__data_start)
2408 ldi r30, lo8(__data_load_start)
2409 ldi r31, hi8(__data_load_start)
2410 rjmp .L__do_copy_data_start
2411 .L__do_copy_data_loop:
2412 #if defined (__AVR_HAVE_LPMX__)
2419 .L__do_copy_data_start:
2420 cpi r26, lo8(__data_end)
2422 brne .L__do_copy_data_loop
2423 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2424 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2425 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2426 out __RAMPZ__, __zero_reg__
2427 #endif /* ELPM && RAMPD */
2429 #endif /* L_copy_data */
2430 #endif /* !defined (__AVR_TINY__) */
2432 /* __do_clear_bss is only necessary if there is anything in .bss section. */
;; Startup code (.init4): zero-fill .bss.  X = r27:r26 walks from
;; __bss_start; r18 holds hi8(__bss_end) for the (partially elided)
;; end-of-range compare.  Loop body and ENDF are elided in this view.
2435 .section .init4,"ax",@progbits
2436 DEFUN __do_clear_bss
2437 ldi r18, hi8(__bss_end)
2438 ldi r26, lo8(__bss_start)
2439 ldi r27, hi8(__bss_start)
2440 rjmp .do_clear_bss_start
2443 .do_clear_bss_start:
2444 cpi r26, lo8(__bss_end)         ; loop until X reaches __bss_end
2446 brne .do_clear_bss_loop
2448 #endif /* L_clear_bss */
2450 /* __do_global_ctors and __do_global_dtors are only necessary
2451 if there are any constructors/destructors. */
;; cdtors_tst_reg holds the high byte used to detect the end of the
;; ctors/dtors table walk; TINY cores use r18 because r17 is
;; __zero_reg__ there (see the #define block at the top of the file).
2453 #if defined(__AVR_TINY__)
2454 #define cdtors_tst_reg r18
2456 #define cdtors_tst_reg r17
;; Startup code (.init6): walk the constructor table from __ctors_end
;; down to __ctors_start, dispatching each (word-address) entry via
;; __tablejump2__.  Y = r29:r28 is the table cursor; on EIND devices
;; r16/r24 carry the hh8 part.  Loop-body lines are elided in this view.
2460 .section .init6,"ax",@progbits
2461 DEFUN __do_global_ctors
2462 ldi cdtors_tst_reg, pm_hi8(__ctors_start)
2463 ldi r28, pm_lo8(__ctors_end)
2464 ldi r29, pm_hi8(__ctors_end)
2465 #ifdef __AVR_HAVE_EIJMP_EICALL__
2466 ldi r16, pm_hh8(__ctors_end)
2467 #endif /* HAVE_EIJMP */
2468 rjmp .L__do_global_ctors_start
2469 .L__do_global_ctors_loop:
2471 #ifdef __AVR_HAVE_EIJMP_EICALL__
2472 sbc r16, __zero_reg__           ; borrow into bits 16+ when Y wraps
2474 #endif /* HAVE_EIJMP */
2477 XCALL __tablejump2__            ; call constructor at table entry
2478 .L__do_global_ctors_start:
2479 cpi r28, pm_lo8(__ctors_start)
2480 cpc r29, cdtors_tst_reg         ; 16-bit compare against __ctors_start
2481 #ifdef __AVR_HAVE_EIJMP_EICALL__
2482 ldi r24, pm_hh8(__ctors_start)
2484 #endif /* HAVE_EIJMP */
2485 brne .L__do_global_ctors_loop
2486 ENDF __do_global_ctors
2487 #endif /* L_ctors */
;; Shutdown code (.fini6): walk the destructor table upward from
;; __dtors_start to __dtors_end, dispatching each (word-address) entry
;; via __tablejump2__.  Mirror image of __do_global_ctors; Y = r29:r28
;; is the table cursor.  Loop-body lines are elided in this view.
2490 .section .fini6,"ax",@progbits
2491 DEFUN __do_global_dtors
2492 ldi cdtors_tst_reg, pm_hi8(__dtors_end)
2493 ldi r28, pm_lo8(__dtors_start)
2494 ldi r29, pm_hi8(__dtors_start)
2495 #ifdef __AVR_HAVE_EIJMP_EICALL__
2496 ldi r16, pm_hh8(__dtors_start)
2497 #endif /* HAVE_EIJMP */
2498 rjmp .L__do_global_dtors_start
2499 .L__do_global_dtors_loop:
2500 #ifdef __AVR_HAVE_EIJMP_EICALL__
2502 #endif /* HAVE_EIJMP */
2505 XCALL __tablejump2__            ; call destructor at table entry
2507 #ifdef __AVR_HAVE_EIJMP_EICALL__
2508 adc r16, __zero_reg__           ; carry into bits 16+ when Y wraps
2509 #endif /* HAVE_EIJMP */
2510 .L__do_global_dtors_start:
2511 cpi r28, pm_lo8(__dtors_end)
2512 cpc r29, cdtors_tst_reg         ; 16-bit compare against __dtors_end
2513 #ifdef __AVR_HAVE_EIJMP_EICALL__
2514 ldi r24, pm_hh8(__dtors_end)
2516 #endif /* HAVE_EIJMP */
2517 brne .L__do_global_dtors_loop
2518 ENDF __do_global_dtors
2519 #endif /* L_dtors */
2521 #undef cdtors_tst_reg
2523 .section .text.libgcc, "ax", @progbits
2525 #if !defined (__AVR_TINY__)
2526 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2527 ;; Loading n bytes from Flash; n = 3,4
2528 ;; R22... = Flash[Z]
2529 ;; Clobbers: __tmp_reg__
;; Only built when LPMX is absent (otherwise the compiler inlines the
;; loads).  The .load macro body below is mostly elided in this view.
2531 #if (defined (L_load_3) \
2532 || defined (L_load_4)) \
2533 && !defined (__AVR_HAVE_LPMX__)
;; .load dest, n: load one byte of an n-byte flash read into \dest;
;; the last byte (\dest == D0+n-1) is handled differently (elided).
2541 .macro .load dest, n
2544 .if \dest != D0+\n-1
2551 #if defined (L_load_3)
2558 #endif /* L_load_3 */
2560 #if defined (L_load_4)
2568 #endif /* L_load_4 */
2570 #endif /* L_load_3 || L_load_4 */
2571 #endif /* !defined (__AVR_TINY__) */
2573 #if !defined (__AVR_TINY__)
2574 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2575 ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2576 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2577 ;; Clobbers: __tmp_reg__, R21, R30, R31
2579 #if (defined (L_xload_1) \
2580 || defined (L_xload_2) \
2581 || defined (L_xload_3) \
2582 || defined (L_xload_4))
2590 ;; Register containing bits 16+ of the address
;; .xload dest, n: load one byte of an n-byte read into \dest, picking
;; ELPM/ELPMX/LPMX per device.  Most of the macro body and all of the
;; __xload_N function bodies are elided in this view.
2594 .macro .xload dest, n
2595 #if defined (__AVR_HAVE_ELPMX__)
2597 #elif defined (__AVR_HAVE_ELPM__)
2600 .if \dest != D0+\n-1
2602 adc HHI8, __zero_reg__          ; carry into the RAMPZ byte
2605 #elif defined (__AVR_HAVE_LPMX__)
2610 .if \dest != D0+\n-1
2614 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2615 .if \dest == D0+\n-1
2616 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2617 out __RAMPZ__, __zero_reg__
2622 #if defined (L_xload_1)
2624 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2633 #if defined (__AVR_HAVE_ELPM__)
2635 #endif /* __AVR_HAVE_ELPM__ */
2640 #endif /* LPMx && ! ELPM */
2642 #endif /* L_xload_1 */
2644 #if defined (L_xload_2)
2648 #if defined (__AVR_HAVE_ELPM__)
2650 #endif /* __AVR_HAVE_ELPM__ */
2658 #endif /* L_xload_2 */
2660 #if defined (L_xload_3)
2664 #if defined (__AVR_HAVE_ELPM__)
2666 #endif /* __AVR_HAVE_ELPM__ */
2676 #endif /* L_xload_3 */
2678 #if defined (L_xload_4)
2682 #if defined (__AVR_HAVE_ELPM__)
2684 #endif /* __AVR_HAVE_ELPM__ */
2696 #endif /* L_xload_4 */
2698 #endif /* L_xload_{1|2|3|4} */
2699 #endif /* if !defined (__AVR_TINY__) */
2701 #if !defined (__AVR_TINY__)
2702 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2703 ;; memcopy from Address Space __pgmx to RAM
2704 ;; R23:Z = Source Address
2705 ;; X = Destination Address
2706 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
;; NOTE(review): the function entry (DEFUN) and most of the copy loops
;; are elided in this view; path 0 below reads from flash, path 1 from
;; RAM, selected (presumably by R23.7) in elided code -- confirm.
2708 #if defined (L_movmemx)
2714 ;; #Bytes to copy fit in 8 Bits (1..255)
2715 ;; Zero-extend Loop Counter
2728 #if defined (__AVR_HAVE_ELPM__)
2732 0: ;; Load 1 Byte from Flash...
2734 #if defined (__AVR_HAVE_ELPMX__)
2736 #elif defined (__AVR_HAVE_ELPM__)
2739 adc HHI8, __zero_reg__          ; carry into the RAMPZ byte
2741 #elif defined (__AVR_HAVE_LPMX__)
2748 ;; ...and store that Byte to RAM Destination
2752 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2753 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2754 out __RAMPZ__, __zero_reg__
2755 #endif /* ELPM && RAMPD */
2760 1: ;; Read 1 Byte from RAM...
2762 ;; and store that Byte to RAM Destination
2772 #endif /* L_movmemx */
2773 #endif /* !defined (__AVR_TINY__) */
2776 .section .text.libgcc.builtins, "ax", @progbits
2778 /**********************************
2779 * Find first set Bit (ffs)
2780 **********************************/
;; NOTE(review): the DEFUN/ENDF lines and most instruction bodies of
;; the ffs functions are elided in this view.
2782 #if defined (L_ffssi2)
2783 ;; find first set bit
2784 ;; r25:r24 = ffs32 (r25:r22)
2785 ;; clobbers: r22, r26
2803 #endif /* defined (L_ffssi2) */
2805 #if defined (L_ffshi2)
2806 ;; find first set bit
2807 ;; r25:r24 = ffs16 (r25:r24)
2811 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2812 ;; Some cores have problem skipping 2-word instruction
2816 cpse r24, __zero_reg__          ; skip next insn if r24 == 0
2817 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2818 1: XJMP __loop_ffsqi2
2824 #endif /* defined (L_ffshi2) */
2826 #if defined (L_loop_ffsqi2)
2827 ;; Helper for ffshi2, ffssi2
2828 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2839 #endif /* defined (L_loop_ffsqi2) */
2842 /**********************************
2843 * Count trailing Zeros (ctz)
2844 **********************************/
;; NOTE(review): function bodies are elided in this view.
2846 #if defined (L_ctzsi2)
2847 ;; count trailing zeros
2848 ;; r25:r24 = ctz32 (r25:r22)
2849 ;; clobbers: r26, r22
2851 ;; Note that ctz(0) is undefined for GCC
2857 #endif /* defined (L_ctzsi2) */
2859 #if defined (L_ctzhi2)
2860 ;; count trailing zeros
2861 ;; r25:r24 = ctz16 (r25:r24)
2864 ;; Note that ctz(0) is undefined for GCC
2870 #endif /* defined (L_ctzhi2) */
2873 /**********************************
2874 * Count leading Zeros (clz)
2875 **********************************/
;; NOTE(review): function bodies are elided in this view.
2877 #if defined (L_clzdi2)
2878 ;; count leading zeros
2879 ;; r25:r24 = clz64 (r25:r18)
2880 ;; clobbers: r22, r23, r26
2893 #endif /* defined (L_clzdi2) */
2895 #if defined (L_clzsi2)
2896 ;; count leading zeros
2897 ;; r25:r24 = clz32 (r25:r22)
2909 #endif /* defined (L_clzsi2) */
2911 #if defined (L_clzhi2)
2912 ;; count leading zeros
2913 ;; r25:r24 = clz16 (r25:r24)
2935 #endif /* defined (L_clzhi2) */
2938 /**********************************
2940 **********************************/
;; Parity helpers.  NOTE(review): DEFUN/ENDF lines and most bodies are
;; elided in this view; the visible parity8 tail folds the byte onto
;; itself until the parity lands in bit 0.
2942 #if defined (L_paritydi2)
2943 ;; r25:r24 = parity64 (r25:r18)
2944 ;; clobbers: __tmp_reg__
2952 #endif /* defined (L_paritydi2) */
2954 #if defined (L_paritysi2)
2955 ;; r25:r24 = parity32 (r25:r22)
2956 ;; clobbers: __tmp_reg__
2962 #endif /* defined (L_paritysi2) */
2964 #if defined (L_parityhi2)
2965 ;; r25:r24 = parity16 (r25:r24)
2966 ;; clobbers: __tmp_reg__
2972 ;; r25:r24 = parity8 (r24)
2973 ;; clobbers: __tmp_reg__
2975 ;; parity is in r24[0..7]
2976 mov __tmp_reg__, r24
2978 eor r24, __tmp_reg__            ; fold: XOR halves together (shift elided)
2979 ;; parity is in r24[0..3]
2983 ;; parity is in r24[0,3]
2986 ;; parity is in r24[0]
2991 #endif /* defined (L_parityhi2) */
2994 /**********************************
2996 **********************************/
;; Popcount helpers.  NOTE(review): DEFUN lines and most bodies are
;; elided in this view; 16/32/64-bit variants share __popcounthi2_tail.
2998 #if defined (L_popcounthi2)
3000 ;; r25:r24 = popcount16 (r25:r24)
3001 ;; clobbers: __tmp_reg__
3011 DEFUN __popcounthi2_tail
3013 add r24, __tmp_reg__            ; combine partial byte counts
3015 ENDF __popcounthi2_tail
3016 #endif /* defined (L_popcounthi2) */
3018 #if defined (L_popcountsi2)
3020 ;; r25:r24 = popcount32 (r25:r22)
3021 ;; clobbers: __tmp_reg__
3028 XJMP __popcounthi2_tail         ; tail-call shared finish
3030 #endif /* defined (L_popcountsi2) */
3032 #if defined (L_popcountdi2)
3034 ;; r25:r24 = popcount64 (r25:r18)
3035 ;; clobbers: r22, r23, __tmp_reg__
3044 XJMP __popcounthi2_tail         ; tail-call shared finish
3046 #endif /* defined (L_popcountdi2) */
3048 #if defined (L_popcountqi2)
3050 ;; r24 = popcount8 (r24)
3051 ;; clobbers: __tmp_reg__
;; Accumulate one bit at a time via the carry flag; the shifts that set
;; carry between these adds are elided in this view.
3053 mov __tmp_reg__, r24
3057 adc r24, __zero_reg__
3059 adc r24, __zero_reg__
3061 adc r24, __zero_reg__
3063 adc r24, __zero_reg__
3065 adc r24, __zero_reg__
3067 adc r24, __tmp_reg__
3070 #endif /* defined (L_popcountqi2) */
3073 /**********************************
3075 **********************************/
3077 ;; swap two registers with different register number
;; Byte-swap helpers.  NOTE(review): the swap macro and function bodies
;; are elided in this view.
3084 #if defined (L_bswapsi2)
3086 ;; r25:r22 = bswap32 (r25:r22)
3092 #endif /* defined (L_bswapsi2) */
3094 #if defined (L_bswapdi2)
3096 ;; r25:r18 = bswap64 (r25:r18)
3104 #endif /* defined (L_bswapdi2) */
3107 /**********************************
3109 **********************************/
;; 64-bit shift/rotate helpers.  NOTE(review): most instruction bodies
;; are elided in this view; r17:r16 is the shift count and is saved in
;; __tmp_reg__ and restored, so it is preserved for the caller.
3111 #if defined (L_ashrdi3)
3113 #define SS __zero_reg__
3115 ;; Arithmetic shift right
3116 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
3123 ;; Logic shift right
3124 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
3126 ;; Signs are in SS (zero_reg)
3127 mov __tmp_reg__, r16            ; preserve shift count low byte
3152 mov r16, __tmp_reg__            ; restore caller's r16
3158 #endif /* defined (L_ashrdi3) */
3160 #if defined (L_ashldi3)
3162 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
3163 ;; This function does not clobber T.
3165 mov __tmp_reg__, r16            ; preserve shift count low byte
3188 mov r16, __tmp_reg__            ; restore caller's r16
3191 #endif /* defined (L_ashldi3) */
3193 #if defined (L_rotldi3)
3195 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
3201 mov __tmp_reg__, r25            ; keep top byte for rotate wrap-around
3209 mov r18, __tmp_reg__
3219 adc r18, __zero_reg__           ; wrap the carried-out bit into bit 0
3225 #endif /* defined (L_rotldi3) */
3228 .section .text.libgcc.fmul, "ax", @progbits
3230 /***********************************************************/
3231 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3232 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3233 /***********************************************************/
;; NOTE(review): DEFUN/ENDF lines and the multiply loops are elided in
;; this view; __fmuls/__fmulsu record the result sign in A0.7 and share
;; an (elided) unsigned-multiply helper, negating the result at the end.
3239 #define A0 __tmp_reg__
3242 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
3243 ;;; Clobbers: r24, r25, __tmp_reg__
3245 ;; A0.7 = negate result?
3253 #endif /* L_fmuls */
3256 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
3257 ;;; Clobbers: r24, r25, __tmp_reg__
3259 ;; A0.7 = negate result?
3264 ;; Helper for __fmuls and __fmulsu
3269 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
3270 ;; Some cores have problem skipping 2-word instruction
3275 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
3278 ;; C = -C iff A0.7 = 1
3282 #endif /* L_fmulsu */
3286 ;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
3287 ;;; Clobbers: r24, r25, __tmp_reg__
3294 ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C.
3315 #include "lib1funcs-fixed.S"