/* -*- Mode: Asm -*-  */
/* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009
   Free Software Foundation, Inc.
   Contributed by Denis Chertykov <chertykov@gmail.com>

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#define __zero_reg__ r1
#define __tmp_reg__ r0
#define __SREG__ 0x3f
#define __SP_H__ 0x3e
#define __SP_L__ 0x3d
#define __RAMPZ__ 0x3B
#define __EIND__ 0x3C

/* Most of the functions here are called directly from avr.md
   patterns, instead of using the standard libcall mechanisms.
   This can make better code because GCC knows exactly which
   of the call-used registers (not all of them) are clobbered.  */

/* FIXME:  At present, there is no SORT directive in the linker
   script, so we must not assume that different modules in the
   same input section like .libgcc.text.mul will be located
   close together.  Therefore, we cannot use RCALL/RJMP to call
   a function like __udivmodhi4 from __divmodhi4 and have to use
   lengthy XCALL/XJMP even though they are in the same input
   section and all the input sections together are small enough
   to reach every location with an RCALL/RJMP instruction.  */

.macro mov_l  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    movw \r_dest, \r_src
#else
    mov \r_dest, \r_src
#endif
.endm

.macro mov_h  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    ; empty
#else
    mov \r_dest, \r_src
#endif
.endm

.macro wmov  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    movw \r_dest, \r_src
#else
    mov \r_dest,   \r_src
    mov \r_dest+1, \r_src+1
#endif
.endm

#if defined (__AVR_HAVE_JMP_CALL__)
#define XCALL call
#define XJMP  jmp
#else
#define XCALL rcall
#define XJMP  rjmp
#endif

.macro DEFUN name
.global \name
.func \name
\name:
.endm

.macro ENDF name
.size \name, .-\name
.endfunc
.endm


.section .text.libgcc.mul, "ax", @progbits

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
#if !defined (__AVR_HAVE_MUL__)
/*******************************************************
    Multiplication  8 x 8  without MUL
*******************************************************/
#if defined (L_mulqi3)

#define r_arg2  r22         /* multiplicand */
#define r_arg1  r24         /* multiplier */
#define r_res   __tmp_reg__ /* result */

DEFUN __mulqi3
    clr  r_res          ; clear result
__mulqi3_loop:
    sbrc r_arg1,0
    add  r_res,r_arg2   ; result += multiplicand
    add  r_arg2,r_arg2  ; shift multiplicand
    breq __mulqi3_exit  ; while multiplicand != 0
    lsr  r_arg1
    brne __mulqi3_loop  ; exit if multiplier = 0
__mulqi3_exit:
    mov  r_arg1,r_res   ; result to return register
    ret
ENDF __mulqi3

#undef r_arg2
#undef r_arg1
#undef r_res

#endif /* defined (L_mulqi3) */
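
/* The loop above is the classic shift-and-add multiply; the asm keeps
   the running product in __tmp_reg__.  A result-equivalent C sketch
   (a reference model only, not part of libgcc; assumes <stdint.h>
   types):

       static uint8_t mulqi3_model (uint8_t a, uint8_t b)
       {
           uint8_t res = 0;
           while (a != 0 && b != 0)
           {
               if (a & 1)          // sbrc r_arg1,0
                   res += b;       // add  r_res,r_arg2
               b <<= 1;            // add  r_arg2,r_arg2 (wraps mod 256)
               a >>= 1;            // lsr  r_arg1
           }
           return res;
       }

   Once either register reaches 0, no further addition can change the
   8-bit result, which is why the asm may exit on both tests.  */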

#if defined (L_mulqihi3)
DEFUN __mulqihi3
    clr  r25
    sbrc r24, 7
    dec  r25            ; sign-extend r24 into r25
    clr  r23
    sbrc r22, 7
    dec  r23            ; sign-extend r22 into r23
    XJMP __mulhi3
ENDF __mulqihi3
#endif /* defined (L_mulqihi3) */

#if defined (L_umulqihi3)
DEFUN __umulqihi3
    clr  r25
    clr  r23
    XJMP __mulhi3
ENDF __umulqihi3
#endif /* defined (L_umulqihi3) */

/*******************************************************
    Multiplication  16 x 16  without MUL
*******************************************************/
#if defined (L_mulhi3)
#define r_arg1L r24         /* multiplier Low */
#define r_arg1H r25         /* multiplier High */
#define r_arg2L r22         /* multiplicand Low */
#define r_arg2H r23         /* multiplicand High */
#define r_resL  __tmp_reg__ /* result Low */
#define r_resH  r21         /* result High */

DEFUN __mulhi3
    clr  r_resH         ; clear result
    clr  r_resL         ; clear result
__mulhi3_loop:
    sbrs r_arg1L,0
    rjmp __mulhi3_skip1
    add  r_resL,r_arg2L ; result + multiplicand
    adc  r_resH,r_arg2H
__mulhi3_skip1:
    add  r_arg2L,r_arg2L    ; shift multiplicand
    adc  r_arg2H,r_arg2H

    cp   r_arg2L,__zero_reg__
    cpc  r_arg2H,__zero_reg__
    breq __mulhi3_exit  ; while multiplicand != 0

    lsr  r_arg1H        ; gets LSB of multiplier
    ror  r_arg1L
    sbiw r_arg1L,0
    brne __mulhi3_loop  ; exit if multiplier = 0
__mulhi3_exit:
    mov  r_arg1H,r_resH ; result to return register
    mov  r_arg1L,r_resL
    ret
ENDF __mulhi3

#undef r_arg1L
#undef r_arg1H
#undef r_arg2L
#undef r_arg2H
#undef r_resL
#undef r_resH

#endif /* defined (L_mulhi3) */

/*******************************************************
    Widening Multiplication  32 = 16 x 16  without MUL
*******************************************************/

#if defined (L_mulhisi3)
DEFUN __mulhisi3
;;; FIXME: This is dead code (no one calls it)
    mov_l r18, r24
    mov_h r19, r25
    clr  r24
    sbrc r23, 7
    dec  r24
    mov  r25, r24
    clr  r20
    sbrc r19, 7
    dec  r20
    mov  r21, r20
    XJMP __mulsi3
ENDF __mulhisi3
#endif /* defined (L_mulhisi3) */

#if defined (L_umulhisi3)
DEFUN __umulhisi3
;;; FIXME: This is dead code (no one calls it)
    mov_l r18, r24
    mov_h r19, r25
    clr  r24
    clr  r25
    mov_l r20, r24
    mov_h r21, r25
    XJMP __mulsi3
ENDF __umulhisi3
#endif /* defined (L_umulhisi3) */

#if defined (L_mulsi3)
/*******************************************************
    Multiplication  32 x 32  without MUL
*******************************************************/
#define r_arg1L  r22    /* multiplier Low */
#define r_arg1H  r23
#define r_arg1HL r24
#define r_arg1HH r25    /* multiplier High */

#define r_arg2L  r18    /* multiplicand Low */
#define r_arg2H  r19
#define r_arg2HL r20
#define r_arg2HH r21    /* multiplicand High */

#define r_resL   r26    /* result Low */
#define r_resH   r27
#define r_resHL  r30
#define r_resHH  r31    /* result High */

DEFUN __mulsi3
    clr  r_resHH        ; clear result
    clr  r_resHL        ; clear result
    clr  r_resH         ; clear result
    clr  r_resL         ; clear result
__mulsi3_loop:
    sbrs r_arg1L,0
    rjmp __mulsi3_skip1
    add  r_resL,r_arg2L ; result + multiplicand
    adc  r_resH,r_arg2H
    adc  r_resHL,r_arg2HL
    adc  r_resHH,r_arg2HH
__mulsi3_skip1:
    add  r_arg2L,r_arg2L    ; shift multiplicand
    adc  r_arg2H,r_arg2H
    adc  r_arg2HL,r_arg2HL
    adc  r_arg2HH,r_arg2HH

    lsr  r_arg1HH       ; gets LSB of multiplier
    ror  r_arg1HL
    ror  r_arg1H
    ror  r_arg1L
    brne __mulsi3_loop
    sbiw r_arg1HL,0
    cpc  r_arg1H,r_arg1L
    brne __mulsi3_loop  ; exit if multiplier = 0
__mulsi3_exit:
    mov_h r_arg1HH,r_resHH  ; result to return register
    mov_l r_arg1HL,r_resHL
    mov_h r_arg1H,r_resH
    mov_l r_arg1L,r_resL
    ret
ENDF __mulsi3

#undef r_arg1L
#undef r_arg1H
#undef r_arg1HL
#undef r_arg1HH

#undef r_arg2L
#undef r_arg2H
#undef r_arg2HL
#undef r_arg2HH

#undef r_resL
#undef r_resH
#undef r_resHL
#undef r_resHH

#endif /* defined (L_mulsi3) */

#endif /* !defined (__AVR_HAVE_MUL__) */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#if defined (__AVR_HAVE_MUL__)
#define A0 26
#define B0 18
#define C0 22

#define A1 A0+1

#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

/*******************************************************
    Widening Multiplication  32 = 16 x 16
*******************************************************/

#if defined (L_mulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulhisi3
    XCALL __umulhisi3
    ;; Sign-extend B
    tst  B1
    brpl 1f
    sub  C2, A0
    sbc  C3, A1
1:  ;; Sign-extend A
    XJMP __usmulhisi3_tail
ENDF __mulhisi3
#endif /* L_mulhisi3 */

#if defined (L_usmulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __usmulhisi3
    XCALL __umulhisi3
    ;; FALLTHRU
ENDF __usmulhisi3

DEFUN __usmulhisi3_tail
    ;; Sign-extend A
    sbrs A1, 7
    ret
    sub  C2, B0
    sbc  C3, B1
    ret
ENDF __usmulhisi3_tail
#endif /* L_usmulhisi3 */

#if defined (L_umulhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __umulhisi3
    mul  A0, B0
    movw C0, r0
    mul  A1, B1
    movw C2, r0
    mul  A0, B1
    rcall 1f
    mul  A1, B0
1:  add  C1, r0
    adc  C2, r1
    clr  __zero_reg__
    adc  C3, __zero_reg__
    ret
ENDF __umulhisi3
#endif /* L_umulhisi3 */
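
/* __umulhisi3 is the schoolbook 16 x 16 -> 32 product built from four
   8 x 8 MULs; the rcall/1f trick runs the final add chain twice, once
   for each cross product.  A C reference model (a sketch only, not
   part of libgcc; assumes <stdint.h> types):

       static uint32_t umulhisi3_model (uint16_t a, uint16_t b)
       {
           uint8_t a0 = a, a1 = a >> 8;
           uint8_t b0 = b, b1 = b >> 8;
           uint32_t c = (uint32_t) (a0 * b0)           // mul A0,B0 -> C1:C0
                      | ((uint32_t) (a1 * b1) << 16);  // mul A1,B1 -> C3:C2
           c += (uint32_t) (a0 * b1) << 8;             // cross products land
           c += (uint32_t) (a1 * b0) << 8;             // at byte offset 1
           return c;
       }
*/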

/*******************************************************
    Widening Multiplication  32 = 16 x 32
*******************************************************/

#if defined (L_mulshisi3)
;;; R25:R22 = (signed long) R27:R26 * R21:R18
;;; (C3:C0) = (signed long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulshisi3
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have a problem skipping a 2-word instruction
    tst  A1
    brmi __mulohisi3
#else
    sbrs A1, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP __muluhisi3
    ;; FALLTHRU
ENDF __mulshisi3

;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulohisi3
    XCALL __muluhisi3
    ;; One-extend R27:R26 (A1:A0)
    sub  C2, B0
    sbc  C3, B1
    ret
ENDF __mulohisi3
#endif /* L_mulshisi3 */

#if defined (L_muluhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __muluhisi3
    XCALL __umulhisi3
    mul  A0, B3
    add  C3, r0
    mul  A1, B2
    add  C3, r0
    mul  A0, B2
    add  C2, r0
    adc  C3, r1
    clr  __zero_reg__
    ret
ENDF __muluhisi3
#endif /* L_muluhisi3 */

/*******************************************************
    Multiplication  32 x 32
*******************************************************/

#if defined (L_mulsi3)
;;; R25:R22 = R25:R22 * R21:R18
;;; (C3:C0) = C3:C0   * B3:B0
;;; Clobbers: R26, R27, __tmp_reg__
DEFUN __mulsi3
    movw A0, C0
    push C2
    push C3
    XCALL __muluhisi3
    pop  A1
    pop  A0
    ;; A1:A0 now contains the high word of A
    mul  A0, B0
    add  C2, r0
    adc  C3, r1
    mul  A0, B1
    add  C3, r0
    mul  A1, B0
    add  C3, r0
    clr  __zero_reg__
    ret
ENDF __mulsi3
#endif /* L_mulsi3 */

#undef A0
#undef A1

#undef B0
#undef B1
#undef B2
#undef B3

#undef C0
#undef C1
#undef C2
#undef C3

#endif /* __AVR_HAVE_MUL__ */

/*******************************************************
    Multiplication  24 x 24
*******************************************************/

#if defined (L_mulpsi3)

;; A[0..2]: In: Multiplicand; Out: Product
#define A0  22
#define A1  A0+1
#define A2  A0+2

;; B[0..2]: In: Multiplier
#define B0  18
#define B1  B0+1
#define B2  B0+2

#if defined (__AVR_HAVE_MUL__)

;; C[0..2]: Expand Result
#define C0  22
#define C1  C0+1
#define C2  C0+2

;; R24:R22 *= R20:R18
;; Clobbers: r21, r25, r26, r27, __tmp_reg__

#define AA0 26
#define AA2 21

DEFUN __mulpsi3
    wmov AA0, A0
    mov  AA2, A2
    XCALL __umulhisi3
    mul  AA2, B0  $  add C2, r0
    mul  AA0, B2  $  add C2, r0
    clr  __zero_reg__
    ret
ENDF __mulpsi3

#undef AA2
#undef AA0

#undef C2
#undef C1
#undef C0

#else /* !HAVE_MUL */

;; C[0..2]: Expand Result
#define C0  0
#define C1  C0+1
#define C2  21

;; R24:R22 *= R20:R18
;; Clobbers: __tmp_reg__, R18, R19, R20, R21

DEFUN __mulpsi3

    ;; C[] = 0
    clr  __tmp_reg__
    clr  C2

0:  ;; Shift N-th Bit of B[] into Carry.  N = 24 - Loop
    LSR  B2     $  ror B1     $  ror B0

    ;; If the N-th Bit of B[] was set...
    brcc 1f

    ;; ...then add A[] * 2^N to the Result C[]
    ADD  C0,A0  $  adc C1,A1  $  adc C2,A2

1:  ;; Multiply A[] by 2
    LSL  A0     $  rol A1     $  rol A2

    ;; Loop until B[] is 0
    subi B0,0   $  sbci B1,0  $  sbci B2,0
    brne 0b

    ;; Copy C[] to the return Register A[]
    wmov A0, C0
    mov  A2, C2

    clr  __zero_reg__
    ret
ENDF __mulpsi3

#undef C2
#undef C1
#undef C0

#endif /* HAVE_MUL */

#undef B2
#undef B1
#undef B0

#undef A2
#undef A1
#undef A0

#endif /* L_mulpsi3 */

#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)

;; A[0..2]: In: Multiplicand
#define A0  22
#define A1  A0+1
#define A2  A0+2

;; BB: In: Multiplier
#define BB  25

;; C[0..2]: Result
#define C0  18
#define C1  C0+1
#define C2  C0+2

;; C[] = A[] * sign_extend (BB)
DEFUN __mulsqipsi3
    mul  A0, BB
    movw C0, r0
    mul  A2, BB
    mov  C2, r0
    mul  A1, BB
    add  C1, r0
    adc  C2, r1
    clr  __zero_reg__
    sbrs BB, 7
    ret
    ;; One-extend BB
    sub  C1, A0
    sbc  C2, A1
    ret
ENDF __mulsqipsi3

#undef C2
#undef C1
#undef C0

#undef BB

#undef A2
#undef A1
#undef A0

#endif /* L_mulsqipsi3 && HAVE_MUL */

/*******************************************************
    Multiplication  64 x 64
*******************************************************/

#if defined (L_muldi3)

;; A[] = A[] * B[]

;; A[0..7]: In: Multiplicand
;;          Out: Product
#define A0  18
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3
#define A4  A0+4
#define A5  A0+5
#define A6  A0+6
#define A7  A0+7

;; B[0..7]: In: Multiplier
#define B0  10
#define B1  B0+1
#define B2  B0+2
#define B3  B0+3
#define B4  B0+4
#define B5  B0+5
#define B6  B0+6
#define B7  B0+7

#if defined (__AVR_HAVE_MUL__)

;; Define C[] for convenience
;; Notice that parts of C[] overlap A[] and B[], respectively
#define C0  16
#define C1  C0+1
#define C2  20
#define C3  C2+1
#define C4  28
#define C5  C4+1
#define C6  C4+2
#define C7  C4+3

;; A[] *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function

DEFUN __muldi3
    push r29
    push r28
    push r17
    push r16

    ;; Counting in Words, we have to perform a 4 * 4 Multiplication

    ;; 3 * 0  +  0 * 3
    mul A7,B0  $             $  mov C7,r0
    mul A0,B7  $             $  add C7,r0
    mul A6,B1  $             $  add C7,r0
    mul A6,B0  $  mov C6,r0  $  add C7,r1
    mul B6,A1  $             $  add C7,r0
    mul B6,A0  $  add C6,r0  $  adc C7,r1

    ;; 1 * 2
    mul A2,B4  $  add C6,r0  $  adc C7,r1
    mul A3,B4  $             $  add C7,r0
    mul A2,B5  $             $  add C7,r0

    push A5
    push A4
    push B1
    push B0
    push A3
    push A2

    ;; 0 * 0
    wmov 26, B0
    XCALL __umulhisi3
    wmov C0, 22
    wmov C2, 24

    ;; 0 * 2
    wmov 26, B4
    XCALL __umulhisi3  $  wmov C4,22  $  add C6,24  $  adc C7,25

    wmov 26, B2
    ;; 0 * 1
    rcall __muldi3_6

    pop A0
    pop A1
    ;; 1 * 1
    wmov 26, B2
    XCALL __umulhisi3  $  add C4,22  $  adc C5,23  $  adc C6,24  $  adc C7,25

    pop r26
    pop r27
    ;; 1 * 0
    rcall __muldi3_6

    pop A0
    pop A1
    ;; 2 * 0
    XCALL __umulhisi3  $  add C4,22  $  adc C5,23  $  adc C6,24  $  adc C7,25

    ;; 2 * 1
    wmov 26, B2
    XCALL __umulhisi3  $  $  add C6,22  $  adc C7,23

    ;; A[] = C[]
    wmov A0, C0
    ;; A2 = C2 already
    wmov A4, C4
    wmov A6, C6

    clr  __zero_reg__
    pop  r16
    pop  r17
    pop  r28
    pop  r29
    ret

__muldi3_6:
    XCALL __umulhisi3
    add  C2, 22
    adc  C3, 23
    adc  C4, 24
    adc  C5, 25
    brcc 0f
    adiw C6, 1
0:  ret
ENDF __muldi3

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#else /* !HAVE_MUL */

#define C0  26
#define C1  C0+1
#define C2  C0+2
#define C3  C0+3
#define C4  C0+4
#define C5  C0+5
#define C6  0
#define C7  C6+1

#define Loop 9

;; A[] *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function

DEFUN __muldi3
    push r29
    push r28
    push Loop

    ldi  C0, 64
    mov  Loop, C0

    ;; C[] = 0
    clr  __tmp_reg__
    wmov C0, 0
    wmov C2, 0
    wmov C4, 0

0:  ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
    ;; where N = 64 - Loop.
    ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
    ;; B[] will have its initial Value again.
    LSR  B7  $  ror B6  $  ror B5  $  ror B4
    ror  B3  $  ror B2  $  ror B1  $  ror B0

    ;; If the N-th Bit of B[] was set then...
    brcc 1f
    ;; ...finish Rotation...
    ori  B7, 1 << 7

    ;; ...and add A[] * 2^N to the Result C[]
    ADD  C0,A0  $  adc C1,A1  $  adc C2,A2  $  adc C3,A3
    adc  C4,A4  $  adc C5,A5  $  adc C6,A6  $  adc C7,A7

1:  ;; Multiply A[] by 2
    LSL  A0  $  rol A1  $  rol A2  $  rol A3
    rol  A4  $  rol A5  $  rol A6  $  rol A7

    dec  Loop
    brne 0b

    ;; We expanded the Result in C[]
    ;; Copy Result to the Return Register A[]
    wmov A0, C0
    wmov A2, C2
    wmov A4, C4
    wmov A6, C6

    clr  __zero_reg__
    pop  Loop
    pop  r28
    pop  r29
    ret
ENDF __muldi3

#undef Loop

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#endif /* HAVE_MUL */

#undef B7
#undef B6
#undef B5
#undef B4
#undef B3
#undef B2
#undef B1
#undef B0

#undef A7
#undef A6
#undef A5
#undef A4
#undef A3
#undef A2
#undef A1
#undef A0

#endif /* L_muldi3 */
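
/* Counting in 16-bit words, the MUL variant above performs a 4 x 4
   schoolbook multiply, keeping only the partial products that land in
   the low 64 bits.  A C reference model (a sketch only, not part of
   libgcc; assumes <stdint.h> types):

       static uint64_t muldi3_model (uint64_t a, uint64_t b)
       {
           uint64_t c = 0;
           for (int i = 0; i < 4; i++)          // words of A
               for (int j = 0; j + i < 4; j++)  // only i+j <= 3 reaches bit 63
               {
                   uint32_t p = (uint32_t) (uint16_t) (a >> 16 * i)
                                * (uint16_t) (b >> 16 * j);
                   c += (uint64_t) p << (16 * (i + j));
               }
           return c;
       }

   The non-MUL variant is the same 64-round shift-and-add loop as
   __mulpsi3 above, widened to 8 bytes.  */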

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


.section .text.libgcc.div, "ax", @progbits

/*******************************************************
       Division 8 / 8 => (result + remainder)
*******************************************************/
#define r_rem   r25     /* remainder */
#define r_arg1  r24     /* dividend, quotient */
#define r_arg2  r22     /* divisor */
#define r_cnt   r23     /* loop count */

#if defined (L_udivmodqi4)
DEFUN __udivmodqi4
    sub  r_rem,r_rem        ; clear remainder and carry
    ldi  r_cnt,9            ; init loop counter
    rjmp __udivmodqi4_ep    ; jump to entry point
__udivmodqi4_loop:
    rol  r_rem              ; shift dividend into remainder
    cp   r_rem,r_arg2       ; compare remainder & divisor
    brcs __udivmodqi4_ep    ; remainder < divisor
    sub  r_rem,r_arg2       ; subtract divisor
__udivmodqi4_ep:
    rol  r_arg1             ; shift dividend (with CARRY)
    dec  r_cnt              ; decrement loop counter
    brne __udivmodqi4_loop
    com  r_arg1             ; complement result
                            ; because C flag was complemented in loop
    ret
ENDF __udivmodqi4
#endif /* defined (L_udivmodqi4) */
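
/* This is plain restoring (shift-and-subtract) division: quotient
   bits are rotated into the dividend register as the dividend rotates
   out, and are complemented at the end because the carry produced by
   cp/sub is the inverse of the quotient bit.  A C reference model
   (a sketch only; assumes <stdint.h> types and b != 0):

       static void udivmodqi4_model (uint8_t a, uint8_t b,
                                     uint8_t *quo, uint8_t *rem)
       {
           uint8_t q = 0, r = 0;
           for (int i = 7; i >= 0; i--)
           {
               r = (r << 1) | ((a >> i) & 1);   // rol: next dividend bit
               q <<= 1;
               if (r >= b)                      // cp  r_rem,r_arg2
               {
                   r -= b;                      // sub r_rem,r_arg2
                   q |= 1;
               }
           }
           *quo = q;
           *rem = r;
       }

   The 16-, 24-, 32- and 64-bit workers below are the same algorithm
   with wider registers.  */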

#if defined (L_divmodqi4)
DEFUN __divmodqi4
    bst  r_arg1,7           ; store sign of dividend
    mov  __tmp_reg__,r_arg1
    eor  __tmp_reg__,r_arg2 ; r0.7 is sign of result
    sbrc r_arg1,7
    neg  r_arg1             ; dividend negative: negate
    sbrc r_arg2,7
    neg  r_arg2             ; divisor negative: negate
    XCALL __udivmodqi4      ; do the unsigned div/mod
    brtc __divmodqi4_1
    neg  r_rem              ; correct remainder sign
__divmodqi4_1:
    sbrc __tmp_reg__,7
    neg  r_arg1             ; correct result sign
__divmodqi4_exit:
    ret
ENDF __divmodqi4
#endif /* defined (L_divmodqi4) */

#undef r_rem
#undef r_arg1
#undef r_arg2
#undef r_cnt


/*******************************************************
       Division 16 / 16 => (result + remainder)
*******************************************************/
#define r_remL  r26     /* remainder Low */
#define r_remH  r27     /* remainder High */

/* return: remainder */
#define r_arg1L r24     /* dividend Low */
#define r_arg1H r25     /* dividend High */

/* return: quotient */
#define r_arg2L r22     /* divisor Low */
#define r_arg2H r23     /* divisor High */

#define r_cnt   r21     /* loop count */

#if defined (L_udivmodhi4)
DEFUN __udivmodhi4
    sub  r_remL,r_remL
    sub  r_remH,r_remH      ; clear remainder and carry
    ldi  r_cnt,17           ; init loop counter
    rjmp __udivmodhi4_ep    ; jump to entry point
__udivmodhi4_loop:
    rol  r_remL             ; shift dividend into remainder
    rol  r_remH
    cp   r_remL,r_arg2L     ; compare remainder & divisor
    cpc  r_remH,r_arg2H
    brcs __udivmodhi4_ep    ; remainder < divisor
    sub  r_remL,r_arg2L     ; subtract divisor
    sbc  r_remH,r_arg2H
__udivmodhi4_ep:
    rol  r_arg1L            ; shift dividend (with CARRY)
    rol  r_arg1H
    dec  r_cnt              ; decrement loop counter
    brne __udivmodhi4_loop
    com  r_arg1L            ; complement result
    com  r_arg1H            ; because C flag was complemented in loop
    ; div/mod results to return registers, as for the div() function
    mov_l r_arg2L, r_arg1L  ; quotient
    mov_h r_arg2H, r_arg1H
    mov_l r_arg1L, r_remL   ; remainder
    mov_h r_arg1H, r_remH
    ret
ENDF __udivmodhi4
#endif /* defined (L_udivmodhi4) */

#if defined (L_divmodhi4)
DEFUN __divmodhi4
    .global _div
_div:
    bst  r_arg1H,7          ; store sign of dividend
    mov  __tmp_reg__,r_arg2H
    brtc 0f
    com  __tmp_reg__        ; r0.7 is sign of result
    rcall __divmodhi4_neg1  ; dividend negative: negate
0:
    sbrc r_arg2H,7
    rcall __divmodhi4_neg2  ; divisor negative: negate
    XCALL __udivmodhi4      ; do the unsigned div/mod
    sbrc __tmp_reg__,7
    rcall __divmodhi4_neg2  ; correct quotient sign
    brtc __divmodhi4_exit
__divmodhi4_neg1:
    ;; correct dividend/remainder sign
    com  r_arg1H
    neg  r_arg1L
    sbci r_arg1H,0xff
    ret
__divmodhi4_neg2:
    ;; correct divisor/result sign
    com  r_arg2H
    neg  r_arg2L
    sbci r_arg2H,0xff
__divmodhi4_exit:
    ret
ENDF __divmodhi4
#endif /* defined (L_divmodhi4) */

#undef r_remH
#undef r_remL

#undef r_arg1H
#undef r_arg1L

#undef r_arg2H
#undef r_arg2L

#undef r_cnt

/*******************************************************
       Division 24 / 24 => (result + remainder)
*******************************************************/

;; A[0..2]: In: Dividend; Out: Quotient
#define A0  22
#define A1  A0+1
#define A2  A0+2

;; B[0..2]: In: Divisor; Out: Remainder
#define B0  18
#define B1  B0+1
#define B2  B0+2

;; C[0..2]: Expand remainder
#define C0  __zero_reg__
#define C1  26
#define C2  25

;; Loop counter
#define r_cnt   21

#if defined (L_udivmodpsi4)
;; R24:R22 = R24:R22 udiv R20:R18
;; R20:R18 = R24:R22 umod R20:R18
;; Clobbers: R21, R25, R26

DEFUN __udivmodpsi4
    ; init loop counter
    ldi  r_cnt, 24+1
    ; Clear remainder and carry.  C0 is already 0
    clr  C1
    sub  C2, C2
    ; jump to entry point
    rjmp __udivmodpsi4_start
__udivmodpsi4_loop:
    ; shift dividend into remainder
    rol  C0
    rol  C1
    rol  C2
    ; compare remainder & divisor
    cp   C0, B0
    cpc  C1, B1
    cpc  C2, B2
    brcs __udivmodpsi4_start    ; remainder < divisor
    sub  C0, B0                 ; subtract divisor
    sbc  C1, B1
    sbc  C2, B2
__udivmodpsi4_start:
    ; shift dividend (with CARRY)
    rol  A0
    rol  A1
    rol  A2
    ; decrement loop counter
    dec  r_cnt
    brne __udivmodpsi4_loop
    com  A0
    com  A1
    com  A2
    ; div/mod results to return registers
    ; remainder
    mov  B0, C0
    mov  B1, C1
    mov  B2, C2
    clr  __zero_reg__   ; C0
    ret
ENDF __udivmodpsi4
#endif /* defined (L_udivmodpsi4) */

#if defined (L_divmodpsi4)
;; R24:R22 = R24:R22 div R20:R18
;; R20:R18 = R24:R22 mod R20:R18
;; Clobbers: T, __tmp_reg__, R21, R25, R26

DEFUN __divmodpsi4
    ; R0.7 will contain the sign of the result:
    ; R0.7 = A.sign ^ B.sign
    mov  __tmp_reg__, B2
    ; T-flag = sign of dividend
    bst  A2, 7
    brtc 0f
    com  __tmp_reg__
    ; Adjust dividend's sign
    rcall __divmodpsi4_negA
0:
    ; Adjust divisor's sign
    sbrc B2, 7
    rcall __divmodpsi4_negB

    ; Do the unsigned div/mod
    XCALL __udivmodpsi4

    ; Adjust quotient's sign
    sbrc __tmp_reg__, 7
    rcall __divmodpsi4_negA

    ; Adjust remainder's sign
    brtc __divmodpsi4_end

__divmodpsi4_negB:
    ; Correct divisor/remainder sign
    com  B2
    com  B1
    neg  B0
    sbci B1, -1
    sbci B2, -1
    ret

    ; Correct dividend/quotient sign
__divmodpsi4_negA:
    com  A2
    com  A1
    neg  A0
    sbci A1, -1
    sbci A2, -1
__divmodpsi4_end:
    ret

ENDF __divmodpsi4
#endif /* defined (L_divmodpsi4) */

#undef A0
#undef A1
#undef A2

#undef B0
#undef B1
#undef B2

#undef C0
#undef C1
#undef C2

#undef r_cnt

/*******************************************************
       Division 32 / 32 => (result + remainder)
*******************************************************/
#define r_remHH  r31    /* remainder High */
#define r_remHL  r30
#define r_remH   r27
#define r_remL   r26    /* remainder Low */

/* return: remainder */
#define r_arg1HH r25    /* dividend High */
#define r_arg1HL r24
#define r_arg1H  r23
#define r_arg1L  r22    /* dividend Low */

/* return: quotient */
#define r_arg2HH r21    /* divisor High */
#define r_arg2HL r20
#define r_arg2H  r19
#define r_arg2L  r18    /* divisor Low */

#define r_cnt __zero_reg__  /* loop count (0 after the loop!) */

#if defined (L_udivmodsi4)
DEFUN __udivmodsi4
    ldi  r_remL, 33         ; init loop counter
    mov  r_cnt, r_remL
    sub  r_remL,r_remL
    sub  r_remH,r_remH      ; clear remainder and carry
    mov_l r_remHL, r_remL
    mov_h r_remHH, r_remH
    rjmp __udivmodsi4_ep    ; jump to entry point
__udivmodsi4_loop:
    rol  r_remL             ; shift dividend into remainder
    rol  r_remH
    rol  r_remHL
    rol  r_remHH
    cp   r_remL,r_arg2L     ; compare remainder & divisor
    cpc  r_remH,r_arg2H
    cpc  r_remHL,r_arg2HL
    cpc  r_remHH,r_arg2HH
    brcs __udivmodsi4_ep    ; remainder < divisor
    sub  r_remL,r_arg2L     ; subtract divisor
    sbc  r_remH,r_arg2H
    sbc  r_remHL,r_arg2HL
    sbc  r_remHH,r_arg2HH
__udivmodsi4_ep:
    rol  r_arg1L            ; shift dividend (with CARRY)
    rol  r_arg1H
    rol  r_arg1HL
    rol  r_arg1HH
    dec  r_cnt              ; decrement loop counter
    brne __udivmodsi4_loop
                            ; __zero_reg__ now restored (r_cnt == 0)
    com  r_arg1L
    com  r_arg1H
    com  r_arg1HL
    com  r_arg1HH
    ; div/mod results to return registers, as for the ldiv() function
    mov_l r_arg2L, r_arg1L  ; quotient
    mov_h r_arg2H, r_arg1H
    mov_l r_arg2HL, r_arg1HL
    mov_h r_arg2HH, r_arg1HH
    mov_l r_arg1L, r_remL   ; remainder
    mov_h r_arg1H, r_remH
    mov_l r_arg1HL, r_remHL
    mov_h r_arg1HH, r_remHH
    ret
ENDF __udivmodsi4
#endif /* defined (L_udivmodsi4) */

#if defined (L_divmodsi4)
DEFUN __divmodsi4
    mov  __tmp_reg__,r_arg2HH
    bst  r_arg1HH,7         ; store sign of dividend
    brtc 0f
    com  __tmp_reg__        ; r0.7 is sign of result
    rcall __divmodsi4_neg1  ; dividend negative: negate
0:
    sbrc r_arg2HH,7
    rcall __divmodsi4_neg2  ; divisor negative: negate
    XCALL __udivmodsi4      ; do the unsigned div/mod
    sbrc __tmp_reg__, 7     ; correct quotient sign
    rcall __divmodsi4_neg2
    brtc __divmodsi4_exit   ; correct remainder sign
__divmodsi4_neg1:
    ;; correct dividend/remainder sign
    com  r_arg1HH
    com  r_arg1HL
    com  r_arg1H
    neg  r_arg1L
    sbci r_arg1H, 0xff
    sbci r_arg1HL,0xff
    sbci r_arg1HH,0xff
    ret
__divmodsi4_neg2:
    ;; correct divisor/quotient sign
    com  r_arg2HH
    com  r_arg2HL
    com  r_arg2H
    neg  r_arg2L
    sbci r_arg2H,0xff
    sbci r_arg2HL,0xff
    sbci r_arg2HH,0xff
__divmodsi4_exit:
    ret
ENDF __divmodsi4
#endif /* defined (L_divmodsi4) */

/*******************************************************
       Division 64 / 64
       Modulo   64 % 64
*******************************************************/

;; Use the Speed-optimized Version on "big" Devices, i.e. Devices with
;; at least 16k of Program Memory.  For smaller Devices, depend
;; on MOVW and SP Size.  There is a Connection between SP Size and
;; Flash Size so that SP Size can be used to test for Flash Size.

#if defined (__AVR_HAVE_JMP_CALL__)
#   define SPEED_DIV 8
#elif defined (__AVR_HAVE_MOVW__) && !defined (__AVR_HAVE_8BIT_SP__)
#   define SPEED_DIV 16
#else
#   define SPEED_DIV 0
#endif

;; A[0..7]: In: Dividend;
;;          Out: Quotient  (T = 0)
;;          Out: Remainder (T = 1)
#define A0  18
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3
#define A4  A0+4
#define A5  A0+5
#define A6  A0+6
#define A7  A0+7

;; B[0..7]: In: Divisor; Out: Clobber
#define B0  10
#define B1  B0+1
#define B2  B0+2
#define B3  B0+3
#define B4  B0+4
#define B5  B0+5
#define B6  B0+6
#define B7  B0+7

;; C[0..7]: Expand remainder; Out: Remainder (unused)
#define C0  8
#define C1  C0+1
#define C2  30
#define C3  C2+1
#define C4  28
#define C5  C4+1
#define C6  26
#define C7  C6+1

;; Holds the Signs during the Division Routine
#define SS __tmp_reg__

;; Bit Counter in the Division Routine
#define R_cnt __zero_reg__

;; Scratch Register for Negation
#define NN r31

#if defined (L_udivdi3)

;; R25:R18 = R24:R18 umod R17:R10
;; Ordinary ABI-Function

DEFUN __umoddi3
    set
    rjmp __udivdi3_umoddi3
ENDF __umoddi3

;; R25:R18 = R24:R18 udiv R17:R10
;; Ordinary ABI-Function

DEFUN __udivdi3
    clt
ENDF __udivdi3

DEFUN __udivdi3_umoddi3
    push C0
    push C1
    push C4
    push C5
    XCALL __udivmod64
    pop  C5
    pop  C4
    pop  C1
    pop  C0
    ret
ENDF __udivdi3_umoddi3
#endif /* L_udivdi3 */

#if defined (L_udivmod64)

;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
;; No Registers saved/restored; the Callers will take Care.
;; Preserves B[] and the T-flag
;; T = 0: Compute Quotient in A[]
;; T = 1: Compute Remainder in A[] and shift SS one Bit left

DEFUN __udivmod64

    ;; Clear Remainder (C6, C7 will follow)
    clr  C0
    clr  C1
    wmov C2, C0
    wmov C4, C0
    ldi  C7, 64

#if SPEED_DIV == 0 || SPEED_DIV == 16
    ;; Initialize Loop Counter
    mov  R_cnt, C7
    wmov C6, C0
#endif /* SPEED_DIV */

#if SPEED_DIV == 8

    push A7
    clr  C6

1:  ;; Compare shifted Dividend against Divisor
    ;; If -- even after Shifting -- it is smaller...
    CP  A7,B0  $  cpc C0,B1  $  cpc C1,B2  $  cpc C2,B3
    cpc C3,B4  $  cpc C4,B5  $  cpc C5,B6  $  cpc C6,B7
    brcc 2f

    ;; ...then we can subtract it.  Thus, it is legal to shift left
               $  mov C6,C5  $  mov C5,C4  $  mov C4,C3
    mov C3,C2  $  mov C2,C1  $  mov C1,C0  $  mov C0,A7
    mov A7,A6  $  mov A6,A5  $  mov A5,A4  $  mov A4,A3
    mov A3,A2  $  mov A2,A1  $  mov A1,A0  $  clr A0

    ;; 8 Bits are done
    subi C7, 8
    brne 1b

    ;; Shifted 64 Bits:  A7 has traveled to C7
    pop  C7
    ;; Divisor is greater than Dividend.  We have:
    ;; A[] % B[] = A[]
    ;; A[] / B[] = 0
    ;; Thus, we can return immediately
    rjmp 5f

2:  ;; Initialize the Bit Counter with the Number of Bits still to be performed
    mov  R_cnt, C7

    ;; Push of A7 is not needed because C7 is still 0
    pop  C7
    clr  C7

#elif SPEED_DIV == 16

    ;; Compare shifted Dividend against Divisor
    cp   A7, B3
    cpc  C0, B4
    cpc  C1, B5
    cpc  C2, B6
    cpc  C3, B7
    brcc 2f

    ;; Divisor is greater than the shifted Dividend: We can shift the Dividend
    ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
    wmov C2,A6  $  wmov C0,A4
    wmov A6,A2  $  wmov A4,A0
    wmov A2,C6  $  wmov A0,C4

    ;; Set Bit Counter to 32
    lsr  R_cnt
2:
#elif SPEED_DIV
#error SPEED_DIV = ?
#endif /* SPEED_DIV */

    ;; The very Division + Remainder Routine

3:  ;; Left-shift Dividend...
    lsl A0  $  rol A1  $  rol A2  $  rol A3
    rol A4  $  rol A5  $  rol A6  $  rol A7

    ;; ...into Remainder
    rol C0  $  rol C1  $  rol C2  $  rol C3
    rol C4  $  rol C5  $  rol C6  $  rol C7

    ;; Compare Remainder and Divisor
    CP  C0,B0  $  cpc C1,B1  $  cpc C2,B2  $  cpc C3,B3
    cpc C4,B4  $  cpc C5,B5  $  cpc C6,B6  $  cpc C7,B7

    brcs 4f

    ;; Divisor fits into Remainder:  Subtract it from Remainder...
    SUB C0,B0  $  sbc C1,B1  $  sbc C2,B2  $  sbc C3,B3
    sbc C4,B4  $  sbc C5,B5  $  sbc C6,B6  $  sbc C7,B7

    ;; ...and set the corresponding Bit in the upcoming Quotient
    ;; The Bit will travel to its final Position
    ori A0, 1

4:  ;; This Bit is done
    dec  R_cnt
    brne 3b
    ;; __zero_reg__ is 0 again

    ;; T = 0: We are fine with the Quotient in A[]
    ;; T = 1: Copy the Remainder to A[]
5:  brtc 6f
    wmov A0, C0
    wmov A2, C2
    wmov A4, C4
    wmov A6, C6
    ;; Move the Sign of the Result to SS.7
    lsl  SS

6:  ret

ENDF __udivmod64
#endif /* L_udivmod64 */
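
/* One worker yields both results: the loop always develops Quotient
   and Remainder side by side, and the T flag merely selects which of
   the two is copied to the A[] return registers.  In C terms (a
   sketch only; assumes <stdint.h> types and b != 0):

       static uint64_t udivmod64_model (uint64_t a, uint64_t b,
                                        int t_flag)    // 1: remainder
       {
           uint64_t q = 0, r = 0;
           for (int i = 63; i >= 0; i--)
           {
               r = (r << 1) | ((a >> i) & 1);
               q <<= 1;
               if (r >= b)
               {
                   r -= b;
                   q |= 1;
               }
           }
           return t_flag ? r : q;
       }

   The SPEED_DIV == 8 / 16 paths merely pre-shift the Dividend by
   whole Bytes or one 32-bit Chunk while the Divisor is still larger,
   reducing the Number of single-bit Rounds.  */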

#if defined (L_divdi3)

;; R25:R18 = R24:R18 mod R17:R10
;; Ordinary ABI-Function

DEFUN __moddi3
    set
    rjmp __divdi3_moddi3
ENDF __moddi3

;; R25:R18 = R24:R18 div R17:R10
;; Ordinary ABI-Function

DEFUN __divdi3
    clt
ENDF __divdi3

DEFUN __divdi3_moddi3
#if SPEED_DIV
    mov  r31, A7
    or   r31, B7
    brmi 0f
    ;; Both Signs are 0: the following Complexity is not needed
    XJMP __udivdi3_umoddi3
#endif /* SPEED_DIV */

0:  ;; The Prologue
    ;; Save 12 Registers:  Y, 17...8
    ;; No Frame needed (X = 0)
    clr  r26
    clr  r27
    ldi  r30, lo8(gs(1f))
    ldi  r31, hi8(gs(1f))
    XJMP __prologue_saves__ + ((18 - 12) * 2)

1:  ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
    ;; SS.6 will contain the Sign of the Remainder (A.sign)
    mov  SS, A7
    asr  SS
    ;; Adjust the Dividend's Sign as needed
#if SPEED_DIV
    ;; Compiling for Speed we know that at least one Sign must be < 0
    ;; Thus, if A[] >= 0 then we know B[] < 0
    brpl 22f
#else
    brpl 21f
#endif /* SPEED_DIV */

    XCALL __negdi2

    ;; Adjust the Divisor's Sign and SS.7 as needed
21: tst  B7
    brpl 3f
22: ldi  NN, 1 << 7
    eor  SS, NN

    ldi NN, -1
    com B4  $  com B5  $  com B6  $  com B7
            $  com B1  $  com B2  $  com B3
    NEG B0
            $  sbc B1,NN  $  sbc B2,NN  $  sbc B3,NN
    sbc B4,NN  $  sbc B5,NN  $  sbc B6,NN  $  sbc B7,NN

3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on the T-flag)
    XCALL __udivmod64

    ;; Adjust the Result's Sign
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    tst  SS
    brpl 4f
#else
    sbrc SS, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XCALL __negdi2

4:  ;; Epilogue: Restore the Z = 12 Registers and return
    in   r28, __SP_L__
#if defined (__AVR_HAVE_8BIT_SP__)
    clr  r29
#else
    in   r29, __SP_H__
#endif /* #SP = 8/16 */
    ldi  r30, 12
    XJMP __epilogue_restores__ + ((18 - 12) * 2)

ENDF __divdi3_moddi3

#undef R_cnt
#undef SS
#undef NN

#endif /* L_divdi3 */
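
/* The Sign handling above implements C99 truncating division: the
   Quotient is negative iff the Operand Signs differ (SS.7), and the
   Remainder takes the Sign of the Dividend (SS.6).  As a C sketch
   (reference model only; assumes <stdint.h> types and b != 0):

       static int64_t divmoddi_model (int64_t a, int64_t b, int t_flag)
       {
           uint64_t ua = a < 0 ? - (uint64_t) a : (uint64_t) a;
           uint64_t ub = b < 0 ? - (uint64_t) b : (uint64_t) b;
           uint64_t q = ua / ub;                 // __udivmod64, T = 0
           uint64_t r = ua % ub;                 // __udivmod64, T = 1
           if (t_flag)                           // remainder: sign of A
               return a < 0 ? - (int64_t) r : (int64_t) r;
           return (a < 0) != (b < 0)             // quotient: A.sign ^ B.sign
                  ? - (int64_t) q : (int64_t) q;
       }
*/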

.section .text.libgcc, "ax", @progbits

#define TT __tmp_reg__

#if defined (L_adddi3)
;; (set (reg:DI 18)
;;      (plus:DI (reg:DI 18)
;;               (reg:DI 10)))
DEFUN __adddi3
    ADD A0,B0  $  adc A1,B1  $  adc A2,B2  $  adc A3,B3
    adc A4,B4  $  adc A5,B5  $  adc A6,B6  $  adc A7,B7
    ret
ENDF __adddi3
#endif /* L_adddi3 */

#if defined (L_adddi3_s8)
;; (set (reg:DI 18)
;;      (plus:DI (reg:DI 18)
;;               (sign_extend:SI (reg:QI 26))))
DEFUN __adddi3_s8
    clr  TT
    sbrc r26, 7
    com  TT
    ADD A0,r26  $  adc A1,TT  $  adc A2,TT  $  adc A3,TT
    adc A4,TT   $  adc A5,TT  $  adc A6,TT  $  adc A7,TT
    ret
ENDF __adddi3_s8
#endif /* L_adddi3_s8 */

#if defined (L_subdi3)
;; (set (reg:DI 18)
;;      (minus:DI (reg:DI 18)
;;                (reg:DI 10)))
DEFUN __subdi3
    SUB A0,B0  $  sbc A1,B1  $  sbc A2,B2  $  sbc A3,B3
    sbc A4,B4  $  sbc A5,B5  $  sbc A6,B6  $  sbc A7,B7
    ret
ENDF __subdi3
#endif /* L_subdi3 */

#if defined (L_cmpdi2)
;; (set (cc0)
;;      (compare (reg:DI 18)
;;               (reg:DI 10)))
DEFUN __cmpdi2
    CP  A0,B0  $  cpc A1,B1  $  cpc A2,B2  $  cpc A3,B3
    cpc A4,B4  $  cpc A5,B5  $  cpc A6,B6  $  cpc A7,B7
    ret
ENDF __cmpdi2
#endif /* L_cmpdi2 */

#if defined (L_cmpdi2_s8)
;; (set (cc0)
;;      (compare (reg:DI 18)
;;               (sign_extend:SI (reg:QI 26))))
DEFUN __cmpdi2_s8
    clr  TT
    sbrc r26, 7
    com  TT
    CP  A0,r26  $  cpc A1,TT  $  cpc A2,TT  $  cpc A3,TT
    cpc A4,TT   $  cpc A5,TT  $  cpc A6,TT  $  cpc A7,TT
    ret
ENDF __cmpdi2_s8
#endif /* L_cmpdi2_s8 */

#if defined (L_negdi2)
DEFUN __negdi2

    com A4  $  com A5  $  com A6  $  com A7
            $  com A1  $  com A2  $  com A3
    NEG A0
            $  sbci A1,-1  $  sbci A2,-1  $  sbci A3,-1
    sbci A4,-1  $  sbci A5,-1  $  sbci A6,-1  $  sbci A7,-1
    ret

ENDF __negdi2
#endif /* L_negdi2 */

#undef TT

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#undef B7
#undef B6
#undef B5
#undef B4
#undef B3
#undef B2
#undef B1
#undef B0

#undef A7
#undef A6
#undef A5
#undef A4
#undef A3
#undef A2
#undef A1
#undef A0


.section .text.libgcc.prologue, "ax", @progbits

/**********************************
 * This is a prologue subroutine
 **********************************/
#if defined (L_prologue)

;; This function does not clobber T-flag; 64-bit division relies on it
DEFUN __prologue_saves__
    push r2
    push r3
    push r4
    push r5
    push r6
    push r7
    push r8
    push r9
    push r10
    push r11
    push r12
    push r13
    push r14
    push r15
    push r16
    push r17
    push r28
    push r29
#if defined (__AVR_HAVE_8BIT_SP__)
    in   r28,__SP_L__
    sub  r28,r26
    out  __SP_L__,r28
    clr  r29
#elif defined (__AVR_XMEGA__)
    in   r28,__SP_L__
    in   r29,__SP_H__
    sub  r28,r26
    sbc  r29,r27
    out  __SP_L__,r28
    out  __SP_H__,r29
#else
    in   r28,__SP_L__
    in   r29,__SP_H__
    sub  r28,r26
    sbc  r29,r27
    in   __tmp_reg__,__SREG__
    cli
    out  __SP_H__,r29
    out  __SREG__,__tmp_reg__
    out  __SP_L__,r28
#endif /* #SP = 8/16 */

#if defined (__AVR_HAVE_EIJMP_EICALL__)
    eijmp
#else
    ijmp
#endif

ENDF __prologue_saves__
#endif /* defined (L_prologue) */

/*
 * This is an epilogue subroutine
 */
#if defined (L_epilogue)

DEFUN __epilogue_restores__
    ldd  r2,Y+18
    ldd  r3,Y+17
    ldd  r4,Y+16
    ldd  r5,Y+15
    ldd  r6,Y+14
    ldd  r7,Y+13
    ldd  r8,Y+12
    ldd  r9,Y+11
    ldd  r10,Y+10
    ldd  r11,Y+9
    ldd  r12,Y+8
    ldd  r13,Y+7
    ldd  r14,Y+6
    ldd  r15,Y+5
    ldd  r16,Y+4
    ldd  r17,Y+3
    ldd  r26,Y+2
#if defined (__AVR_HAVE_8BIT_SP__)
    ldd  r29,Y+1
    add  r28,r30
    out  __SP_L__,r28
    mov  r28, r26
#elif defined (__AVR_XMEGA__)
    ldd  r27,Y+1
    add  r28,r30
    adc  r29,__zero_reg__
    out  __SP_L__,r28
    out  __SP_H__,r29
    wmov 28, 26
#else
    ldd  r27,Y+1
    add  r28,r30
    adc  r29,__zero_reg__
    in   __tmp_reg__,__SREG__
    cli
    out  __SP_H__,r29
    out  __SREG__,__tmp_reg__
    out  __SP_L__,r28
    mov_l r28, r26
    mov_h r29, r27
#endif /* #SP = 8/16 */
    ret
ENDF __epilogue_restores__
#endif /* defined (L_epilogue) */

#ifdef L_exit
.section .fini9,"ax",@progbits
DEFUN _exit
    .weak exit
exit:
ENDF _exit

/* Code from .fini8 ... .fini1 sections inserted by ld script.  */

.section .fini0,"ax",@progbits
    cli
__stop_program:
    rjmp __stop_program
#endif /* defined (L_exit) */

#ifdef L_cleanup
.weak _cleanup
.func _cleanup
_cleanup:
    ret
.endfunc
#endif /* defined (L_cleanup) */

.section .text.libgcc, "ax", @progbits

#ifdef L_tablejump
DEFUN __tablejump2__
    lsl  r30    ; table entries are word addresses: convert to byte address
    rol  r31
    ;; FALLTHRU
ENDF __tablejump2__

DEFUN __tablejump__
#if defined (__AVR_HAVE_LPMX__)
    lpm  __tmp_reg__, Z+
    lpm  r31, Z
    mov  r30, __tmp_reg__
#if defined (__AVR_HAVE_EIJMP_EICALL__)
    eijmp
#else
    ijmp
#endif

#else /* !HAVE_LPMX */
    lpm
    adiw r30, 1
    push r0
    lpm
    push r0
#if defined (__AVR_HAVE_EIJMP_EICALL__)
    in   __tmp_reg__, __EIND__
    push __tmp_reg__
#endif
    ret
#endif /* !HAVE_LPMX */
ENDF __tablejump__
#endif /* defined (L_tablejump) */

#ifdef L_copy_data
.section .init4,"ax",@progbits
DEFUN __do_copy_data
#if defined(__AVR_HAVE_ELPMX__)
    ldi  r17, hi8(__data_end)
    ldi  r26, lo8(__data_start)
    ldi  r27, hi8(__data_start)
    ldi  r30, lo8(__data_load_start)
    ldi  r31, hi8(__data_load_start)
    ldi  r16, hh8(__data_load_start)
    out  __RAMPZ__, r16
    rjmp .L__do_copy_data_start
.L__do_copy_data_loop:
    elpm r0, Z+
    st   X+, r0
.L__do_copy_data_start:
    cpi  r26, lo8(__data_end)
    cpc  r27, r17
    brne .L__do_copy_data_loop
#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
    ldi  r17, hi8(__data_end)
    ldi  r26, lo8(__data_start)
    ldi  r27, hi8(__data_start)
    ldi  r30, lo8(__data_load_start)
    ldi  r31, hi8(__data_load_start)
    ldi  r16, hh8(__data_load_start - 0x10000)
.L__do_copy_data_carry:
    inc  r16
    out  __RAMPZ__, r16
    rjmp .L__do_copy_data_start
.L__do_copy_data_loop:
    elpm
    st   X+, r0
    adiw r30, 1
    brcs .L__do_copy_data_carry
.L__do_copy_data_start:
    cpi  r26, lo8(__data_end)
    cpc  r27, r17
    brne .L__do_copy_data_loop
#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
    ldi  r17, hi8(__data_end)
    ldi  r26, lo8(__data_start)
    ldi  r27, hi8(__data_start)
    ldi  r30, lo8(__data_load_start)
    ldi  r31, hi8(__data_load_start)
    rjmp .L__do_copy_data_start
.L__do_copy_data_loop:
#if defined (__AVR_HAVE_LPMX__)
    lpm  r0, Z+
#else
    lpm
    adiw r30, 1
#endif
    st   X+, r0
.L__do_copy_data_start:
    cpi  r26, lo8(__data_end)
    cpc  r27, r17
    brne .L__do_copy_data_loop
#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out  __RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
ENDF __do_copy_data
#endif /* L_copy_data */
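
/* In C terms the startup loop above is simply (reference model only;
   the symbols come from the linker script, and on real hardware the
   source bytes are fetched with LPM/ELPM rather than a plain load):

       extern char __data_start[], __data_end[], __data_load_start[];

       static void do_copy_data_model (void)
       {
           const char *src = __data_load_start;   // load address (flash)
           for (char *dst = __data_start; dst != __data_end; )
               *dst++ = *src++;                   // (e)lpm ... st X+
       }
*/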

/* __do_clear_bss is only necessary if there is anything in the .bss section.  */

#ifdef L_clear_bss
.section .init4,"ax",@progbits
DEFUN __do_clear_bss
    ldi  r17, hi8(__bss_end)
    ldi  r26, lo8(__bss_start)
    ldi  r27, hi8(__bss_start)
    rjmp .do_clear_bss_start
.do_clear_bss_loop:
    st   X+, __zero_reg__
.do_clear_bss_start:
    cpi  r26, lo8(__bss_end)
    cpc  r27, r17
    brne .do_clear_bss_loop
ENDF __do_clear_bss
#endif /* L_clear_bss */
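
/* Reference model of the clearing loop (a sketch only; the symbols
   are provided by the linker script):

       extern char __bss_start[], __bss_end[];

       static void do_clear_bss_model (void)
       {
           for (char *p = __bss_start; p != __bss_end; )
               *p++ = 0;                          // st X+, __zero_reg__
       }
*/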

/* __do_global_ctors and __do_global_dtors are only necessary
   if there are any constructors/destructors.  */

#ifdef L_ctors
.section .init6,"ax",@progbits
DEFUN __do_global_ctors
#if defined(__AVR_HAVE_ELPM__)
    ldi  r17, hi8(__ctors_start)
    ldi  r28, lo8(__ctors_end)
    ldi  r29, hi8(__ctors_end)
    ldi  r16, hh8(__ctors_end)
    rjmp .L__do_global_ctors_start
.L__do_global_ctors_loop:
    sbiw r28, 2
    sbc  r16, __zero_reg__
    mov_h r31, r29
    mov_l r30, r28
    out  __RAMPZ__, r16
    XCALL __tablejump_elpm__
.L__do_global_ctors_start:
    cpi  r28, lo8(__ctors_start)
    cpc  r29, r17
    ldi  r24, hh8(__ctors_start)
    cpc  r16, r24
    brne .L__do_global_ctors_loop
#else
    ldi  r17, hi8(__ctors_start)
    ldi  r28, lo8(__ctors_end)
    ldi  r29, hi8(__ctors_end)
    rjmp .L__do_global_ctors_start
.L__do_global_ctors_loop:
    sbiw r28, 2
    mov_h r31, r29
    mov_l r30, r28
    XCALL __tablejump__
.L__do_global_ctors_start:
    cpi  r28, lo8(__ctors_start)
    cpc  r29, r17
    brne .L__do_global_ctors_loop
#endif /* defined(__AVR_HAVE_ELPM__) */
ENDF __do_global_ctors
#endif /* L_ctors */

#ifdef L_dtors
.section .fini6,"ax",@progbits
DEFUN __do_global_dtors
#if defined(__AVR_HAVE_ELPM__)
    ldi  r17, hi8(__dtors_end)
    ldi  r28, lo8(__dtors_start)
    ldi  r29, hi8(__dtors_start)
    ldi  r16, hh8(__dtors_start)
    rjmp .L__do_global_dtors_start
.L__do_global_dtors_loop:
    sbiw r28, 2
    sbc  r16, __zero_reg__
    mov_h r31, r29
    mov_l r30, r28
    out  __RAMPZ__, r16
    XCALL __tablejump_elpm__
.L__do_global_dtors_start:
    cpi  r28, lo8(__dtors_end)
    cpc  r29, r17
    ldi  r24, hh8(__dtors_end)
    cpc  r16, r24
    brne .L__do_global_dtors_loop
#else
    ldi  r17, hi8(__dtors_end)
    ldi  r28, lo8(__dtors_start)
    ldi  r29, hi8(__dtors_start)
    rjmp .L__do_global_dtors_start
.L__do_global_dtors_loop:
    mov_h r31, r29
    mov_l r30, r28
    XCALL __tablejump__
    adiw r28, 2
.L__do_global_dtors_start:
    cpi  r28, lo8(__dtors_end)
    cpc  r29, r17
    brne .L__do_global_dtors_loop
#endif /* defined(__AVR_HAVE_ELPM__) */
ENDF __do_global_dtors
#endif /* L_dtors */

.section .text.libgcc, "ax", @progbits

#ifdef L_tablejump_elpm
DEFUN __tablejump_elpm__
#if defined (__AVR_HAVE_ELPMX__)
    elpm __tmp_reg__, Z+
    elpm r31, Z
    mov  r30, __tmp_reg__
#if defined (__AVR_HAVE_RAMPD__)
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out  __RAMPZ__, __zero_reg__
#endif /* RAMPD */
#if defined (__AVR_HAVE_EIJMP_EICALL__)
    eijmp
#else
    ijmp
#endif

#elif defined (__AVR_HAVE_ELPM__)
    elpm
    adiw r30, 1
    push r0
    elpm
    push r0
#if defined (__AVR_HAVE_EIJMP_EICALL__)
    in   __tmp_reg__, __EIND__
    push __tmp_reg__
#endif
    ret
#endif
ENDF __tablejump_elpm__
#endif /* defined (L_tablejump_elpm) */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Loading n bytes from Flash; n = 3,4
;; R22... = Flash[Z]
;; Clobbers: __tmp_reg__

#if (defined (L_load_3)        \
     || defined (L_load_4))    \
    && !defined (__AVR_HAVE_LPMX__)

;; Destination
#define D0  22
#define D1  D0+1
#define D2  D0+2
#define D3  D0+3

.macro .load dest, n
    lpm
    mov  \dest, r0
.if \dest != D0+\n-1
    adiw r30, 1
.else
    sbiw r30, \n-1
.endif
.endm

#if defined (L_load_3)
DEFUN __load_3
    push D3
    XCALL __load_4
    pop  D3
    ret
ENDF __load_3
#endif /* L_load_3 */

#if defined (L_load_4)
DEFUN __load_4
    .load D0, 4
    .load D1, 4
    .load D2, 4
    .load D3, 4
    ret
ENDF __load_4
#endif /* L_load_4 */

#endif /* L_load_3 || L_load_4 */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Loading n bytes from Flash or RAM; n = 1,2,3,4
;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
;; Clobbers: __tmp_reg__, R21, R30, R31

#if (defined (L_xload_1)       \
     || defined (L_xload_2)    \
     || defined (L_xload_3)    \
     || defined (L_xload_4))

;; Destination
#define D0  22
#define D1  D0+1
#define D2  D0+2
#define D3  D0+3

;; Register containing bits 16+ of the address

#define HHI8  21

.macro .xload dest, n
#if defined (__AVR_HAVE_ELPMX__)
    elpm \dest, Z+
#elif defined (__AVR_HAVE_ELPM__)
    elpm
    mov  \dest, r0
.if \dest != D0+\n-1
    adiw r30, 1
    adc  HHI8, __zero_reg__
    out  __RAMPZ__, HHI8
.endif
#elif defined (__AVR_HAVE_LPMX__)
    lpm  \dest, Z+
#else
    lpm
    mov  \dest, r0
.if \dest != D0+\n-1
    adiw r30, 1
.endif
#endif
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
.if \dest == D0+\n-1
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out  __RAMPZ__, __zero_reg__
.endif
#endif
.endm ; .xload

#if defined (L_xload_1)
DEFUN __xload_1
#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
    sbrc HHI8, 7
    ld   D0, Z
    sbrs HHI8, 7
    lpm  D0, Z
    ret
#else
    sbrc HHI8, 7
    rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
    out  __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload D0, 1
    ret
1:  ld   D0, Z
    ret
#endif /* LPMx && ! ELPM */
ENDF __xload_1
#endif /* L_xload_1 */

#if defined (L_xload_2)
DEFUN __xload_2
    sbrc HHI8, 7
    rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
    out  __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload D0, 2
    .xload D1, 2
    ret
1:  ld   D0, Z+
    ld   D1, Z+
    ret
ENDF __xload_2
#endif /* L_xload_2 */

#if defined (L_xload_3)
DEFUN __xload_3
    sbrc HHI8, 7
    rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
    out  __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload D0, 3
    .xload D1, 3
    .xload D2, 3
    ret
1:  ld   D0, Z+
    ld   D1, Z+
    ld   D2, Z+
    ret
ENDF __xload_3
#endif /* L_xload_3 */

#if defined (L_xload_4)
DEFUN __xload_4
    sbrc HHI8, 7
    rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
    out  __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload D0, 4
    .xload D1, 4
    .xload D2, 4
    .xload D3, 4
    ret
1:  ld   D0, Z+
    ld   D1, Z+
    ld   D2, Z+
    ld   D3, Z+
    ret
ENDF __xload_4
#endif /* L_xload_4 */

#endif /* L_xload_{1|2|3|4} */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; memcopy from Address Space __pgmx to RAM
;; R23:Z = Source Address
;; X     = Destination Address
;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z

#if defined (L_movmemx)

#define HHI8  23
#define LOOP  24

DEFUN __movmemx_qi
    ;; #Bytes to copy fits in 8 Bits (1..255)
    ;; Zero-extend Loop Counter
    clr  LOOP+1
    ;; FALLTHRU
ENDF __movmemx_qi

DEFUN __movmemx_hi

    ;; Read from where?
    sbrc HHI8, 7
    rjmp 1f

    ;; Read from Flash

#if defined (__AVR_HAVE_ELPM__)
    out  __RAMPZ__, HHI8
#endif

0:  ;; Load 1 Byte from Flash...

#if defined (__AVR_HAVE_ELPMX__)
    elpm r0, Z+
#elif defined (__AVR_HAVE_ELPM__)
    elpm
    adiw r30, 1
    adc  HHI8, __zero_reg__
    out  __RAMPZ__, HHI8
#elif defined (__AVR_HAVE_LPMX__)
    lpm  r0, Z+
#else
    lpm
    adiw r30, 1
#endif

    ;; ...and store that Byte to the RAM Destination
    st   X+, r0
    sbiw LOOP, 1
    brne 0b
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out  __RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
    ret

    ;; Read from RAM

1:  ;; Read 1 Byte from RAM...
    ld   r0, Z+
    ;; ...and store that Byte to the RAM Destination
    st   X+, r0
    sbiw LOOP, 1
    brne 1b
    ret
ENDF __movmemx_hi

#undef HHI8
#undef LOOP

#endif /* L_movmemx */
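
/* Dispatch model: Bit 7 of HHI8, i.e. bit 23 of the 24-bit __pgmx
   address, selects RAM (set) or program memory (clear).  A C sketch
   (reference model only; flash_read_byte() is a hypothetical stand-in
   for the (E)LPM sequences above, and <stdint.h> types are assumed):

       extern uint8_t flash_read_byte (uint32_t addr);  // hypothetical

       static void movmemx_model (uint8_t *dst, uint32_t src, uint16_t n)
       {
           if (src & 0x800000UL)                 // sbrc HHI8, 7
               while (n--)                       // RAM source
                   *dst++ = * (const uint8_t *) (uint16_t) src++;
           else
               while (n--)                       // flash source
                   *dst++ = flash_read_byte (src++);
       }
*/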


.section .text.libgcc.builtins, "ax", @progbits

/**********************************
 * Find first set Bit (ffs)
 **********************************/

#if defined (L_ffssi2)
;; find first set bit
;; r25:r24 = ffs32 (r25:r22)
;; clobbers: r22, r26
DEFUN __ffssi2
    clr  r26
    tst  r22
    brne 1f
    subi r26, -8
    or   r22, r23
    brne 1f
    subi r26, -8
    or   r22, r24
    brne 1f
    subi r26, -8
    or   r22, r25
    brne 1f
    ret
1:  mov  r24, r22
    XJMP __loop_ffsqi2
ENDF __ffssi2
#endif /* defined (L_ffssi2) */

#if defined (L_ffshi2)
;; find first set bit
;; r25:r24 = ffs16 (r25:r24)
;; clobbers: r26
DEFUN __ffshi2
    clr  r26
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have a problem skipping a 2-word instruction
    tst  r24
    breq 2f
#else
    cpse r24, __zero_reg__
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1:  XJMP __loop_ffsqi2
2:  ldi  r26, 8
    or   r24, r25
    brne 1b
    ret
ENDF __ffshi2
#endif /* defined (L_ffshi2) */

#if defined (L_loop_ffsqi2)
;; Helper for ffshi2, ffssi2
;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
;; r24 must be != 0
;; clobbers: r26
DEFUN __loop_ffsqi2
    inc  r26
    lsr  r24
    brcc __loop_ffsqi2
    mov  r24, r26
    clr  r25
    ret
ENDF __loop_ffsqi2
#endif /* defined (L_loop_ffsqi2) */
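
/* Taken together, __ffssi2/__ffshi2 narrow the argument to its lowest
   nonzero byte (adding 8 per skipped byte) and __loop_ffsqi2 finds
   the bit within it.  A C reference model (a sketch only; assumes
   <stdint.h> types):

       static uint16_t ffs32_model (uint32_t x)
       {
           if (x == 0)
               return 0;
           uint16_t n = 1;              // inc r26 runs before the test
           while ((x & 1) == 0)
           {
               x >>= 1;                 // lsr r24
               n++;
           }
           return n;
       }
*/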


/**********************************
 * Count trailing Zeros (ctz)
 **********************************/

#if defined (L_ctzsi2)
;; count trailing zeros
;; r25:r24 = ctz32 (r25:r22)
;; clobbers: r26, r22
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzsi2
    XCALL __ffssi2
    dec  r24
    ret
ENDF __ctzsi2
#endif /* defined (L_ctzsi2) */

#if defined (L_ctzhi2)
;; count trailing zeros
;; r25:r24 = ctz16 (r25:r24)
;; clobbers: r26
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzhi2
    XCALL __ffshi2
    dec  r24
    ret
ENDF __ctzhi2
#endif /* defined (L_ctzhi2) */

\f
/**********************************
 * Count leading Zeros (clz)
 **********************************/

#if defined (L_clzdi2)
;; count leading zeros
;; r25:r24 = clz64 (r25:r18)
;; clobbers: r22, r23, r26
DEFUN __clzdi2
    XCALL __clzsi2
    sbrs r24, 5
    ret
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21
    XCALL __clzsi2
    subi r24, -32
    ret
ENDF __clzdi2
#endif /* defined (L_clzdi2) */

#if defined (L_clzsi2)
;; count leading zeros
;; r25:r24 = clz32 (r25:r22)
;; clobbers: r26
DEFUN __clzsi2
    XCALL __clzhi2
    sbrs r24, 4
    ret
    mov_l r24, r22
    mov_h r25, r23
    XCALL __clzhi2
    subi r24, -16
    ret
ENDF __clzsi2
#endif /* defined (L_clzsi2) */

#if defined (L_clzhi2)
;; count leading zeros
;; r25:r24 = clz16 (r25:r24)
;; clobbers: r26
DEFUN __clzhi2
    clr r26
    tst r25
    brne 1f
    subi r26, -8
    or r25, r24
    brne 1f
    ldi r24, 16
    ret
1:  cpi r25, 16
    brsh 3f
    subi r26, -3
    swap r25
2:  inc r26
3:  lsl r25
    brcc 2b
    mov r24, r26
    clr r25
    ret
ENDF __clzhi2
#endif /* defined (L_clzhi2) */
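
/* __clzhi2 narrows down byte-, nibble- and then bit-wise; the 32- and
   64-bit versions split the operand and test result bit 4 resp. 5, since
   a count of 16 resp. 32 means the high half was all zero.  C sketch of
   the 16-bit worker (illustrative only; hypothetical name, <stdint.h>
   types; `n += 4' stands for the `subi r26, -3' plus the extra `inc' on
   the fall-through path):

       int clz16 (uint16_t x)
       {
           uint8_t n = 0, b = x >> 8;
           if (b == 0)
             {
               n += 8;                      // subi r26, -8
               b = x;                       // or r25, r24 (r25 known zero)
               if (b == 0)
                   return 16;               // clz16 (0) = 16
             }
           if (b < 16)                      // cpi r25, 16;  brsh 3f
             {
               n += 4;
               b = (b << 4) | (b >> 4);     // swap r25
             }
           while (!(b & 0x80))              // lsl r25;  brcc 2b
             {
               n++;
               b <<= 1;
             }
           return n;
       }
*/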

\f
/**********************************
 * Parity
 **********************************/

#if defined (L_paritydi2)
;; r25:r24 = parity64 (r25:r18)
;; clobbers: __tmp_reg__
DEFUN __paritydi2
    eor r24, r18
    eor r24, r19
    eor r24, r20
    eor r24, r21
    XJMP __paritysi2
ENDF __paritydi2
#endif /* defined (L_paritydi2) */

#if defined (L_paritysi2)
;; r25:r24 = parity32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __paritysi2
    eor r24, r22
    eor r24, r23
    XJMP __parityhi2
ENDF __paritysi2
#endif /* defined (L_paritysi2) */

#if defined (L_parityhi2)
;; r25:r24 = parity16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __parityhi2
    eor r24, r25
    ;; FALLTHRU
ENDF __parityhi2

;; r25:r24 = parity8 (r24)
;; clobbers: __tmp_reg__
DEFUN __parityqi2
    ;; parity is in r24[0..7]
    mov __tmp_reg__, r24
    swap __tmp_reg__
    eor r24, __tmp_reg__
    ;; parity is in r24[0..3]
    subi r24, -4
    andi r24, -5
    subi r24, -6
    ;; parity is in r24[0,3]
    sbrc r24, 3
    inc r24
    ;; parity is in r24[0]
    andi r24, 1
    clr r25
    ret
ENDF __parityqi2
#endif /* defined (L_parityhi2) */
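
/* The parity functions xor all argument bytes into r24 and then fold that
   byte down to a single bit.  The assembly condenses the last four bits
   with add/mask tricks; a conventional xor-fold computes the same result,
   as in this C sketch (illustrative only; hypothetical names):

       static uint8_t parity8 (uint8_t b)
       {
           b ^= (uint8_t) ((b << 4) | (b >> 4));   // swap + eor: 8 -> 4 bits
           b ^= b >> 2;                            // 4 -> 2 bits
           b ^= b >> 1;                            // 2 -> 1 bit
           return b & 1;
       }

       int parity16 (uint16_t x)
       {
           return parity8 ((uint8_t) x ^ (uint8_t) (x >> 8));  // eor r24, r25
       }
*/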

\f
/**********************************
 * Population Count
 **********************************/

#if defined (L_popcounthi2)
;; population count
;; r25:r24 = popcount16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __popcounthi2
    XCALL __popcountqi2
    push r24
    mov r24, r25
    XCALL __popcountqi2
    clr r25
    ;; FALLTHRU
ENDF __popcounthi2

DEFUN __popcounthi2_tail
    pop __tmp_reg__
    add r24, __tmp_reg__
    ret
ENDF __popcounthi2_tail
#endif /* defined (L_popcounthi2) */
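
/* __popcounthi2 counts the low byte, saves that count on the stack, counts
   the high byte and falls through to __popcounthi2_tail, which pops and
   adds the saved count.  The 32- and 64-bit versions below reuse the same
   tail to add the counts of their two halves.  In C terms (illustrative
   only; hypothetical names):

       int popcount16 (uint16_t x)
       {
           return popcount8 ((uint8_t) x)            // pushed
                  + popcount8 ((uint8_t) (x >> 8));  // added by the tail
       }
*/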

#if defined (L_popcountsi2)
;; population count
;; r25:r24 = popcount32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __popcountsi2
    XCALL __popcounthi2
    push r24
    mov_l r24, r22
    mov_h r25, r23
    XCALL __popcounthi2
    XJMP __popcounthi2_tail
ENDF __popcountsi2
#endif /* defined (L_popcountsi2) */

#if defined (L_popcountdi2)
;; population count
;; r25:r24 = popcount64 (r25:r18)
;; clobbers: r22, r23, __tmp_reg__
DEFUN __popcountdi2
    XCALL __popcountsi2
    push r24
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21
    XCALL __popcountsi2
    XJMP __popcounthi2_tail
ENDF __popcountdi2
#endif /* defined (L_popcountdi2) */

#if defined (L_popcountqi2)
;; population count
;; r24 = popcount8 (r24)
;; clobbers: __tmp_reg__
DEFUN __popcountqi2
    mov __tmp_reg__, r24
    andi r24, 1
    lsr __tmp_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __tmp_reg__
    ret
ENDF __popcountqi2
#endif /* defined (L_popcountqi2) */
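
/* __popcountqi2 counts serially: bit 0 is masked out first, bits 1..6 are
   shifted into the carry one at a time and added via `adc', and the final
   `adc r24, __tmp_reg__' picks up bit 6 (in the carry) and bit 7 (the byte
   remainder) with a single instruction.  Reference semantics in C
   (illustrative only; hypothetical name):

       static uint8_t popcount8 (uint8_t x)
       {
           uint8_t n = x & 1;                   // andi r24, 1
           for (uint8_t i = 1; i < 8; i++)
               n += (x >> i) & 1;               // lsr ... adc
           return n;
       }
*/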

\f
/**********************************
 * Swap bytes
 **********************************/

;; swap two registers with different register numbers
.macro bswap a, b
    eor \a, \b
    eor \b, \a
    eor \a, \b
.endm
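
/* This is the classic xor swap; in C:

       a ^= b;  b ^= a;  a ^= b;   // now (a, b) = (old b, old a)

   The two registers must be distinct: with a == b the first eor would
   zero both, hence the restriction noted above.  */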

#if defined (L_bswapsi2)
;; swap bytes
;; r25:r22 = bswap32 (r25:r22)
DEFUN __bswapsi2
    bswap r22, r25
    bswap r23, r24
    ret
ENDF __bswapsi2
#endif /* defined (L_bswapsi2) */

#if defined (L_bswapdi2)
;; swap bytes
;; r25:r18 = bswap64 (r25:r18)
DEFUN __bswapdi2
    bswap r18, r25
    bswap r19, r24
    bswap r20, r23
    bswap r21, r22
    ret
ENDF __bswapdi2
#endif /* defined (L_bswapdi2) */

\f
/**********************************
 * 64-bit shifts
 **********************************/

#if defined (L_ashrdi3)
;; Arithmetic shift right
;; r25:r18 = ashr64 (r25:r18, r17:r16)
DEFUN __ashrdi3
    push r16
    andi r16, 63
    breq 2f
1:  asr r25
    ror r24
    ror r23
    ror r22
    ror r21
    ror r20
    ror r19
    ror r18
    dec r16
    brne 1b
2:  pop r16
    ret
ENDF __ashrdi3
#endif /* defined (L_ashrdi3) */
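
/* All three 64-bit shifts use the same pattern: mask the shift count to
   0..63, then move one bit per loop iteration through the carry chain of
   the eight registers; the logical and left variants below differ only in
   the shift direction.  C sketch of the arithmetic variant (illustrative
   only; GCC implements signed `>>' as an arithmetic shift):

       int64_t ashr64 (int64_t x, uint8_t n)
       {
           n &= 63;           // andi r16, 63
           while (n--)        // dec r16;  brne 1b  (skipped if n == 0)
               x >>= 1;       // asr r25;  ror r24 ... ror r18
           return x;
       }
*/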

#if defined (L_lshrdi3)
;; Logical shift right
;; r25:r18 = lshr64 (r25:r18, r17:r16)
DEFUN __lshrdi3
    push r16
    andi r16, 63
    breq 2f
1:  lsr r25
    ror r24
    ror r23
    ror r22
    ror r21
    ror r20
    ror r19
    ror r18
    dec r16
    brne 1b
2:  pop r16
    ret
ENDF __lshrdi3
#endif /* defined (L_lshrdi3) */

#if defined (L_ashldi3)
;; Shift left
;; r25:r18 = ashl64 (r25:r18, r17:r16)
DEFUN __ashldi3
    push r16
    andi r16, 63
    breq 2f
1:  lsl r18
    rol r19
    rol r20
    rol r21
    rol r22
    rol r23
    rol r24
    rol r25
    dec r16
    brne 1b
2:  pop r16
    ret
ENDF __ashldi3
#endif /* defined (L_ashldi3) */

#if defined (L_rotldi3)
;; Rotate left
;; r25:r18 = rotl64 (r25:r18, r17:r16)
DEFUN __rotldi3
    push r16
    andi r16, 63
    breq 2f
1:  lsl r18
    rol r19
    rol r20
    rol r21
    rol r22
    rol r23
    rol r24
    rol r25
    adc r18, __zero_reg__
    dec r16
    brne 1b
2:  pop r16
    ret
ENDF __rotldi3
#endif /* defined (L_rotldi3) */
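
/* __rotldi3 is the shift-left loop plus one extra instruction: the bit
   that `rol r25' moves out into the carry is added back into bit 0 of r18
   by `adc r18, __zero_reg__', turning the shift into a rotation.  C sketch
   (illustrative only):

       uint64_t rotl64 (uint64_t x, uint8_t n)
       {
           n &= 63;
           while (n--)
               x = (x << 1) | (x >> 63);   // adc feeds the carry back in
           return x;
       }
*/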

\f
.section .text.libgcc.fmul, "ax", @progbits

/***********************************************************/
;;; Softmul versions of FMUL, FMULS and FMULSU to implement
;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
/***********************************************************/

#define A1 24
#define B1 25
#define C0 22
#define C1 23
#define A0 __tmp_reg__

#ifdef L_fmuls
;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmuls
    ;; A0.7 = negate result?
    mov A0, A1
    eor A0, B1
    ;; B1 = |B1|
    sbrc B1, 7
    neg B1
    XJMP __fmulsu_exit
ENDF __fmuls
#endif /* L_fmuls */

#ifdef L_fmulsu
;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmulsu
    ;; A0.7 = negate result?
    mov A0, A1
    ;; FALLTHRU
ENDF __fmulsu

;; Helper for __fmuls and __fmulsu
DEFUN __fmulsu_exit
    ;; A1 = |A1|
    sbrc A1, 7
    neg A1
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have a problem with skipping a 2-word instruction
    tst A0
    brmi 1f
#else
    sbrs A0, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP __fmul
1:  XCALL __fmul
    ;; C = -C iff A0.7 = 1
    com C1
    neg C0
    sbci C1, -1
    ret
ENDF __fmulsu_exit
#endif /* L_fmulsu */


#ifdef L_fmul
;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmul
    ;; clear result
    clr C0
    clr C1
    clr A0
1:  tst B1
    ;; 1.0 = 0x80, so test bit 7 of B to see if A must be added to C.
2:  brpl 3f
    ;; C += A
    add C0, A0
    adc C1, A1
3:  ;; A >>= 1
    lsr A1
    ror A0
    ;; B <<= 1
    lsl B1
    brne 2b
    ret
ENDF __fmul
#endif /* L_fmul */
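
/* __fmul computes the same 1.7 x 1.7 -> 1.15 fractional product as the
   FMUL instruction, i.e. the low 16 bits of (a * b) << 1, by shift-and-add:
   A sits in the high byte of a 16-bit operand that is halved each round,
   while B is shifted left so that its MSB selects whether to add.  The
   __fmuls/__fmulsu wrappers above reduce to this routine by taking absolute
   values and conditionally negating the result according to the operand
   signs.  C sketch (illustrative only; hypothetical name, <stdint.h> types):

       uint16_t fmul8 (uint8_t a, uint8_t b)
       {
           uint16_t acc = 0;
           uint16_t aa = (uint16_t) a << 8;   // A1:A0 with A0 = 0
           while (b)
             {
               if (b & 0x80)                  // brpl 3f: B's MSB selects the add
                   acc += aa;                 // add C0, A0;  adc C1, A1
               aa >>= 1;                      // lsr A1;  ror A0
               b <<= 1;                       // lsl B1;  brne 2b
             }
           return acc;                        // low 16 bits of (a * b) << 1
       }
*/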

#undef A0
#undef A1
#undef B1
#undef C0
#undef C1