1 /* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998-2023 Free Software Foundation, Inc.
3 Contributed by Denis Chertykov <chertykov@gmail.com>
4
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
9
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #if defined (__AVR_TINY__)
25 #define __zero_reg__ r17
26 #define __tmp_reg__ r16
27 #else
28 #define __zero_reg__ r1
29 #define __tmp_reg__ r0
30 #endif
31 #define __SREG__ 0x3f
32 #if defined (__AVR_HAVE_SPH__)
33 #define __SP_H__ 0x3e
34 #endif
35 #define __SP_L__ 0x3d
36 #define __RAMPZ__ 0x3B
37 #define __EIND__ 0x3C
38
39 /* Most of the functions here are called directly from avr.md
40 patterns, instead of using the standard libcall mechanisms.
41 This can make better code because GCC knows exactly which
42 of the call-used registers (not all of them) are clobbered. */
43
 44 /* FIXME: At present, there is no SORT directive in the linker
 45    script, so we must not assume that different modules in the
 46    same input section, like .libgcc.text.mul, will be located
 47    close together.  Therefore, we cannot use RCALL/RJMP to
 48    call a function like __udivmodhi4 from __divmodhi4 and have
 49    to use the lengthier XCALL/XJMP instead, even though both
 50    are in the same input section and all such input sections
 51    together are small enough to reach every location with an
 52    RCALL/RJMP instruction. */
53
54 #if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
55 #error device not supported
56 #endif
57
58 .macro mov_l r_dest, r_src
59 #if defined (__AVR_HAVE_MOVW__)
60 movw \r_dest, \r_src
61 #else
62 mov \r_dest, \r_src
63 #endif
64 .endm
65
66 .macro mov_h r_dest, r_src
67 #if defined (__AVR_HAVE_MOVW__)
68 ; empty
69 #else
70 mov \r_dest, \r_src
71 #endif
72 .endm
73
74 .macro wmov r_dest, r_src
75 #if defined (__AVR_HAVE_MOVW__)
76 movw \r_dest, \r_src
77 #else
78 mov \r_dest, \r_src
79 mov \r_dest+1, \r_src+1
80 #endif
81 .endm
82
83 #if defined (__AVR_HAVE_JMP_CALL__)
84 #define XCALL call
85 #define XJMP jmp
86 #else
87 #define XCALL rcall
88 #define XJMP rjmp
89 #endif
90
91 #if defined (__AVR_HAVE_EIJMP_EICALL__)
92 #define XICALL eicall
93 #define XIJMP eijmp
94 #else
95 #define XICALL icall
96 #define XIJMP ijmp
97 #endif
98
99 ;; Prologue stuff
100
101 .macro do_prologue_saves n_pushed n_frame=0
102 ldi r26, lo8(\n_frame)
103 ldi r27, hi8(\n_frame)
104 ldi r30, lo8(gs(.L_prologue_saves.\@))
105 ldi r31, hi8(gs(.L_prologue_saves.\@))
106 XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
107 .L_prologue_saves.\@:
108 .endm
109
110 ;; Epilogue stuff
111
112 .macro do_epilogue_restores n_pushed n_frame=0
113 in r28, __SP_L__
114 #ifdef __AVR_HAVE_SPH__
115 in r29, __SP_H__
116 .if \n_frame > 63
117 subi r28, lo8(-\n_frame)
118 sbci r29, hi8(-\n_frame)
119 .elseif \n_frame > 0
120 adiw r28, \n_frame
121 .endif
122 #else
123 clr r29
124 .if \n_frame > 0
125 subi r28, lo8(-\n_frame)
126 .endif
127 #endif /* HAVE SPH */
128 ldi r30, \n_pushed
129 XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
130 .endm
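;; Note on the computed entry point used by both macros (informal sketch
;; of the mechanism, derived from __prologue_saves__ below):  that routine
;; is a run of 18 consecutive PUSH instructions (R2..R17, R28, R29), each
;; 2 bytes long, followed by the frame/SP setup.  Jumping to
;; __prologue_saves__ + (18 - n_pushed) * 2 therefore skips the first
;; 18 - n_pushed pushes, so exactly the last n_pushed registers are saved.
;; Example: n_pushed = 10 enters 16 bytes into the run and saves
;; R10..R17, R28, R29.  __epilogue_restores__ uses the same trick with its
;; run of 2-byte LDD instructions.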
131
132 ;; Support function entry and exit for convenience
133
134 .macro wsubi r_arg1, i_arg2
135 #if defined (__AVR_TINY__)
136 subi \r_arg1, lo8(\i_arg2)
137 sbci \r_arg1+1, hi8(\i_arg2)
138 #else
139 sbiw \r_arg1, \i_arg2
140 #endif
141 .endm
142
143 .macro waddi r_arg1, i_arg2
144 #if defined (__AVR_TINY__)
145 subi \r_arg1, lo8(-\i_arg2)
146 sbci \r_arg1+1, hi8(-\i_arg2)
147 #else
148 adiw \r_arg1, \i_arg2
149 #endif
150 .endm
151
152 .macro DEFUN name
153 .global \name
154 .func \name
155 \name:
156 .endm
157
158 .macro ENDF name
159 .size \name, .-\name
160 .endfunc
161 .endm
162
163 .macro FALIAS name
164 .global \name
165 .func \name
166 \name:
167 .size \name, .-\name
168 .endfunc
169 .endm
170
171 ;; Skip next instruction, typically a jump target
172 #define skip cpse 16,16
173
174 ;; Negate a 2-byte value held in consecutive registers
175 .macro NEG2 reg
176 com \reg+1
177 neg \reg
178 sbci \reg+1, -1
179 .endm
180
181 ;; Negate a 4-byte value held in consecutive registers
182 ;; Sets the V flag for signed overflow tests if REG >= 16
183 .macro NEG4 reg
184 com \reg+3
185 com \reg+2
186 com \reg+1
187 .if \reg >= 16
188 neg \reg
189 sbci \reg+1, -1
190 sbci \reg+2, -1
191 sbci \reg+3, -1
192 .else
193 com \reg
194 adc \reg, __zero_reg__
195 adc \reg+1, __zero_reg__
196 adc \reg+2, __zero_reg__
197 adc \reg+3, __zero_reg__
198 .endif
199 .endm
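;; Both macros rely on the two's-complement identity  -x = ~x + 1:
;; every byte is complemented and the trailing +1 is rippled up through
;; the carry chain (SBCI ..., -1 adds 1 minus the borrow left by NEG; in
;; the low-register variant, COM sets Carry, which the ADC chain adds).
;; Worked example for NEG2 with x = 0x0001:  ~x = 0xfffe, +1 = 0xffff = -1.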
200
201 #define exp_lo(N) hlo8 ((N) << 23)
202 #define exp_hi(N) hhi8 ((N) << 23)
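;; exp_lo(N) and exp_hi(N) yield the two bytes of an IEEE-754 single that
;; carry an exponent field of value N:  the exponent occupies bits 30..23,
;; so (N) << 23 puts it in place and hlo8/hhi8 pick byte 2 and byte 3 of
;; that 32-bit constant (presumably for the floating-point helpers further
;; down in this file).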
203
204 \f
205 .section .text.libgcc.mul, "ax", @progbits
206
207 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
208 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
209 #if !defined (__AVR_HAVE_MUL__)
210 /*******************************************************
211 Multiplication 8 x 8 without MUL
212 *******************************************************/
213 #if defined (L_mulqi3)
214
215 #define r_arg2 r22 /* multiplicand */
216 #define r_arg1 r24 /* multiplier */
217 #define r_res __tmp_reg__ /* result */
218
219 DEFUN __mulqi3
220 clr r_res ; clear result
221 __mulqi3_loop:
222 sbrc r_arg1,0
223 add r_res,r_arg2
224 add r_arg2,r_arg2 ; shift multiplicand
225 breq __mulqi3_exit ; exit if multiplicand is now 0
226 lsr r_arg1 ; shift multiplier
227 brne __mulqi3_loop ; loop while multiplier != 0
228 __mulqi3_exit:
229 mov r_arg1,r_res ; result to return register
230 ret
231 ENDF __mulqi3
232
233 #undef r_arg2
234 #undef r_arg1
235 #undef r_res
236
237 #endif /* defined (L_mulqi3) */
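;; A rough C equivalent of the shift-and-add loop above (illustrative
;; sketch only; the function name and prototype here are made up, the
;; real entry point is __mulqi3 with the register interface shown above):
;;
;;     unsigned char mulqi3_sketch (unsigned char a, unsigned char b)
;;     {
;;         unsigned char res = 0;
;;         while (a && b)          /* stop when either operand is exhausted */
;;         {
;;             if (a & 1)
;;                 res += b;       /* bit n of A set --> add B * 2^n */
;;             b <<= 1;            /* B now holds B * 2^(n+1) (mod 256) */
;;             a >>= 1;
;;         }
;;         return res;
;;     }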
238
239
240 /*******************************************************
241 Widening Multiplication 16 = 8 x 8 without MUL
242 Multiplication 16 x 16 without MUL
243 *******************************************************/
244
245 #define A0 22
246 #define A1 23
247 #define B0 24
248 #define BB0 20
249 #define B1 25
250 ;; Output overlaps input, thus expand result in CC0/1
251 #define C0 24
252 #define C1 25
253 #define CC0 __tmp_reg__
254 #define CC1 21
255
256 #if defined (L_umulqihi3)
257 ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
258 ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
259 ;;; Clobbers: __tmp_reg__, R21..R23
260 DEFUN __umulqihi3
261 clr A1
262 clr B1
263 XJMP __mulhi3
264 ENDF __umulqihi3
265 #endif /* L_umulqihi3 */
266
267 #if defined (L_mulqihi3)
268 ;;; R25:R24 = (signed int) R22 * (signed int) R24
269 ;;; (C1:C0) = (signed int) A0 * (signed int) B0
270 ;;; Clobbers: __tmp_reg__, R20..R23
271 DEFUN __mulqihi3
272 ;; Sign-extend B0
273 clr B1
274 sbrc B0, 7
275 com B1
276 ;; The multiplication runs twice as fast if A1 is zero, thus:
277 ;; Zero-extend A0
278 clr A1
279 #ifdef __AVR_HAVE_JMP_CALL__
280 ;; Store B0 * sign of A
281 clr BB0
282 sbrc A0, 7
283 mov BB0, B0
284 call __mulhi3
285 #else /* have no CALL */
286 ;; Skip sign-extension of A if A >= 0
287 ;; Same size as with the first alternative but avoids errata skip
288 ;; and is faster if A >= 0
289 sbrs A0, 7
290 rjmp __mulhi3
291 ;; If A < 0 store B
292 mov BB0, B0
293 rcall __mulhi3
294 #endif /* HAVE_JMP_CALL */
295 ;; 1-extend A after the multiplication
296 sub C1, BB0
297 ret
298 ENDF __mulqihi3
299 #endif /* L_mulqihi3 */
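;; Why the single "sub C1, BB0" above suffices as sign correction
;; (informal sketch):  A is handed to __mulhi3 zero-extended, i.e. as
;; A + 256 whenever A < 0.  Since (A + 256) * B = A * B + 256 * B, the
;; signed product is recovered by subtracting B from the high byte of the
;; 16-bit result -- and BB0 holds B exactly when A was negative and 0
;; otherwise, so the subtraction is a no-op for A >= 0.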
300
301 #if defined (L_mulhi3)
302 ;;; R25:R24 = R23:R22 * R25:R24
303 ;;; (C1:C0) = (A1:A0) * (B1:B0)
304 ;;; Clobbers: __tmp_reg__, R21..R23
305 DEFUN __mulhi3
306
307 ;; Clear result
308 clr CC0
309 clr CC1
310 rjmp 3f
311 1:
312 ;; Bit n of A is 1 --> C += B << n
313 add CC0, B0
314 adc CC1, B1
315 2:
316 lsl B0
317 rol B1
318 3:
319 ;; If B == 0 we are ready
320 wsubi B0, 0
321 breq 9f
322
323 ;; Carry = n-th bit of A
324 lsr A1
325 ror A0
326 ;; If bit n of A is set, then go add B * 2^n to C
327 brcs 1b
328
329 ;; Carry = 0 --> The ROR above acts like CP A0, 0
330 ;; Thus, it is sufficient to CPC the high part to test A against 0
331 cpc A1, __zero_reg__
332 ;; Only proceed if A != 0
333 brne 2b
334 9:
335 ;; Move Result into place
336 mov C0, CC0
337 mov C1, CC1
338 ret
339 ENDF __mulhi3
340 #endif /* L_mulhi3 */
341
342 #undef A0
343 #undef A1
344 #undef B0
345 #undef BB0
346 #undef B1
347 #undef C0
348 #undef C1
349 #undef CC0
350 #undef CC1
351
352 \f
353 #define A0 22
354 #define A1 A0+1
355 #define A2 A0+2
356 #define A3 A0+3
357
358 #define B0 18
359 #define B1 B0+1
360 #define B2 B0+2
361 #define B3 B0+3
362
363 #define CC0 26
364 #define CC1 CC0+1
365 #define CC2 30
366 #define CC3 CC2+1
367
368 #define C0 22
369 #define C1 C0+1
370 #define C2 C0+2
371 #define C3 C0+3
372
373 /*******************************************************
374 Widening Multiplication 32 = 16 x 16 without MUL
375 *******************************************************/
376
377 #if defined (L_umulhisi3)
378 DEFUN __umulhisi3
379 wmov B0, 24
380 ;; Zero-extend B
381 clr B2
382 clr B3
383 ;; Zero-extend A
384 wmov A2, B2
385 XJMP __mulsi3
386 ENDF __umulhisi3
387 #endif /* L_umulhisi3 */
388
389 #if defined (L_mulhisi3)
390 DEFUN __mulhisi3
391 wmov B0, 24
392 ;; Sign-extend B
393 lsl r25
394 sbc B2, B2
395 mov B3, B2
396 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
397 ;; Sign-extend A
398 clr A2
399 sbrc A1, 7
400 com A2
401 mov A3, A2
402 XJMP __mulsi3
403 #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
404 ;; Zero-extend A and __mulsi3 will run at least twice as fast
405 ;; compared to a sign-extended A.
406 clr A2
407 clr A3
408 sbrs A1, 7
409 XJMP __mulsi3
410 ;; If A < 0 then account for the B * 0xffff.... part before the
411 ;; actual multiplication by initializing the high part of the
412 ;; result CC with -B.
413 wmov CC2, A2
414 sub CC2, B0
415 sbc CC3, B1
416 XJMP __mulsi3_helper
417 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
418 ENDF __mulhisi3
419 #endif /* L_mulhisi3 */
420
421
422 /*******************************************************
423 Multiplication 32 x 32 without MUL
424 *******************************************************/
425
426 #if defined (L_mulsi3)
427 DEFUN __mulsi3
428 #if defined (__AVR_TINY__)
429 in r26, __SP_L__ ; safe to use X, as it is CC0/CC1
430 in r27, __SP_H__
431 subi r26, lo8(-3) ; Add 3 to point past return address
432 sbci r27, hi8(-3)
433 push B0 ; save callee saved regs
434 push B1
435 ld B0, X+ ; load from caller stack
436 ld B1, X+
437 ld B2, X+
438 ld B3, X
439 #endif
440 ;; Clear result
441 clr CC2
442 clr CC3
443 ;; FALLTHRU
444 ENDF __mulsi3
445
446 DEFUN __mulsi3_helper
447 clr CC0
448 clr CC1
449 rjmp 3f
450
451 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
452 ;; CC += B
453 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
454
455 2: ;; B <<= 1
456 lsl B0 $ rol B1 $ rol B2 $ rol B3
457
458 3: ;; A >>= 1: Carry = n-th bit of A
459 lsr A3 $ ror A2 $ ror A1 $ ror A0
460
461 brcs 1b
462 ;; Only continue if A != 0
463 sbci A1, 0
464 brne 2b
465 wsubi A2, 0
466 brne 2b
467
468 ;; All bits of A are consumed: Copy result to return register C
469 wmov C0, CC0
470 wmov C2, CC2
471 #if defined (__AVR_TINY__)
472 pop B1 ; restore callee saved regs
473 pop B0
474 #endif /* defined (__AVR_TINY__) */
475
476 ret
477 ENDF __mulsi3_helper
478 #endif /* L_mulsi3 */
479
480 #undef A0
481 #undef A1
482 #undef A2
483 #undef A3
484 #undef B0
485 #undef B1
486 #undef B2
487 #undef B3
488 #undef C0
489 #undef C1
490 #undef C2
491 #undef C3
492 #undef CC0
493 #undef CC1
494 #undef CC2
495 #undef CC3
496
497 #endif /* !defined (__AVR_HAVE_MUL__) */
498 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
499 \f
500 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
501 #if defined (__AVR_HAVE_MUL__)
502 #define A0 26
503 #define B0 18
504 #define C0 22
505
506 #define A1 A0+1
507
508 #define B1 B0+1
509 #define B2 B0+2
510 #define B3 B0+3
511
512 #define C1 C0+1
513 #define C2 C0+2
514 #define C3 C0+3
515
516 /*******************************************************
517 Widening Multiplication 32 = 16 x 16 with MUL
518 *******************************************************/
519
520 #if defined (L_mulhisi3)
521 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
522 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
523 ;;; Clobbers: __tmp_reg__
524 DEFUN __mulhisi3
525 XCALL __umulhisi3
526 ;; Sign-extend B
527 tst B1
528 brpl 1f
529 sub C2, A0
530 sbc C3, A1
531 1: ;; Sign-extend A
532 XJMP __usmulhisi3_tail
533 ENDF __mulhisi3
534 #endif /* L_mulhisi3 */
535
536 #if defined (L_usmulhisi3)
537 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
538 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
539 ;;; Clobbers: __tmp_reg__
540 DEFUN __usmulhisi3
541 XCALL __umulhisi3
542 ;; FALLTHRU
543 ENDF __usmulhisi3
544
545 DEFUN __usmulhisi3_tail
546 ;; Sign-extend A
547 sbrs A1, 7
548 ret
549 sub C2, B0
550 sbc C3, B1
551 ret
552 ENDF __usmulhisi3_tail
553 #endif /* L_usmulhisi3 */
554
555 #if defined (L_umulhisi3)
556 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
557 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
558 ;;; Clobbers: __tmp_reg__
559 DEFUN __umulhisi3
560 mul A0, B0
561 movw C0, r0
562 mul A1, B1
563 movw C2, r0
564 mul A0, B1
565 #ifdef __AVR_HAVE_JMP_CALL__
566 ;; This function is used by many other routines, often multiple times.
567 ;; Therefore, if the flash size is not too limited, avoid the RCALL
568 ;; and invest 6 Bytes to speed things up.
569 add C1, r0
570 adc C2, r1
571 clr __zero_reg__
572 adc C3, __zero_reg__
573 #else
574 rcall 1f
575 #endif
576 mul A1, B0
577 1: add C1, r0
578 adc C2, r1
579 clr __zero_reg__
580 adc C3, __zero_reg__
581 ret
582 ENDF __umulhisi3
583 #endif /* L_umulhisi3 */
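;; The MUL-based 16 x 16 -> 32 routine above is the usual four-partial-
;; product schoolbook scheme.  A rough C sketch (names made up here;
;; assumes 16-bit int and 32-bit long, as on AVR):
;;
;;     unsigned long umulhisi3_sketch (unsigned int a, unsigned int b)
;;     {
;;         unsigned char a0 = a, a1 = a >> 8;
;;         unsigned char b0 = b, b1 = b >> 8;
;;         unsigned long c = (unsigned long) a0 * b0;   /* -> C1:C0 */
;;         c += (unsigned long) a1 * b1 << 16;          /* -> C3:C2 */
;;         c += (unsigned long) a0 * b1 << 8;           /* middle terms,    */
;;         c += (unsigned long) a1 * b0 << 8;           /* added at offset 1 */
;;         return c;
;;     }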
584
585 /*******************************************************
586 Widening Multiplication 32 = 16 x 32 with MUL
587 *******************************************************/
588
589 #if defined (L_mulshisi3)
590 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
591 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
592 ;;; Clobbers: __tmp_reg__
593 DEFUN __mulshisi3
594 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
595 ;; Some cores have a problem skipping a 2-word instruction
596 tst A1
597 brmi __mulohisi3
598 #else
599 sbrs A1, 7
600 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
601 XJMP __muluhisi3
602 ;; FALLTHRU
603 ENDF __mulshisi3
604
605 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
606 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
607 ;;; Clobbers: __tmp_reg__
608 DEFUN __mulohisi3
609 XCALL __muluhisi3
610 ;; One-extend R27:R26 (A1:A0)
611 sub C2, B0
612 sbc C3, B1
613 ret
614 ENDF __mulohisi3
615 #endif /* L_mulshisi3 */
616
617 #if defined (L_muluhisi3)
618 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
619 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
620 ;;; Clobbers: __tmp_reg__
621 DEFUN __muluhisi3
622 XCALL __umulhisi3
623 mul A0, B3
624 add C3, r0
625 mul A1, B2
626 add C3, r0
627 mul A0, B2
628 add C2, r0
629 adc C3, r1
630 clr __zero_reg__
631 ret
632 ENDF __muluhisi3
633 #endif /* L_muluhisi3 */
634
635 /*******************************************************
636 Multiplication 32 x 32 with MUL
637 *******************************************************/
638
639 #if defined (L_mulsi3)
640 ;;; R25:R22 = R25:R22 * R21:R18
641 ;;; (C3:C0) = C3:C0 * B3:B0
642 ;;; Clobbers: R26, R27, __tmp_reg__
643 DEFUN __mulsi3
644 movw A0, C0
645 push C2
646 push C3
647 XCALL __muluhisi3
648 pop A1
649 pop A0
650 ;; A1:A0 now contains the high word of A
651 mul A0, B0
652 add C2, r0
653 adc C3, r1
654 mul A0, B1
655 add C3, r0
656 mul A1, B0
657 add C3, r0
658 clr __zero_reg__
659 ret
660 ENDF __mulsi3
661 #endif /* L_mulsi3 */
662
663 #undef A0
664 #undef A1
665
666 #undef B0
667 #undef B1
668 #undef B2
669 #undef B3
670
671 #undef C0
672 #undef C1
673 #undef C2
674 #undef C3
675
676 #endif /* __AVR_HAVE_MUL__ */
677
678 /*******************************************************
679 Multiplication 24 x 24 with MUL
680 *******************************************************/
681
682 #if defined (L_mulpsi3)
683
684 ;; A[0..2]: In: Multiplicand; Out: Product
685 #define A0 22
686 #define A1 A0+1
687 #define A2 A0+2
688
689 ;; B[0..2]: In: Multiplier
690 #define B0 18
691 #define B1 B0+1
692 #define B2 B0+2
693
694 #if defined (__AVR_HAVE_MUL__)
695
696 ;; C[0..2]: Expand Result
697 #define C0 22
698 #define C1 C0+1
699 #define C2 C0+2
700
701 ;; R24:R22 *= R20:R18
702 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
703
704 #define AA0 26
705 #define AA2 21
706
707 DEFUN __mulpsi3
708 wmov AA0, A0
709 mov AA2, A2
710 XCALL __umulhisi3
711 mul AA2, B0 $ add C2, r0
712 mul AA0, B2 $ add C2, r0
713 clr __zero_reg__
714 ret
715 ENDF __mulpsi3
716
717 #undef AA2
718 #undef AA0
719
720 #undef C2
721 #undef C1
722 #undef C0
723
724 #else /* !HAVE_MUL */
725 ;; C[0..2]: Expand Result
726 #if defined (__AVR_TINY__)
727 #define C0 16
728 #else
729 #define C0 0
730 #endif /* defined (__AVR_TINY__) */
731 #define C1 C0+1
732 #define C2 21
733
734 ;; R24:R22 *= R20:R18
735 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
736
737 DEFUN __mulpsi3
738 #if defined (__AVR_TINY__)
739 in r26,__SP_L__
740 in r27,__SP_H__
741 subi r26, lo8(-3) ; Add 3 to point past return address
742 sbci r27, hi8(-3)
743 push B0 ; save callee saved regs
744 push B1
745 ld B0,X+ ; load from caller stack
746 ld B1,X+
747 ld B2,X+
748 #endif /* defined (__AVR_TINY__) */
749
750 ;; C[] = 0
751 clr __tmp_reg__
752 clr C2
753
754 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
755 LSR B2 $ ror B1 $ ror B0
756
757 ;; If the N-th Bit of B[] was set...
758 brcc 1f
759
760 ;; ...then add A[] * 2^N to the Result C[]
761 ADD C0,A0 $ adc C1,A1 $ adc C2,A2
762
763 1: ;; Multiply A[] by 2
764 LSL A0 $ rol A1 $ rol A2
765
766 ;; Loop until B[] is 0
767 subi B0,0 $ sbci B1,0 $ sbci B2,0
768 brne 0b
769
770 ;; Copy C[] to the return Register A[]
771 wmov A0, C0
772 mov A2, C2
773
774 clr __zero_reg__
775 #if defined (__AVR_TINY__)
776 pop B1
777 pop B0
778 #endif /* (__AVR_TINY__) */
779 ret
780 ENDF __mulpsi3
781
782 #undef C2
783 #undef C1
784 #undef C0
785
786 #endif /* HAVE_MUL */
787
788 #undef B2
789 #undef B1
790 #undef B0
791
792 #undef A2
793 #undef A1
794 #undef A0
795
796 #endif /* L_mulpsi3 */
797
798 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
799
800 ;; A[0..2]: In: Multiplicand
801 #define A0 22
802 #define A1 A0+1
803 #define A2 A0+2
804
805 ;; BB: In: Multiplier
806 #define BB 25
807
808 ;; C[0..2]: Result
809 #define C0 18
810 #define C1 C0+1
811 #define C2 C0+2
812
813 ;; C[] = A[] * sign_extend (BB)
814 DEFUN __mulsqipsi3
815 mul A0, BB
816 movw C0, r0
817 mul A2, BB
818 mov C2, r0
819 mul A1, BB
820 add C1, r0
821 adc C2, r1
822 clr __zero_reg__
823 sbrs BB, 7
824 ret
825 ;; One-extend BB
826 sub C1, A0
827 sbc C2, A1
828 ret
829 ENDF __mulsqipsi3
830
831 #undef C2
832 #undef C1
833 #undef C0
834
835 #undef BB
836
837 #undef A2
838 #undef A1
839 #undef A0
840
841 #endif /* L_mulsqipsi3 && HAVE_MUL */
842
843 /*******************************************************
844 Multiplication 64 x 64
845 *******************************************************/
846
847 ;; A[] = A[] * B[]
848
849 ;; A[0..7]: In: Multiplicand
850 ;; Out: Product
851 #define A0 18
852 #define A1 A0+1
853 #define A2 A0+2
854 #define A3 A0+3
855 #define A4 A0+4
856 #define A5 A0+5
857 #define A6 A0+6
858 #define A7 A0+7
859
860 ;; B[0..7]: In: Multiplier
861 #define B0 10
862 #define B1 B0+1
863 #define B2 B0+2
864 #define B3 B0+3
865 #define B4 B0+4
866 #define B5 B0+5
867 #define B6 B0+6
868 #define B7 B0+7
869
870 #ifndef __AVR_TINY__
871 #if defined (__AVR_HAVE_MUL__)
872 ;; Define C[] for convenience
873 ;; Notice that parts of C[] overlap A[] respective B[]
874 #define C0 16
875 #define C1 C0+1
876 #define C2 20
877 #define C3 C2+1
878 #define C4 28
879 #define C5 C4+1
880 #define C6 C4+2
881 #define C7 C4+3
882
883 #if defined (L_muldi3)
884
885 ;; A[] *= B[]
886 ;; R25:R18 *= R17:R10
887 ;; Ordinary ABI-Function
888
889 DEFUN __muldi3
890 push r29
891 push r28
892 push r17
893 push r16
894
895 ;; Counting in Words, we have to perform a 4 * 4 Multiplication
896
897 ;; 3 * 0 + 0 * 3
898 mul A7,B0 $ $ mov C7,r0
899 mul A0,B7 $ $ add C7,r0
900 mul A6,B1 $ $ add C7,r0
901 mul A6,B0 $ mov C6,r0 $ add C7,r1
902 mul B6,A1 $ $ add C7,r0
903 mul B6,A0 $ add C6,r0 $ adc C7,r1
904
905 ;; 1 * 2
906 mul A2,B4 $ add C6,r0 $ adc C7,r1
907 mul A3,B4 $ $ add C7,r0
908 mul A2,B5 $ $ add C7,r0
909
910 push A5
911 push A4
912 push B1
913 push B0
914 push A3
915 push A2
916
917 ;; 0 * 0
918 wmov 26, B0
919 XCALL __umulhisi3
920 wmov C0, 22
921 wmov C2, 24
922
923 ;; 0 * 2
924 wmov 26, B4
925 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
926
927 wmov 26, B2
928 ;; 0 * 1
929 XCALL __muldi3_6
930
931 pop A0
932 pop A1
933 ;; 1 * 1
934 wmov 26, B2
935 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
936
937 pop r26
938 pop r27
939 ;; 1 * 0
940 XCALL __muldi3_6
941
942 pop A0
943 pop A1
944 ;; 2 * 0
945 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
946
947 ;; 2 * 1
948 wmov 26, B2
949 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
950
951 ;; A[] = C[]
952 wmov A0, C0
953 ;; A2 = C2 already
954 wmov A4, C4
955 wmov A6, C6
956
957 pop r16
958 pop r17
959 pop r28
960 pop r29
961 ret
962 ENDF __muldi3
963 #endif /* L_muldi3 */
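;; Sketch of the decomposition used above:  with A and B split into
;; 16-bit words a0..a3 / b0..b3, only partial products with i + j <= 3
;; can contribute to the low 64 bits that are kept:
;;
;;     A * B  =  sum over i+j <= 3 of  a_i * b_j * 2^(16*(i+j))   (mod 2^64)
;;
;; Each 16 x 16 -> 32 term is one __umulhisi3 call; __muldi3_6 folds such
;; a term into the result at word offset 1 (i + j == 1), and the i+j == 3
;; terms only need their low halves, which is why they are done with
;; plain byte MULs at the top of the function.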
964
965 #if defined (L_muldi3_6)
966 ;; A helper for some 64-bit multiplications with MUL available
967 DEFUN __muldi3_6
968 __muldi3_6:
969 XCALL __umulhisi3
970 add C2, 22
971 adc C3, 23
972 adc C4, 24
973 adc C5, 25
974 brcc 0f
975 adiw C6, 1
976 0: ret
977 ENDF __muldi3_6
978 #endif /* L_muldi3_6 */
979
980 #undef C7
981 #undef C6
982 #undef C5
983 #undef C4
984 #undef C3
985 #undef C2
986 #undef C1
987 #undef C0
988
989 #else /* !HAVE_MUL */
990
991 #if defined (L_muldi3)
992
993 #define C0 26
994 #define C1 C0+1
995 #define C2 C0+2
996 #define C3 C0+3
997 #define C4 C0+4
998 #define C5 C0+5
999 #define C6 0
1000 #define C7 C6+1
1001
1002 #define Loop 9
1003
1004 ;; A[] *= B[]
1005 ;; R25:R18 *= R17:R10
1006 ;; Ordinary ABI-Function
1007
1008 DEFUN __muldi3
1009 push r29
1010 push r28
1011 push Loop
1012
1013 ldi C0, 64
1014 mov Loop, C0
1015
1016 ;; C[] = 0
1017 clr __tmp_reg__
1018 wmov C0, 0
1019 wmov C2, 0
1020 wmov C4, 0
1021
1022 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
1023 ;; where N = 64 - Loop.
1024 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
1025 ;; B[] will have its initial Value again.
1026 LSR B7 $ ror B6 $ ror B5 $ ror B4
1027 ror B3 $ ror B2 $ ror B1 $ ror B0
1028
1029 ;; If the N-th Bit of B[] was set then...
1030 brcc 1f
1031 ;; ...finish Rotation...
1032 ori B7, 1 << 7
1033
1034 ;; ...and add A[] * 2^N to the Result C[]
1035 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
1036 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
1037
1038 1: ;; Multiply A[] by 2
1039 LSL A0 $ rol A1 $ rol A2 $ rol A3
1040 rol A4 $ rol A5 $ rol A6 $ rol A7
1041
1042 dec Loop
1043 brne 0b
1044
1045 ;; We expanded the Result in C[]
1046 ;; Copy Result to the Return Register A[]
1047 wmov A0, C0
1048 wmov A2, C2
1049 wmov A4, C4
1050 wmov A6, C6
1051
1052 clr __zero_reg__
1053 pop Loop
1054 pop r28
1055 pop r29
1056 ret
1057 ENDF __muldi3
1058
1059 #undef Loop
1060
1061 #undef C7
1062 #undef C6
1063 #undef C5
1064 #undef C4
1065 #undef C3
1066 #undef C2
1067 #undef C1
1068 #undef C0
1069
1070 #endif /* L_muldi3 */
1071 #endif /* HAVE_MUL */
1072 #endif /* if not __AVR_TINY__ */
1073
1074 #undef B7
1075 #undef B6
1076 #undef B5
1077 #undef B4
1078 #undef B3
1079 #undef B2
1080 #undef B1
1081 #undef B0
1082
1083 #undef A7
1084 #undef A6
1085 #undef A5
1086 #undef A4
1087 #undef A3
1088 #undef A2
1089 #undef A1
1090 #undef A0
1091
1092 /*******************************************************
1093 Widening Multiplication 64 = 32 x 32 with MUL
1094 *******************************************************/
1095
1096 #if defined (__AVR_HAVE_MUL__)
1097 #define A0 r22
1098 #define A1 r23
1099 #define A2 r24
1100 #define A3 r25
1101
1102 #define B0 r18
1103 #define B1 r19
1104 #define B2 r20
1105 #define B3 r21
1106
1107 #define C0 18
1108 #define C1 C0+1
1109 #define C2 20
1110 #define C3 C2+1
1111 #define C4 28
1112 #define C5 C4+1
1113 #define C6 C4+2
1114 #define C7 C4+3
1115
1116 #if defined (L_umulsidi3)
1117
1118 ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1119
1120 ;; R18[8] = R22[4] * R18[4]
1121 ;;
1122 ;; Ordinary ABI Function, but additionally sets
1123 ;; X = R20[2] = B2[2]
1124 ;; Z = R22[2] = A0[2]
1125 DEFUN __umulsidi3
1126 clt
1127 ;; FALLTHRU
1128 ENDF __umulsidi3
1129 ;; T = sign (A)
1130 DEFUN __umulsidi3_helper
1131 push 29 $ push 28 ; Y
1132 wmov 30, A2
1133 ;; Counting in Words, we have to perform 4 Multiplications
1134 ;; 0 * 0
1135 wmov 26, A0
1136 XCALL __umulhisi3
1137 push 23 $ push 22 ; C0
1138 wmov 28, B0
1139 wmov 18, B2
1140 wmov C2, 24
1141 push 27 $ push 26 ; A0
1142 push 19 $ push 18 ; B2
1143 ;;
1144 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1145 ;; B2 C2 -- -- -- B0 A2
1146 ;; 1 * 1
1147 wmov 26, 30 ; A2
1148 XCALL __umulhisi3
1149 ;; Sign-extend A. T holds the sign of A
1150 brtc 0f
1151 ;; Subtract B from the high part of the result
1152 sub 22, 28
1153 sbc 23, 29
1154 sbc 24, 18
1155 sbc 25, 19
1156 0: wmov 18, 28 ;; B0
1157 wmov C4, 22
1158 wmov C6, 24
1159 ;;
1160 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1161 ;; B0 C2 -- -- A2 C4 C6
1162 ;;
1163 ;; 1 * 0
1164 XCALL __muldi3_6
1165 ;; 0 * 1
1166 pop 26 $ pop 27 ;; B2
1167 pop 18 $ pop 19 ;; A0
1168 XCALL __muldi3_6
1169
1170 ;; Move result C into place and save A0 in Z
1171 wmov 22, C4
1172 wmov 24, C6
1173 wmov 30, 18 ; A0
1174 pop C0 $ pop C1
1175
1176 ;; Epilogue
1177 pop 28 $ pop 29 ;; Y
1178 ret
1179 ENDF __umulsidi3_helper
1180 #endif /* L_umulsidi3 */
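;; Sketch of the 32 x 32 -> 64 decomposition above (AH:AL and BH:BL are
;; just informal names for the 16-bit halves):
;;
;;     A * B = AL*BL + (AL*BH + AH*BL) * 2^16 + AH*BH * 2^32
;;
;; Each term is one __umulhisi3 call, and the two middle terms are folded
;; in with __muldi3_6.  For the signed case, the operands are treated as
;; unsigned and corrected afterwards:  a negative operand X was
;; effectively used as X + 2^32, so the other operand has to be
;; subtracted from the high 32 bits of the result (T carries the sign of
;; A for the helper; __mulsidi3 handles the sign of B).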
1181
1182
1183 #if defined (L_mulsidi3)
1184
1185 ;; Signed widening 64 = 32 * 32 Multiplication
1186 ;;
1187 ;; R18[8] = R22[4] * R18[4]
1188 ;; Ordinary ABI Function
1189 DEFUN __mulsidi3
1190 bst A3, 7
1191 sbrs B3, 7 ; Enhanced core has no skip bug
1192 XJMP __umulsidi3_helper
1193
1194 ;; B needs sign-extension
1195 push A3
1196 push A2
1197 XCALL __umulsidi3_helper
1198 ;; A0 survived in Z
1199 sub r22, r30
1200 sbc r23, r31
1201 pop r26
1202 pop r27
1203 sbc r24, r26
1204 sbc r25, r27
1205 ret
1206 ENDF __mulsidi3
1207 #endif /* L_mulsidi3 */
1208
1209 #undef A0
1210 #undef A1
1211 #undef A2
1212 #undef A3
1213 #undef B0
1214 #undef B1
1215 #undef B2
1216 #undef B3
1217 #undef C0
1218 #undef C1
1219 #undef C2
1220 #undef C3
1221 #undef C4
1222 #undef C5
1223 #undef C6
1224 #undef C7
1225 #endif /* HAVE_MUL */
1226
1227 /**********************************************************
1228 Widening Multiplication 64 = 32 x 32 without MUL
1229 **********************************************************/
1230 #ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
1231 #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1232 #define A0 18
1233 #define A1 A0+1
1234 #define A2 A0+2
1235 #define A3 A0+3
1236 #define A4 A0+4
1237 #define A5 A0+5
1238 #define A6 A0+6
1239 #define A7 A0+7
1240
1241 #define B0 10
1242 #define B1 B0+1
1243 #define B2 B0+2
1244 #define B3 B0+3
1245 #define B4 B0+4
1246 #define B5 B0+5
1247 #define B6 B0+6
1248 #define B7 B0+7
1249
1250 #define AA0 22
1251 #define AA1 AA0+1
1252 #define AA2 AA0+2
1253 #define AA3 AA0+3
1254
1255 #define BB0 18
1256 #define BB1 BB0+1
1257 #define BB2 BB0+2
1258 #define BB3 BB0+3
1259
1260 #define Mask r30
1261
1262 ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1263 ;;
1264 ;; R18[8] = R22[4] * R18[4]
1265 ;; Ordinary ABI Function
1266 DEFUN __mulsidi3
1267 set
1268 skip
1269 ;; FALLTHRU
1270 ENDF __mulsidi3
1271
1272 DEFUN __umulsidi3
1273 clt ; skipped
1274 ;; Save 10 Registers: R10..R17, R28, R29
1275 do_prologue_saves 10
1276 ldi Mask, 0xff
1277 bld Mask, 7
1278 ;; Move B into place...
1279 wmov B0, BB0
1280 wmov B2, BB2
1281 ;; ...and extend it
1282 and BB3, Mask
1283 lsl BB3
1284 sbc B4, B4
1285 mov B5, B4
1286 wmov B6, B4
1287 ;; Move A into place...
1288 wmov A0, AA0
1289 wmov A2, AA2
1290 ;; ...and extend it
1291 and AA3, Mask
1292 lsl AA3
1293 sbc A4, A4
1294 mov A5, A4
1295 wmov A6, A4
1296 XCALL __muldi3
1297 do_epilogue_restores 10
1298 ENDF __umulsidi3
1299
1300 #undef A0
1301 #undef A1
1302 #undef A2
1303 #undef A3
1304 #undef A4
1305 #undef A5
1306 #undef A6
1307 #undef A7
1308 #undef B0
1309 #undef B1
1310 #undef B2
1311 #undef B3
1312 #undef B4
1313 #undef B5
1314 #undef B6
1315 #undef B7
1316 #undef AA0
1317 #undef AA1
1318 #undef AA2
1319 #undef AA3
1320 #undef BB0
1321 #undef BB1
1322 #undef BB2
1323 #undef BB3
1324 #undef Mask
1325 #endif /* L_mulsidi3 && !HAVE_MUL */
1326 #endif /* if not __AVR_TINY__ */
1327 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1328
1329 \f
1330 .section .text.libgcc.div, "ax", @progbits
1331
1332 /*******************************************************
1333 Division 8 / 8 => (result + remainder)
1334 *******************************************************/
1335 #define r_rem r25 /* remainder */
1336 #define r_arg1 r24 /* dividend, quotient */
1337 #define r_arg2 r22 /* divisor */
1338 #define r_cnt r23 /* loop count */
1339
1340 #if defined (L_udivmodqi4)
1341 DEFUN __udivmodqi4
1342 sub r_rem,r_rem ; clear remainder and carry
1343 ldi r_cnt,9 ; init loop counter
1344 rjmp __udivmodqi4_ep ; jump to entry point
1345 __udivmodqi4_loop:
1346 rol r_rem ; shift dividend into remainder
1347 cp r_rem,r_arg2 ; compare remainder & divisor
1348 brcs __udivmodqi4_ep ; remainder < divisor
1349 sub r_rem,r_arg2 ; subtract divisor
1350 __udivmodqi4_ep:
1351 rol r_arg1 ; shift dividend (with CARRY)
1352 dec r_cnt ; decrement loop counter
1353 brne __udivmodqi4_loop
1354 com r_arg1 ; complement result
1355 ; because C flag was complemented in loop
1356 ret
1357 ENDF __udivmodqi4
1358 #endif /* defined (L_udivmodqi4) */
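;; The routine above is plain binary long division: shift one dividend bit
;; into the remainder, compare against the divisor and conditionally
;; subtract.  The quotient bits are collected in complemented form (the
;; carry flag from the compare) and fixed up by the final COM.  A rough C
;; sketch of the computation (names made up; the real interface is the
;; register usage documented above):
;;
;;     unsigned char udivmodqi4_sketch (unsigned char num, unsigned char den,
;;                                      unsigned char *rem)
;;     {
;;         unsigned char quo = 0, r = 0;
;;         for (unsigned char i = 0; i < 8; i++)
;;         {
;;             r = (r << 1) | (num >> 7);  /* next dividend bit into remainder */
;;             num <<= 1;
;;             quo <<= 1;
;;             if (r >= den)               /* divisor fits: subtract, set bit */
;;             {
;;                 r -= den;
;;                 quo |= 1;
;;             }
;;         }
;;         *rem = r;                       /* remainder */
;;         return quo;                     /* quotient */
;;     }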
1359
1360 #if defined (L_divmodqi4)
1361 DEFUN __divmodqi4
1362 bst r_arg1,7 ; store sign of dividend
1363 mov __tmp_reg__,r_arg1
1364 eor __tmp_reg__,r_arg2; r0.7 is sign of result
1365 sbrc r_arg1,7
1366 neg r_arg1 ; dividend negative : negate
1367 sbrc r_arg2,7
1368 neg r_arg2 ; divisor negative : negate
1369 XCALL __udivmodqi4 ; do the unsigned div/mod
1370 brtc __divmodqi4_1
1371 neg r_rem ; correct remainder sign
1372 __divmodqi4_1:
1373 sbrc __tmp_reg__,7
1374 neg r_arg1 ; correct result sign
1375 __divmodqi4_exit:
1376 ret
1377 ENDF __divmodqi4
1378 #endif /* defined (L_divmodqi4) */
1379
1380 #undef r_rem
1381 #undef r_arg1
1382 #undef r_arg2
1383 #undef r_cnt
1384
1385
1386 /*******************************************************
1387 Division 16 / 16 => (result + remainder)
1388 *******************************************************/
1389 #define r_remL r26 /* remainder Low */
1390 #define r_remH r27 /* remainder High */
1391
1392 /* return: remainder */
1393 #define r_arg1L r24 /* dividend Low */
1394 #define r_arg1H r25 /* dividend High */
1395
1396 /* return: quotient */
1397 #define r_arg2L r22 /* divisor Low */
1398 #define r_arg2H r23 /* divisor High */
1399
1400 #define r_cnt r21 /* loop count */
1401
1402 #if defined (L_udivmodhi4)
1403 DEFUN __udivmodhi4
1404 sub r_remL,r_remL
1405 sub r_remH,r_remH ; clear remainder and carry
1406 ldi r_cnt,17 ; init loop counter
1407 rjmp __udivmodhi4_ep ; jump to entry point
1408 __udivmodhi4_loop:
1409 rol r_remL ; shift dividend into remainder
1410 rol r_remH
1411 cp r_remL,r_arg2L ; compare remainder & divisor
1412 cpc r_remH,r_arg2H
1413 brcs __udivmodhi4_ep ; remainder < divisor
1414 sub r_remL,r_arg2L ; subtract divisor
1415 sbc r_remH,r_arg2H
1416 __udivmodhi4_ep:
1417 rol r_arg1L ; shift dividend (with CARRY)
1418 rol r_arg1H
1419 dec r_cnt ; decrement loop counter
1420 brne __udivmodhi4_loop
1421 com r_arg1L
1422 com r_arg1H
1423 ; div/mod results to return registers, as for the div() function
1424 mov_l r_arg2L, r_arg1L ; quotient
1425 mov_h r_arg2H, r_arg1H
1426 mov_l r_arg1L, r_remL ; remainder
1427 mov_h r_arg1H, r_remH
1428 ret
1429 ENDF __udivmodhi4
1430 #endif /* defined (L_udivmodhi4) */
1431
1432 #if defined (L_divmodhi4)
1433 DEFUN __divmodhi4
1434 .global _div
1435 _div:
1436 bst r_arg1H,7 ; store sign of dividend
1437 mov __tmp_reg__,r_arg2H
1438 brtc 0f
1439 com __tmp_reg__ ; r0.7 is sign of result
1440 rcall __divmodhi4_neg1 ; dividend negative: negate
1441 0:
1442 sbrc r_arg2H,7
1443 rcall __divmodhi4_neg2 ; divisor negative: negate
1444 XCALL __udivmodhi4 ; do the unsigned div/mod
1445 sbrc __tmp_reg__,7
1446 rcall __divmodhi4_neg2 ; correct remainder sign
1447 brtc __divmodhi4_exit
1448 __divmodhi4_neg1:
1449 ;; correct dividend/remainder sign
1450 com r_arg1H
1451 neg r_arg1L
1452 sbci r_arg1H,0xff
1453 ret
1454 __divmodhi4_neg2:
1455 ;; correct divisor/result sign
1456 com r_arg2H
1457 neg r_arg2L
1458 sbci r_arg2H,0xff
1459 __divmodhi4_exit:
1460 ret
1461 ENDF __divmodhi4
1462 #endif /* defined (L_divmodhi4) */
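;; Sign handling used by __divmodqi4/__divmodhi4 (and the wider signed
;; variants below), matching C's truncating division:
;;
;;     quotient  = (dividend < 0) ^ (divisor < 0)  ?  -q  :  q
;;     remainder = (dividend < 0)                  ?  -r  :  r
;;
;; where q and r come from the unsigned routine applied to |dividend| and
;; |divisor|.  The quotient's sign lives in bit 7 of __tmp_reg__, the
;; remainder's sign (= the dividend's sign) in the T flag.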
1463
1464 #undef r_remH
1465 #undef r_remL
1466
1467 #undef r_arg1H
1468 #undef r_arg1L
1469
1470 #undef r_arg2H
1471 #undef r_arg2L
1472
1473 #undef r_cnt
1474
1475 /*******************************************************
1476 Division 24 / 24 => (result + remainder)
1477 *******************************************************/
1478
1479 ;; A[0..2]: In: Dividend; Out: Quotient
1480 #define A0 22
1481 #define A1 A0+1
1482 #define A2 A0+2
1483
1484 ;; B[0..2]: In: Divisor; Out: Remainder
1485 #define B0 18
1486 #define B1 B0+1
1487 #define B2 B0+2
1488
1489 ;; C[0..2]: Expand remainder
1490 #define C0 __zero_reg__
1491 #define C1 26
1492 #define C2 25
1493
1494 ;; Loop counter
1495 #define r_cnt 21
1496
1497 #if defined (L_udivmodpsi4)
1498 ;; R24:R22 = R24:R22 udiv R20:R18
1499 ;; R20:R18 = R24:R22 umod R20:R18
1500 ;; Clobbers: R21, R25, R26
1501
1502 DEFUN __udivmodpsi4
1503 ; init loop counter
1504 ldi r_cnt, 24+1
1505 ; Clear remainder and carry. C0 is already 0
1506 clr C1
1507 sub C2, C2
1508 ; jump to entry point
1509 rjmp __udivmodpsi4_start
1510 __udivmodpsi4_loop:
1511 ; shift dividend into remainder
1512 rol C0
1513 rol C1
1514 rol C2
1515 ; compare remainder & divisor
1516 cp C0, B0
1517 cpc C1, B1
1518 cpc C2, B2
1519 brcs __udivmodpsi4_start ; remainder < divisor
1520 sub C0, B0 ; subtract divisor
1521 sbc C1, B1
1522 sbc C2, B2
1523 __udivmodpsi4_start:
1524 ; shift dividend (with CARRY)
1525 rol A0
1526 rol A1
1527 rol A2
1528 ; decrement loop counter
1529 dec r_cnt
1530 brne __udivmodpsi4_loop
1531 com A0
1532 com A1
1533 com A2
1534 ; div/mod results to return registers
1535 ; remainder
1536 mov B0, C0
1537 mov B1, C1
1538 mov B2, C2
1539 clr __zero_reg__ ; C0
1540 ret
1541 ENDF __udivmodpsi4
1542 #endif /* defined (L_udivmodpsi4) */
1543
1544 #if defined (L_divmodpsi4)
1545 ;; R24:R22 = R24:R22 div R20:R18
1546 ;; R20:R18 = R24:R22 mod R20:R18
1547 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1548
1549 DEFUN __divmodpsi4
1550 ; R0.7 will contain the sign of the result:
1551 ; R0.7 = A.sign ^ B.sign
1552 mov __tmp_reg__, B2
1553 ; T-flag = sign of dividend
1554 bst A2, 7
1555 brtc 0f
1556 com __tmp_reg__
1557 ; Adjust dividend's sign
1558 rcall __divmodpsi4_negA
1559 0:
1560 ; Adjust divisor's sign
1561 sbrc B2, 7
1562 rcall __divmodpsi4_negB
1563
1564 ; Do the unsigned div/mod
1565 XCALL __udivmodpsi4
1566
1567 ; Adjust quotient's sign
1568 sbrc __tmp_reg__, 7
1569 rcall __divmodpsi4_negA
1570
1571 ; Adjust remainder's sign
1572 brtc __divmodpsi4_end
1573
1574 __divmodpsi4_negB:
1575 ; Correct divisor/remainder sign
1576 com B2
1577 com B1
1578 neg B0
1579 sbci B1, -1
1580 sbci B2, -1
1581 ret
1582
1583 ; Correct dividend/quotient sign
1584 __divmodpsi4_negA:
1585 com A2
1586 com A1
1587 neg A0
1588 sbci A1, -1
1589 sbci A2, -1
1590 __divmodpsi4_end:
1591 ret
1592
1593 ENDF __divmodpsi4
1594 #endif /* defined (L_divmodpsi4) */
1595
1596 #undef A0
1597 #undef A1
1598 #undef A2
1599
1600 #undef B0
1601 #undef B1
1602 #undef B2
1603
1604 #undef C0
1605 #undef C1
1606 #undef C2
1607
1608 #undef r_cnt
1609
1610 /*******************************************************
1611 Division 32 / 32 => (result + remainder)
1612 *******************************************************/
1613 #define r_remHH r31 /* remainder High */
1614 #define r_remHL r30
1615 #define r_remH r27
1616 #define r_remL r26 /* remainder Low */
1617
1618 /* return: remainder */
1619 #define r_arg1HH r25 /* dividend High */
1620 #define r_arg1HL r24
1621 #define r_arg1H r23
1622 #define r_arg1L r22 /* dividend Low */
1623
1624 /* return: quotient */
1625 #define r_arg2HH r21 /* divisor High */
1626 #define r_arg2HL r20
1627 #define r_arg2H r19
1628 #define r_arg2L r18 /* divisor Low */
1629
1630 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1631
1632 #if defined (L_udivmodsi4)
1633 DEFUN __udivmodsi4
1634 ldi r_remL, 33 ; init loop counter
1635 mov r_cnt, r_remL
1636 sub r_remL,r_remL
1637 sub r_remH,r_remH ; clear remainder and carry
1638 mov_l r_remHL, r_remL
1639 mov_h r_remHH, r_remH
1640 rjmp __udivmodsi4_ep ; jump to entry point
1641 __udivmodsi4_loop:
1642 rol r_remL ; shift dividend into remainder
1643 rol r_remH
1644 rol r_remHL
1645 rol r_remHH
1646 cp r_remL,r_arg2L ; compare remainder & divisor
1647 cpc r_remH,r_arg2H
1648 cpc r_remHL,r_arg2HL
1649 cpc r_remHH,r_arg2HH
1650 brcs __udivmodsi4_ep ; remainder < divisor
1651 sub r_remL,r_arg2L ; subtract divisor
1652 sbc r_remH,r_arg2H
1653 sbc r_remHL,r_arg2HL
1654 sbc r_remHH,r_arg2HH
1655 __udivmodsi4_ep:
1656 rol r_arg1L ; shift dividend (with CARRY)
1657 rol r_arg1H
1658 rol r_arg1HL
1659 rol r_arg1HH
1660 dec r_cnt ; decrement loop counter
1661 brne __udivmodsi4_loop
1662 ; __zero_reg__ now restored (r_cnt == 0)
1663 com r_arg1L
1664 com r_arg1H
1665 com r_arg1HL
1666 com r_arg1HH
1667 ; div/mod results to return registers, as for the ldiv() function
1668 mov_l r_arg2L, r_arg1L ; quotient
1669 mov_h r_arg2H, r_arg1H
1670 mov_l r_arg2HL, r_arg1HL
1671 mov_h r_arg2HH, r_arg1HH
1672 mov_l r_arg1L, r_remL ; remainder
1673 mov_h r_arg1H, r_remH
1674 mov_l r_arg1HL, r_remHL
1675 mov_h r_arg1HH, r_remHH
1676 ret
1677 ENDF __udivmodsi4
1678 #endif /* defined (L_udivmodsi4) */
1679
1680 #if defined (L_divmodsi4)
1681 DEFUN __divmodsi4
1682 mov __tmp_reg__,r_arg2HH
1683 bst r_arg1HH,7 ; store sign of dividend
1684 brtc 0f
1685 com __tmp_reg__ ; r0.7 is sign of result
1686 XCALL __negsi2 ; dividend negative: negate
1687 0:
1688 sbrc r_arg2HH,7
1689 rcall __divmodsi4_neg2 ; divisor negative: negate
1690 XCALL __udivmodsi4 ; do the unsigned div/mod
1691 sbrc __tmp_reg__, 7 ; correct quotient sign
1692 rcall __divmodsi4_neg2
1693 brtc __divmodsi4_exit ; correct remainder sign
1694 XJMP __negsi2
1695 __divmodsi4_neg2:
1696 ;; correct divisor/quotient sign
1697 com r_arg2HH
1698 com r_arg2HL
1699 com r_arg2H
1700 neg r_arg2L
1701 sbci r_arg2H,0xff
1702 sbci r_arg2HL,0xff
1703 sbci r_arg2HH,0xff
1704 __divmodsi4_exit:
1705 ret
1706 ENDF __divmodsi4
1707 #endif /* defined (L_divmodsi4) */
1708
1709 #if defined (L_negsi2)
1710 ;; (set (reg:SI 22)
1711 ;; (neg:SI (reg:SI 22)))
1712 ;; Sets the V flag for signed overflow tests
1713 DEFUN __negsi2
1714 NEG4 22
1715 ret
1716 ENDF __negsi2
1717 #endif /* L_negsi2 */
1718
1719 #undef r_remHH
1720 #undef r_remHL
1721 #undef r_remH
1722 #undef r_remL
1723 #undef r_arg1HH
1724 #undef r_arg1HL
1725 #undef r_arg1H
1726 #undef r_arg1L
1727 #undef r_arg2HH
1728 #undef r_arg2HL
1729 #undef r_arg2H
1730 #undef r_arg2L
1731 #undef r_cnt
1732
1733 /* *di routines use registers below R19 and won't work with tiny arch
1734 right now. */
1735
1736 #if !defined (__AVR_TINY__)
1737 /*******************************************************
1738 Division 64 / 64
1739 Modulo 64 % 64
1740 *******************************************************/
1741
1742 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1743 ;; at least 16k of Program Memory. For smaller Devices, the Choice
1744 ;; depends on MOVW and SP Size. There is a Connection between SP Size
1745 ;; and Flash Size, so SP Size can be used to test for Flash Size.
1746
1747 #if defined (__AVR_HAVE_JMP_CALL__)
1748 # define SPEED_DIV 8
1749 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1750 # define SPEED_DIV 16
1751 #else
1752 # define SPEED_DIV 0
1753 #endif
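;; SPEED_DIV only selects how much of the 64-bit division below may be
;; short-cut before the generic 1-bit-per-iteration loop:  8 pre-shifts
;; the dividend a whole byte at a time while its high part is still
;; smaller than the divisor, 16 pre-shifts one 32-bit chunk, and 0 always
;; runs the full 64 iterations (smallest code).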
1754
1755 ;; A[0..7]: In: Dividend;
1756 ;; Out: Quotient (T = 0)
1757 ;; Out: Remainder (T = 1)
1758 #define A0 18
1759 #define A1 A0+1
1760 #define A2 A0+2
1761 #define A3 A0+3
1762 #define A4 A0+4
1763 #define A5 A0+5
1764 #define A6 A0+6
1765 #define A7 A0+7
1766
1767 ;; B[0..7]: In: Divisor; Out: Clobber
1768 #define B0 10
1769 #define B1 B0+1
1770 #define B2 B0+2
1771 #define B3 B0+3
1772 #define B4 B0+4
1773 #define B5 B0+5
1774 #define B6 B0+6
1775 #define B7 B0+7
1776
1777 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1778 #define C0 8
1779 #define C1 C0+1
1780 #define C2 30
1781 #define C3 C2+1
1782 #define C4 28
1783 #define C5 C4+1
1784 #define C6 26
1785 #define C7 C6+1
1786
1787 ;; Holds Signs during Division Routine
1788 #define SS __tmp_reg__
1789
1790 ;; Bit-Counter in Division Routine
1791 #define R_cnt __zero_reg__
1792
1793 ;; Scratch Register for Negation
1794 #define NN r31
1795
1796 #if defined (L_udivdi3)
1797
1798 ;; R25:R18 = R24:R18 umod R17:R10
1799 ;; Ordinary ABI-Function
1800
1801 DEFUN __umoddi3
1802 set
1803 rjmp __udivdi3_umoddi3
1804 ENDF __umoddi3
1805
1806 ;; R25:R18 = R24:R18 udiv R17:R10
1807 ;; Ordinary ABI-Function
1808
1809 DEFUN __udivdi3
1810 clt
1811 ENDF __udivdi3
1812
1813 DEFUN __udivdi3_umoddi3
1814 push C0
1815 push C1
1816 push C4
1817 push C5
1818 XCALL __udivmod64
1819 pop C5
1820 pop C4
1821 pop C1
1822 pop C0
1823 ret
1824 ENDF __udivdi3_umoddi3
1825 #endif /* L_udivdi3 */
1826
1827 #if defined (L_udivmod64)
1828
1829 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1830 ;; No Registers saved/restored; the Callers will take Care.
1831 ;; Preserves B[] and T-flag
1832 ;; T = 0: Compute Quotient in A[]
1833 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1834
1835 DEFUN __udivmod64
1836
1837 ;; Clear Remainder (C6, C7 will follow)
1838 clr C0
1839 clr C1
1840 wmov C2, C0
1841 wmov C4, C0
1842 ldi C7, 64
1843
1844 #if SPEED_DIV == 0 || SPEED_DIV == 16
1845 ;; Initialize Loop-Counter
1846 mov R_cnt, C7
1847 wmov C6, C0
1848 #endif /* SPEED_DIV */
1849
1850 #if SPEED_DIV == 8
1851
1852 push A7
1853 clr C6
1854
1855 1: ;; Compare shifted Dividend against Divisor
1856 ;; If -- even after Shifting -- it is smaller...
1857 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1858 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1859 brcc 2f
1860
1861 ;; ...then no Quotient Bit can arise in these 8 Steps; thus, it is legal to shift left by a whole Byte
1862 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1863 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1864 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1865 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1866
1867 ;; 8 Bits are done
1868 subi C7, 8
1869 brne 1b
1870
1871 ;; Shifted 64 Bits: A7 has traveled to C7
1872 pop C7
1873 ;; Divisor is greater than Dividend. We have:
1874 ;; A[] % B[] = A[]
1875 ;; A[] / B[] = 0
1876 ;; Thus, we can return immediately
1877 rjmp 5f
1878
1879 2: ;; Initialize Bit-Counter with the Number of Bits still to be processed
1880 mov R_cnt, C7
1881
1882 ;; Push of A7 is not needed because C7 is still 0
1883 pop C7
1884 clr C7
1885
1886 #elif SPEED_DIV == 16
1887
1888 ;; Compare shifted Dividend against Divisor
1889 cp A7, B3
1890 cpc C0, B4
1891 cpc C1, B5
1892 cpc C2, B6
1893 cpc C3, B7
1894 brcc 2f
1895
1896 ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1897 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1898 wmov C2,A6 $ wmov C0,A4
1899 wmov A6,A2 $ wmov A4,A0
1900 wmov A2,C6 $ wmov A0,C4
1901
1902 ;; Set Bit Counter to 32
1903 lsr R_cnt
1904 2:
1905 #elif SPEED_DIV
1906 #error SPEED_DIV = ?
1907 #endif /* SPEED_DIV */
1908
1909 ;; The actual Division + Remainder Routine
1910
1911 3: ;; Left-shift Dividend...
1912 lsl A0 $ rol A1 $ rol A2 $ rol A3
1913 rol A4 $ rol A5 $ rol A6 $ rol A7
1914
1915 ;; ...into Remainder
1916 rol C0 $ rol C1 $ rol C2 $ rol C3
1917 rol C4 $ rol C5 $ rol C6 $ rol C7
1918
1919 ;; Compare Remainder and Divisor
1920 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1921 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1922
1923 brcs 4f
1924
1925 ;; Divisor fits into Remainder: Subtract it from Remainder...
1926 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1927 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1928
1929 ;; ...and set according Bit in the upcoming Quotient
1930 ;; The Bit will travel to its final Position
1931 ori A0, 1
1932
1933 4: ;; This Bit is done
1934 dec R_cnt
1935 brne 3b
1936 ;; __zero_reg__ is 0 again
1937
1938 ;; T = 0: We are fine with the Quotient in A[]
1939 ;; T = 1: Copy Remainder to A[]
1940 5: brtc 6f
1941 wmov A0, C0
1942 wmov A2, C2
1943 wmov A4, C4
1944 wmov A6, C6
1945 ;; Move the Sign of the Result to SS.7
1946 lsl SS
1947
1948 6: ret
1949
1950 ENDF __udivmod64
1951 #endif /* L_udivmod64 */
1952
1953
1954 #if defined (L_divdi3)
1955
1956 ;; R25:R18 = R24:R18 mod R17:R10
1957 ;; Ordinary ABI-Function
1958
1959 DEFUN __moddi3
1960 set
1961 rjmp __divdi3_moddi3
1962 ENDF __moddi3
1963
1964 ;; R25:R18 = R24:R18 div R17:R10
1965 ;; Ordinary ABI-Function
1966
1967 DEFUN __divdi3
1968 clt
1969 ENDF __divdi3
1970
1971 DEFUN __divdi3_moddi3
1972 #if SPEED_DIV
1973 mov r31, A7
1974 or r31, B7
1975 brmi 0f
1976 ;; Both Sign Bits are 0: the following Complexity is not needed
1977 XJMP __udivdi3_umoddi3
1978 #endif /* SPEED_DIV */
1979
1980 0: ;; The Prologue
1981 ;; Save 12 Registers: Y, 17...8
1982 ;; No Frame needed
1983 do_prologue_saves 12
1984
1985 ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1986 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1987 mov SS, A7
1988 asr SS
1989 ;; Adjust Dividend's Sign as needed
1990 #if SPEED_DIV
1991 ;; Compiling for Speed, we know that at least one Operand must be < 0
1992 ;; Thus, if A[] >= 0 then we know B[] < 0
1993 brpl 22f
1994 #else
1995 brpl 21f
1996 #endif /* SPEED_DIV */
1997
1998 XCALL __negdi2
1999
2000 ;; Adjust Divisor's Sign and SS.7 as needed
2001 21: tst B7
2002 brpl 3f
2003 22: ldi NN, 1 << 7
2004 eor SS, NN
2005
2006 ldi NN, -1
2007 com B4 $ com B5 $ com B6 $ com B7
2008 $ com B1 $ com B2 $ com B3
2009 NEG B0
2010 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
2011 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
2012
2013 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
2014 XCALL __udivmod64
2015
2016 ;; Adjust Result's Sign
2017 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2018 tst SS
2019 brpl 4f
2020 #else
2021 sbrc SS, 7
2022 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2023 XCALL __negdi2
2024
2025 4: ;; Epilogue: Restore 12 Registers and return
2026 do_epilogue_restores 12
2027
2028 ENDF __divdi3_moddi3
2029
2030 #endif /* L_divdi3 */
2031
2032 #undef R_cnt
2033 #undef SS
2034 #undef NN
2035
2036 .section .text.libgcc, "ax", @progbits
2037
2038 #define TT __tmp_reg__
2039
2040 #if defined (L_adddi3)
2041 ;; (set (reg:DI 18)
2042 ;; (plus:DI (reg:DI 18)
2043 ;; (reg:DI 10)))
2044 ;; Sets the V flag for signed overflow tests
2045 ;; Sets the C flag for unsigned overflow tests
2046 DEFUN __adddi3
2047 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
2048 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
2049 ret
2050 ENDF __adddi3
2051 #endif /* L_adddi3 */
2052
2053 #if defined (L_adddi3_s8)
2054 ;; (set (reg:DI 18)
2055 ;; (plus:DI (reg:DI 18)
2056 ;; (sign_extend:SI (reg:QI 26))))
2057 ;; Sets the V flag for signed overflow tests
2058 ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
2059 DEFUN __adddi3_s8
2060 clr TT
2061 sbrc r26, 7
2062 com TT
2063 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
2064 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
2065 ret
2066 ENDF __adddi3_s8
2067 #endif /* L_adddi3_s8 */
2068
2069 #if defined (L_subdi3)
2070 ;; (set (reg:DI 18)
2071 ;; (minus:DI (reg:DI 18)
2072 ;; (reg:DI 10)))
2073 ;; Sets the V flag for signed overflow tests
2074 ;; Sets the C flag for unsigned overflow tests
2075 DEFUN __subdi3
2076 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
2077 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
2078 ret
2079 ENDF __subdi3
2080 #endif /* L_subdi3 */
2081
2082 #if defined (L_cmpdi2)
2083 ;; (set (cc0)
2084 ;; (compare (reg:DI 18)
2085 ;; (reg:DI 10)))
2086 DEFUN __cmpdi2
2087 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
2088 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
2089 ret
2090 ENDF __cmpdi2
2091 #endif /* L_cmpdi2 */
2092
2093 #if defined (L_cmpdi2_s8)
2094 ;; (set (cc0)
2095 ;; (compare (reg:DI 18)
2096 ;; (sign_extend:SI (reg:QI 26))))
2097 DEFUN __cmpdi2_s8
2098 clr TT
2099 sbrc r26, 7
2100 com TT
2101 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
2102 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
2103 ret
2104 ENDF __cmpdi2_s8
2105 #endif /* L_cmpdi2_s8 */
2106
2107 #if defined (L_negdi2)
2108 ;; (set (reg:DI 18)
2109 ;; (neg:DI (reg:DI 18)))
2110 ;; Sets the V flag for signed overflow tests
2111 DEFUN __negdi2
2112
2113 com A4 $ com A5 $ com A6 $ com A7
2114 $ com A1 $ com A2 $ com A3
2115 NEG A0
2116 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
2117 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
2118 ret
2119
2120 ENDF __negdi2
2121 #endif /* L_negdi2 */
2122
2123 #undef TT
2124
2125 #undef C7
2126 #undef C6
2127 #undef C5
2128 #undef C4
2129 #undef C3
2130 #undef C2
2131 #undef C1
2132 #undef C0
2133
2134 #undef B7
2135 #undef B6
2136 #undef B5
2137 #undef B4
2138 #undef B3
2139 #undef B2
2140 #undef B1
2141 #undef B0
2142
2143 #undef A7
2144 #undef A6
2145 #undef A5
2146 #undef A4
2147 #undef A3
2148 #undef A2
2149 #undef A1
2150 #undef A0
2151
2152 #endif /* !defined (__AVR_TINY__) */
2153
2154 \f
2155 .section .text.libgcc.prologue, "ax", @progbits
2156
2157 /**********************************
2158 * This is a prologue subroutine
2159 **********************************/
2160 #if !defined (__AVR_TINY__)
2161 #if defined (L_prologue)
2162
2163 ;; This function does not clobber T-flag; 64-bit division relies on it
2164 DEFUN __prologue_saves__
2165 push r2
2166 push r3
2167 push r4
2168 push r5
2169 push r6
2170 push r7
2171 push r8
2172 push r9
2173 push r10
2174 push r11
2175 push r12
2176 push r13
2177 push r14
2178 push r15
2179 push r16
2180 push r17
2181 push r28
2182 push r29
2183 #if !defined (__AVR_HAVE_SPH__)
2184 in r28,__SP_L__
2185 sub r28,r26
2186 out __SP_L__,r28
2187 clr r29
2188 #elif defined (__AVR_XMEGA__)
2189 in r28,__SP_L__
2190 in r29,__SP_H__
2191 sub r28,r26
2192 sbc r29,r27
2193 out __SP_L__,r28
2194 out __SP_H__,r29
2195 #else
2196 in r28,__SP_L__
2197 in r29,__SP_H__
2198 sub r28,r26
2199 sbc r29,r27
2200 in __tmp_reg__,__SREG__
2201 cli
2202 out __SP_H__,r29
2203 out __SREG__,__tmp_reg__
2204 out __SP_L__,r28
2205 #endif /* #SP = 8/16 */
2206
2207 XIJMP
2208
2209 ENDF __prologue_saves__
2210 #endif /* defined (L_prologue) */
2211
2212 /*
2213 * This is an epilogue subroutine
2214 */
2215 #if defined (L_epilogue)
2216
2217 DEFUN __epilogue_restores__
2218 ldd r2,Y+18
2219 ldd r3,Y+17
2220 ldd r4,Y+16
2221 ldd r5,Y+15
2222 ldd r6,Y+14
2223 ldd r7,Y+13
2224 ldd r8,Y+12
2225 ldd r9,Y+11
2226 ldd r10,Y+10
2227 ldd r11,Y+9
2228 ldd r12,Y+8
2229 ldd r13,Y+7
2230 ldd r14,Y+6
2231 ldd r15,Y+5
2232 ldd r16,Y+4
2233 ldd r17,Y+3
2234 ldd r26,Y+2
2235 #if !defined (__AVR_HAVE_SPH__)
2236 ldd r29,Y+1
2237 add r28,r30
2238 out __SP_L__,r28
2239 mov r28, r26
2240 #elif defined (__AVR_XMEGA__)
2241 ldd r27,Y+1
2242 add r28,r30
2243 adc r29,__zero_reg__
2244 out __SP_L__,r28
2245 out __SP_H__,r29
2246 wmov 28, 26
2247 #else
2248 ldd r27,Y+1
2249 add r28,r30
2250 adc r29,__zero_reg__
2251 in __tmp_reg__,__SREG__
2252 cli
2253 out __SP_H__,r29
2254 out __SREG__,__tmp_reg__
2255 out __SP_L__,r28
2256 mov_l r28, r26
2257 mov_h r29, r27
2258 #endif /* #SP = 8/16 */
2259 ret
2260 ENDF __epilogue_restores__
2261 #endif /* defined (L_epilogue) */
2262 #endif /* !defined (__AVR_TINY__) */
2263
2264 #ifdef L_exit
2265 .section .fini9,"ax",@progbits
2266 DEFUN _exit
2267 .weak exit
2268 exit:
2269 ENDF _exit
2270
2271 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
2272
2273 .section .fini0,"ax",@progbits
2274 cli
2275 __stop_program:
2276 rjmp __stop_program
2277 #endif /* defined (L_exit) */
2278
2279 #ifdef L_cleanup
2280 .weak _cleanup
2281 .func _cleanup
2282 _cleanup:
2283 ret
2284 .endfunc
2285 #endif /* defined (L_cleanup) */
2286
2287 \f
2288 .section .text.libgcc, "ax", @progbits
2289
2290 #ifdef L_tablejump2
2291 DEFUN __tablejump2__
2292 lsl r30
2293 rol r31
2294 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2295 ;; Word address of gs() jumptable entry in R24:Z
2296 rol r24
2297 out __RAMPZ__, r24
2298 #elif defined (__AVR_HAVE_ELPM__)
2299 ;; Word address of jumptable entry in Z
2300 clr __tmp_reg__
2301 rol __tmp_reg__
2302 out __RAMPZ__, __tmp_reg__
2303 #endif
2304
2305 ;; Read word address from jumptable and jump
2306
2307 #if defined (__AVR_HAVE_ELPMX__)
2308 elpm __tmp_reg__, Z+
2309 elpm r31, Z
2310 mov r30, __tmp_reg__
2311 #ifdef __AVR_HAVE_RAMPD__
2312 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2313 out __RAMPZ__, __zero_reg__
2314 #endif /* RAMPD */
2315 XIJMP
2316 #elif defined (__AVR_HAVE_ELPM__)
2317 elpm
2318 push r0
2319 adiw r30, 1
2320 elpm
2321 push r0
2322 ret
2323 #elif defined (__AVR_HAVE_LPMX__)
2324 lpm __tmp_reg__, Z+
2325 lpm r31, Z
2326 mov r30, __tmp_reg__
2327 ijmp
2328 #elif defined (__AVR_TINY__)
2329 wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
2330 ld __tmp_reg__, Z+
2331 ld r31, Z ; Use ld instead of lpm to load Z
2332 mov r30, __tmp_reg__
2333 ijmp
2334 #else
2335 lpm
2336 push r0
2337 adiw r30, 1
2338 lpm
2339 push r0
2340 ret
2341 #endif
2342 ENDF __tablejump2__
2343 #endif /* L_tablejump2 */
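;; Rough picture of __tablejump2__ (informal summary):  on entry, Z holds
;; the gs() word address of a jump-table slot (devices with EIJMP/EICALL
;; pass the high bits in R24).  LSL/ROL turns it into the byte address
;; needed by (E)LPM -- on __AVR_TINY__ a plain LD with the flash offset
;; added -- then the slot's contents, again a word address, are loaded
;; into Z and control is transferred with IJMP/EIJMP, or by pushing the
;; target and executing RET on cores without LPMX/ELPMX.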
2344
2345 #if defined(__AVR_TINY__)
2346 #ifdef L_copy_data
2347 .section .init4,"ax",@progbits
2348 .global __do_copy_data
2349 __do_copy_data:
2350 ldi r18, hi8(__data_end)
2351 ldi r26, lo8(__data_start)
2352 ldi r27, hi8(__data_start)
2353 ldi r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2354 ldi r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2355 rjmp .L__do_copy_data_start
2356 .L__do_copy_data_loop:
2357 ld r19, z+
2358 st X+, r19
2359 .L__do_copy_data_start:
2360 cpi r26, lo8(__data_end)
2361 cpc r27, r18
2362 brne .L__do_copy_data_loop
2363 #endif
2364 #else
2365 #ifdef L_copy_data
2366 .section .init4,"ax",@progbits
2367 DEFUN __do_copy_data
2368 #if defined(__AVR_HAVE_ELPMX__)
2369 ldi r17, hi8(__data_end)
2370 ldi r26, lo8(__data_start)
2371 ldi r27, hi8(__data_start)
2372 ldi r30, lo8(__data_load_start)
2373 ldi r31, hi8(__data_load_start)
2374 ldi r16, hh8(__data_load_start)
2375 out __RAMPZ__, r16
2376 rjmp .L__do_copy_data_start
2377 .L__do_copy_data_loop:
2378 elpm r0, Z+
2379 st X+, r0
2380 .L__do_copy_data_start:
2381 cpi r26, lo8(__data_end)
2382 cpc r27, r17
2383 brne .L__do_copy_data_loop
2384 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
2385 ldi r17, hi8(__data_end)
2386 ldi r26, lo8(__data_start)
2387 ldi r27, hi8(__data_start)
2388 ldi r30, lo8(__data_load_start)
2389 ldi r31, hi8(__data_load_start)
2390 ldi r16, hh8(__data_load_start - 0x10000)
2391 .L__do_copy_data_carry:
2392 inc r16
2393 out __RAMPZ__, r16
2394 rjmp .L__do_copy_data_start
2395 .L__do_copy_data_loop:
2396 elpm
2397 st X+, r0
2398 adiw r30, 1
2399 brcs .L__do_copy_data_carry
2400 .L__do_copy_data_start:
2401 cpi r26, lo8(__data_end)
2402 cpc r27, r17
2403 brne .L__do_copy_data_loop
2404 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2405 ldi r17, hi8(__data_end)
2406 ldi r26, lo8(__data_start)
2407 ldi r27, hi8(__data_start)
2408 ldi r30, lo8(__data_load_start)
2409 ldi r31, hi8(__data_load_start)
2410 rjmp .L__do_copy_data_start
2411 .L__do_copy_data_loop:
2412 #if defined (__AVR_HAVE_LPMX__)
2413 lpm r0, Z+
2414 #else
2415 lpm
2416 adiw r30, 1
2417 #endif
2418 st X+, r0
2419 .L__do_copy_data_start:
2420 cpi r26, lo8(__data_end)
2421 cpc r27, r17
2422 brne .L__do_copy_data_loop
2423 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2424 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2425 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2426 out __RAMPZ__, __zero_reg__
2427 #endif /* ELPM && RAMPD */
2428 ENDF __do_copy_data
2429 #endif /* L_copy_data */
2430 #endif /* !defined (__AVR_TINY__) */
2431
2432 /* __do_clear_bss is only necessary if there is anything in the .bss section. */
2433
2434 #ifdef L_clear_bss
2435 .section .init4,"ax",@progbits
2436 DEFUN __do_clear_bss
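;; Roughly equivalent C (illustrative sketch only):
;;
;;     for (uint8_t *p = &__bss_start; p != &__bss_end; p++)
;;         *p = 0;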
2437 ldi r18, hi8(__bss_end)
2438 ldi r26, lo8(__bss_start)
2439 ldi r27, hi8(__bss_start)
2440 rjmp .do_clear_bss_start
2441 .do_clear_bss_loop:
2442 st X+, __zero_reg__
2443 .do_clear_bss_start:
2444 cpi r26, lo8(__bss_end)
2445 cpc r27, r18
2446 brne .do_clear_bss_loop
2447 ENDF __do_clear_bss
2448 #endif /* L_clear_bss */
2449
2450 /* __do_global_ctors and __do_global_dtors are only necessary
2451 if there are any constructors/destructors. */
2452
2453 #if defined(__AVR_TINY__)
2454 #define cdtors_tst_reg r18
2455 #else
2456 #define cdtors_tst_reg r17
2457 #endif
2458
2459 #ifdef L_ctors
2460 .section .init6,"ax",@progbits
2461 DEFUN __do_global_ctors
2462 ldi cdtors_tst_reg, pm_hi8(__ctors_start)
2463 ldi r28, pm_lo8(__ctors_end)
2464 ldi r29, pm_hi8(__ctors_end)
2465 #ifdef __AVR_HAVE_EIJMP_EICALL__
2466 ldi r16, pm_hh8(__ctors_end)
2467 #endif /* HAVE_EIJMP */
2468 rjmp .L__do_global_ctors_start
2469 .L__do_global_ctors_loop:
2470 wsubi 28, 1
2471 #ifdef __AVR_HAVE_EIJMP_EICALL__
2472 sbc r16, __zero_reg__
2473 mov r24, r16
2474 #endif /* HAVE_EIJMP */
2475 mov_h r31, r29
2476 mov_l r30, r28
2477 XCALL __tablejump2__
2478 .L__do_global_ctors_start:
2479 cpi r28, pm_lo8(__ctors_start)
2480 cpc r29, cdtors_tst_reg
2481 #ifdef __AVR_HAVE_EIJMP_EICALL__
2482 ldi r24, pm_hh8(__ctors_start)
2483 cpc r16, r24
2484 #endif /* HAVE_EIJMP */
2485 brne .L__do_global_ctors_loop
2486 ENDF __do_global_ctors
2487 #endif /* L_ctors */
2488
2489 #ifdef L_dtors
2490 .section .fini6,"ax",@progbits
2491 DEFUN __do_global_dtors
2492 ldi cdtors_tst_reg, pm_hi8(__dtors_end)
2493 ldi r28, pm_lo8(__dtors_start)
2494 ldi r29, pm_hi8(__dtors_start)
2495 #ifdef __AVR_HAVE_EIJMP_EICALL__
2496 ldi r16, pm_hh8(__dtors_start)
2497 #endif /* HAVE_EIJMP */
2498 rjmp .L__do_global_dtors_start
2499 .L__do_global_dtors_loop:
2500 #ifdef __AVR_HAVE_EIJMP_EICALL__
2501 mov r24, r16
2502 #endif /* HAVE_EIJMP */
2503 mov_h r31, r29
2504 mov_l r30, r28
2505 XCALL __tablejump2__
2506 waddi 28, 1
2507 #ifdef __AVR_HAVE_EIJMP_EICALL__
2508 adc r16, __zero_reg__
2509 #endif /* HAVE_EIJMP */
2510 .L__do_global_dtors_start:
2511 cpi r28, pm_lo8(__dtors_end)
2512 cpc r29, cdtors_tst_reg
2513 #ifdef __AVR_HAVE_EIJMP_EICALL__
2514 ldi r24, pm_hh8(__dtors_end)
2515 cpc r16, r24
2516 #endif /* HAVE_EIJMP */
2517 brne .L__do_global_dtors_loop
2518 ENDF __do_global_dtors
2519 #endif /* L_dtors */
2520
2521 #undef cdtors_tst_reg
2522
2523 .section .text.libgcc, "ax", @progbits
2524
2525 #if !defined (__AVR_TINY__)
2526 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2527 ;; Loading n bytes from Flash; n = 3,4
2528 ;; R22... = Flash[Z]
2529 ;; Clobbers: __tmp_reg__
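;; Z is preserved: the last .load step rewinds Z by n-1 and thereby undoes
;; the ADIWs of the preceding steps.  __load_3 simply wraps __load_4,
;; saving and restoring R25 so that only three result bytes are exposed.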
2530
2531 #if (defined (L_load_3) \
2532 || defined (L_load_4)) \
2533 && !defined (__AVR_HAVE_LPMX__)
2534
2535 ;; Destination
2536 #define D0 22
2537 #define D1 D0+1
2538 #define D2 D0+2
2539 #define D3 D0+3
2540
2541 .macro .load dest, n
2542 lpm
2543 mov \dest, r0
2544 .if \dest != D0+\n-1
2545 adiw r30, 1
2546 .else
2547 sbiw r30, \n-1
2548 .endif
2549 .endm
2550
2551 #if defined (L_load_3)
2552 DEFUN __load_3
2553 push D3
2554 XCALL __load_4
2555 pop D3
2556 ret
2557 ENDF __load_3
2558 #endif /* L_load_3 */
2559
2560 #if defined (L_load_4)
2561 DEFUN __load_4
2562 .load D0, 4
2563 .load D1, 4
2564 .load D2, 4
2565 .load D3, 4
2566 ret
2567 ENDF __load_4
2568 #endif /* L_load_4 */
2569
2570 #endif /* L_load_3 || L_load_4 */
2571 #endif /* !defined (__AVR_TINY__) */
2572
2573 #if !defined (__AVR_TINY__)
2574 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2575 ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2576 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2577 ;; Clobbers: __tmp_reg__, R21, R30, R31
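;; Bit 7 of R21 (bit 23 of the 24-bit address) selects the address space:
;; set means RAM, clear means flash.  A rough C model of __xload_1
;; (illustrative only; assumes avr-libc's <avr/pgmspace.h> types/macros):
;;
;;     uint8_t xload_1 (uint_farptr_t addr)       /* addr in R21:Z */
;;     {
;;         if (addr & 0x800000UL)                 /* RAM */
;;             return *(const uint8_t *) (uint16_t) addr;
;;         return pgm_read_byte_far (addr);       /* flash */
;;     }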
2578
2579 #if (defined (L_xload_1) \
2580 || defined (L_xload_2) \
2581 || defined (L_xload_3) \
2582 || defined (L_xload_4))
2583
2584 ;; Destination
2585 #define D0 22
2586 #define D1 D0+1
2587 #define D2 D0+2
2588 #define D3 D0+3
2589
2590 ;; Register containing bits 16+ of the address
2591
2592 #define HHI8 21
2593
2594 .macro .xload dest, n
2595 #if defined (__AVR_HAVE_ELPMX__)
2596 elpm \dest, Z+
2597 #elif defined (__AVR_HAVE_ELPM__)
2598 elpm
2599 mov \dest, r0
2600 .if \dest != D0+\n-1
2601 adiw r30, 1
2602 adc HHI8, __zero_reg__
2603 out __RAMPZ__, HHI8
2604 .endif
2605 #elif defined (__AVR_HAVE_LPMX__)
2606 lpm \dest, Z+
2607 #else
2608 lpm
2609 mov \dest, r0
2610 .if \dest != D0+\n-1
2611 adiw r30, 1
2612 .endif
2613 #endif
2614 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2615 .if \dest == D0+\n-1
2616 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2617 out __RAMPZ__, __zero_reg__
2618 .endif
2619 #endif
2620 .endm ; .xload
2621
2622 #if defined (L_xload_1)
2623 DEFUN __xload_1
2624 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2625 sbrc HHI8, 7
2626 ld D0, Z
2627 sbrs HHI8, 7
2628 lpm D0, Z
2629 ret
2630 #else
2631 sbrc HHI8, 7
2632 rjmp 1f
2633 #if defined (__AVR_HAVE_ELPM__)
2634 out __RAMPZ__, HHI8
2635 #endif /* __AVR_HAVE_ELPM__ */
2636 .xload D0, 1
2637 ret
2638 1: ld D0, Z
2639 ret
2640 #endif /* LPMx && ! ELPM */
2641 ENDF __xload_1
2642 #endif /* L_xload_1 */
2643
2644 #if defined (L_xload_2)
2645 DEFUN __xload_2
2646 sbrc HHI8, 7
2647 rjmp 1f
2648 #if defined (__AVR_HAVE_ELPM__)
2649 out __RAMPZ__, HHI8
2650 #endif /* __AVR_HAVE_ELPM__ */
2651 .xload D0, 2
2652 .xload D1, 2
2653 ret
2654 1: ld D0, Z+
2655 ld D1, Z+
2656 ret
2657 ENDF __xload_2
2658 #endif /* L_xload_2 */
2659
2660 #if defined (L_xload_3)
2661 DEFUN __xload_3
2662 sbrc HHI8, 7
2663 rjmp 1f
2664 #if defined (__AVR_HAVE_ELPM__)
2665 out __RAMPZ__, HHI8
2666 #endif /* __AVR_HAVE_ELPM__ */
2667 .xload D0, 3
2668 .xload D1, 3
2669 .xload D2, 3
2670 ret
2671 1: ld D0, Z+
2672 ld D1, Z+
2673 ld D2, Z+
2674 ret
2675 ENDF __xload_3
2676 #endif /* L_xload_3 */
2677
2678 #if defined (L_xload_4)
2679 DEFUN __xload_4
2680 sbrc HHI8, 7
2681 rjmp 1f
2682 #if defined (__AVR_HAVE_ELPM__)
2683 out __RAMPZ__, HHI8
2684 #endif /* __AVR_HAVE_ELPM__ */
2685 .xload D0, 4
2686 .xload D1, 4
2687 .xload D2, 4
2688 .xload D3, 4
2689 ret
2690 1: ld D0, Z+
2691 ld D1, Z+
2692 ld D2, Z+
2693 ld D3, Z+
2694 ret
2695 ENDF __xload_4
2696 #endif /* L_xload_4 */
2697
2698 #endif /* L_xload_{1|2|3|4} */
2699 #endif /* if !defined (__AVR_TINY__) */
2700
2701 #if !defined (__AVR_TINY__)
2702 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2703 ;; memcopy from Address Space __pgmx to RAM
2704 ;; R23:Z = Source Address
2705 ;; X = Destination Address
2706 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
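;; Roughly, in C (illustrative only; the byte count is in R25:R24, or in
;; R24 zero-extended for __movmemx_qi):
;;
;;     while (len--)
;;         *dest++ = *src++;   /* src read via LD or (E)LPM depending on R23.7 */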
2707
2708 #if defined (L_movmemx)
2709
2710 #define HHI8 23
2711 #define LOOP 24
2712
2713 DEFUN __movmemx_qi
2714 ;; #Bytes to copy fit in 8 Bits (1..255)
2715 ;; Zero-extend Loop Counter
2716 clr LOOP+1
2717 ;; FALLTHRU
2718 ENDF __movmemx_qi
2719
2720 DEFUN __movmemx_hi
2721
2722 ;; Read from where?
2723 sbrc HHI8, 7
2724 rjmp 1f
2725
2726 ;; Read from Flash
2727
2728 #if defined (__AVR_HAVE_ELPM__)
2729 out __RAMPZ__, HHI8
2730 #endif
2731
2732 0: ;; Load 1 Byte from Flash...
2733
2734 #if defined (__AVR_HAVE_ELPMX__)
2735 elpm r0, Z+
2736 #elif defined (__AVR_HAVE_ELPM__)
2737 elpm
2738 adiw r30, 1
2739 adc HHI8, __zero_reg__
2740 out __RAMPZ__, HHI8
2741 #elif defined (__AVR_HAVE_LPMX__)
2742 lpm r0, Z+
2743 #else
2744 lpm
2745 adiw r30, 1
2746 #endif
2747
2748 ;; ...and store that Byte to RAM Destination
2749 st X+, r0
2750 sbiw LOOP, 1
2751 brne 0b
2752 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2753 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2754 out __RAMPZ__, __zero_reg__
2755 #endif /* ELPM && RAMPD */
2756 ret
2757
2758 ;; Read from RAM
2759
2760 1: ;; Read 1 Byte from RAM...
2761 ld r0, Z+
2762 ;; and store that Byte to RAM Destination
2763 st X+, r0
2764 sbiw LOOP, 1
2765 brne 1b
2766 ret
2767 ENDF __movmemx_hi
2768
2769 #undef HHI8
2770 #undef LOOP
2771
2772 #endif /* L_movmemx */
2773 #endif /* !defined (__AVR_TINY__) */
2774
2775 \f
2776 .section .text.libgcc.builtins, "ax", @progbits
2777
2778 /**********************************
2779 * Find first set Bit (ffs)
2780 **********************************/
2781
2782 #if defined (L_ffssi2)
2783 ;; find first set bit
2784 ;; r25:r24 = ffs32 (r25:r22)
2785 ;; clobbers: r22, r26
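;; ffs returns one plus the index of the least significant set bit, and 0
;; for a zero argument, e.g. ffs32 (0x8) = 4, ffs32 (0) = 0.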
2786 DEFUN __ffssi2
2787 clr r26
2788 tst r22
2789 brne 1f
2790 subi r26, -8
2791 or r22, r23
2792 brne 1f
2793 subi r26, -8
2794 or r22, r24
2795 brne 1f
2796 subi r26, -8
2797 or r22, r25
2798 brne 1f
2799 ret
2800 1: mov r24, r22
2801 XJMP __loop_ffsqi2
2802 ENDF __ffssi2
2803 #endif /* defined (L_ffssi2) */
2804
2805 #if defined (L_ffshi2)
2806 ;; find first set bit
2807 ;; r25:r24 = ffs16 (r25:r24)
2808 ;; clobbers: r26
2809 DEFUN __ffshi2
2810 clr r26
2811 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2812 ;; Some cores have problems skipping a 2-word instruction
2813 tst r24
2814 breq 2f
2815 #else
2816 cpse r24, __zero_reg__
2817 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2818 1: XJMP __loop_ffsqi2
2819 2: ldi r26, 8
2820 or r24, r25
2821 brne 1b
2822 ret
2823 ENDF __ffshi2
2824 #endif /* defined (L_ffshi2) */
2825
2826 #if defined (L_loop_ffsqi2)
2827 ;; Helper for ffshi2, ffssi2
2828 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2829 ;; r24 must be != 0
2830 ;; clobbers: r26
2831 DEFUN __loop_ffsqi2
2832 inc r26
2833 lsr r24
2834 brcc __loop_ffsqi2
2835 mov r24, r26
2836 clr r25
2837 ret
2838 ENDF __loop_ffsqi2
2839 #endif /* defined (L_loop_ffsqi2) */
2840
2841 \f
2842 /**********************************
2843 * Count trailing Zeros (ctz)
2844 **********************************/
2845
2846 #if defined (L_ctzsi2)
2847 ;; count trailing zeros
2848 ;; r25:r24 = ctz32 (r25:r22)
2849 ;; clobbers: r26, r22
2850 ;; ctz(0) = 255
2851 ;; Note that ctz(0) is undefined for GCC
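;; ctz(x) = ffs(x) - 1 for x != 0, hence the DEC after the call; for
;; x == 0, ffs returns 0 and the DEC wraps r24 around to 255.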
2852 DEFUN __ctzsi2
2853 XCALL __ffssi2
2854 dec r24
2855 ret
2856 ENDF __ctzsi2
2857 #endif /* defined (L_ctzsi2) */
2858
2859 #if defined (L_ctzhi2)
2860 ;; count trailing zeros
2861 ;; r25:r24 = ctz16 (r25:r24)
2862 ;; clobbers: r26
2863 ;; ctz(0) = 255
2864 ;; Note that ctz(0) is undefined for GCC
2865 DEFUN __ctzhi2
2866 XCALL __ffshi2
2867 dec r24
2868 ret
2869 ENDF __ctzhi2
2870 #endif /* defined (L_ctzhi2) */
2871
2872 \f
2873 /**********************************
2874 * Count leading Zeros (clz)
2875 **********************************/
2876
2877 #if defined (L_clzdi2)
2878 ;; count leading zeros
2879 ;; r25:r24 = clz64 (r25:r18)
2880 ;; clobbers: r22, r23, r26
2881 DEFUN __clzdi2
2882 XCALL __clzsi2
2883 sbrs r24, 5
2884 ret
2885 mov_l r22, r18
2886 mov_h r23, r19
2887 mov_l r24, r20
2888 mov_h r25, r21
2889 XCALL __clzsi2
2890 subi r24, -32
2891 ret
2892 ENDF __clzdi2
2893 #endif /* defined (L_clzdi2) */
2894
2895 #if defined (L_clzsi2)
2896 ;; count leading zeros
2897 ;; r25:r24 = clz32 (r25:r22)
2898 ;; clobbers: r26
2899 DEFUN __clzsi2
2900 XCALL __clzhi2
2901 sbrs r24, 4
2902 ret
2903 mov_l r24, r22
2904 mov_h r25, r23
2905 XCALL __clzhi2
2906 subi r24, -16
2907 ret
2908 ENDF __clzsi2
2909 #endif /* defined (L_clzsi2) */
2910
2911 #if defined (L_clzhi2)
2912 ;; count leading zeros
2913 ;; r25:r24 = clz16 (r25:r24)
2914 ;; clobbers: r26
2915 DEFUN __clzhi2
2916 clr r26
2917 tst r25
2918 brne 1f
2919 subi r26, -8
2920 or r25, r24
2921 brne 1f
2922 ldi r24, 16
2923 ret
2924 1: cpi r25, 16
2925 brsh 3f
2926 subi r26, -3
2927 swap r25
2928 2: inc r26
2929 3: lsl r25
2930 brcc 2b
2931 mov r24, r26
2932 clr r25
2933 ret
2934 ENDF __clzhi2
2935 #endif /* defined (L_clzhi2) */
2936
2937 \f
2938 /**********************************
2939 * Parity
2940 **********************************/
2941
2942 #if defined (L_paritydi2)
2943 ;; r25:r24 = parity64 (r25:r18)
2944 ;; clobbers: __tmp_reg__
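;; The parity of a value equals the parity of the XOR of all of its bytes;
;; r18..r21 are folded into r24 here, and the 32-bit routine folds the rest.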
2945 DEFUN __paritydi2
2946 eor r24, r18
2947 eor r24, r19
2948 eor r24, r20
2949 eor r24, r21
2950 XJMP __paritysi2
2951 ENDF __paritydi2
2952 #endif /* defined (L_paritydi2) */
2953
2954 #if defined (L_paritysi2)
2955 ;; r25:r24 = parity32 (r25:r22)
2956 ;; clobbers: __tmp_reg__
2957 DEFUN __paritysi2
2958 eor r24, r22
2959 eor r24, r23
2960 XJMP __parityhi2
2961 ENDF __paritysi2
2962 #endif /* defined (L_paritysi2) */
2963
2964 #if defined (L_parityhi2)
2965 ;; r25:r24 = parity16 (r25:r24)
2966 ;; clobbers: __tmp_reg__
2967 DEFUN __parityhi2
2968 eor r24, r25
2969 ;; FALLTHRU
2970 ENDF __parityhi2
2971
2972 ;; r25:r24 = parity8 (r24)
2973 ;; clobbers: __tmp_reg__
2974 DEFUN __parityqi2
2975 ;; parity is in r24[0..7]
2976 mov __tmp_reg__, r24
2977 swap __tmp_reg__
2978 eor r24, __tmp_reg__
2979 ;; parity is in r24[0..3]
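;; The following add/mask/add sequence collects the parity of bits 1..3
;; into bit 3 while leaving bit 0 untouched; the SBRC/INC below then folds
;; bit 3 into bit 0.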
2980 subi r24, -4
2981 andi r24, -5
2982 subi r24, -6
2983 ;; parity is in r24[0,3]
2984 sbrc r24, 3
2985 inc r24
2986 ;; parity is in r24[0]
2987 andi r24, 1
2988 clr r25
2989 ret
2990 ENDF __parityqi2
2991 #endif /* defined (L_parityhi2) */
2992
2993 \f
2994 /**********************************
2995 * Population Count
2996 **********************************/
2997
2998 #if defined (L_popcounthi2)
2999 ;; population count
3000 ;; r25:r24 = popcount16 (r25:r24)
3001 ;; clobbers: __tmp_reg__
3002 DEFUN __popcounthi2
3003 XCALL __popcountqi2
3004 push r24
3005 mov r24, r25
3006 XCALL __popcountqi2
3007 clr r25
3008 ;; FALLTHRU
3009 ENDF __popcounthi2
3010
3011 DEFUN __popcounthi2_tail
3012 pop __tmp_reg__
3013 add r24, __tmp_reg__
3014 ret
3015 ENDF __popcounthi2_tail
3016 #endif /* defined (L_popcounthi2) */
3017
3018 #if defined (L_popcountsi2)
3019 ;; population count
3020 ;; r25:r24 = popcount32 (r25:r22)
3021 ;; clobbers: __tmp_reg__
3022 DEFUN __popcountsi2
3023 XCALL __popcounthi2
3024 push r24
3025 mov_l r24, r22
3026 mov_h r25, r23
3027 XCALL __popcounthi2
3028 XJMP __popcounthi2_tail
3029 ENDF __popcountsi2
3030 #endif /* defined (L_popcountsi2) */
3031
3032 #if defined (L_popcountdi2)
3033 ;; population count
3034 ;; r25:r24 = popcount64 (r25:r18)
3035 ;; clobbers: r22, r23, __tmp_reg__
3036 DEFUN __popcountdi2
3037 XCALL __popcountsi2
3038 push r24
3039 mov_l r22, r18
3040 mov_h r23, r19
3041 mov_l r24, r20
3042 mov_h r25, r21
3043 XCALL __popcountsi2
3044 XJMP __popcounthi2_tail
3045 ENDF __popcountdi2
3046 #endif /* defined (L_popcountdi2) */
3047
3048 #if defined (L_popcountqi2)
3049 ;; population count
3050 ;; r24 = popcount8 (r24)
3051 ;; clobbers: __tmp_reg__
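;; Bit-serial approach: bit 0 is isolated with ANDI, each further bit is
;; shifted into carry and accumulated with ADC; the final ADC adds the
;; carry from bit 7 together with __tmp_reg__, which is 0 by then.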
3052 DEFUN __popcountqi2
3053 mov __tmp_reg__, r24
3054 andi r24, 1
3055 lsr __tmp_reg__
3056 lsr __tmp_reg__
3057 adc r24, __zero_reg__
3058 lsr __tmp_reg__
3059 adc r24, __zero_reg__
3060 lsr __tmp_reg__
3061 adc r24, __zero_reg__
3062 lsr __tmp_reg__
3063 adc r24, __zero_reg__
3064 lsr __tmp_reg__
3065 adc r24, __zero_reg__
3066 lsr __tmp_reg__
3067 adc r24, __tmp_reg__
3068 ret
3069 ENDF __popcountqi2
3070 #endif /* defined (L_popcountqi2) */
3071
3072 \f
3073 /**********************************
3074 * Swap bytes
3075 **********************************/
3076
3077 ;; swap two registers with different register numbers
3078 .macro bswap a, b
3079 eor \a, \b
3080 eor \b, \a
3081 eor \a, \b
3082 .endm
3083
3084 #if defined (L_bswapsi2)
3085 ;; swap bytes
3086 ;; r25:r22 = bswap32 (r25:r22)
3087 DEFUN __bswapsi2
3088 bswap r22, r25
3089 bswap r23, r24
3090 ret
3091 ENDF __bswapsi2
3092 #endif /* defined (L_bswapsi2) */
3093
3094 #if defined (L_bswapdi2)
3095 ;; swap bytes
3096 ;; r25:r18 = bswap64 (r25:r18)
3097 DEFUN __bswapdi2
3098 bswap r18, r25
3099 bswap r19, r24
3100 bswap r20, r23
3101 bswap r21, r22
3102 ret
3103 ENDF __bswapdi2
3104 #endif /* defined (L_bswapdi2) */
3105
3106 \f
3107 /**********************************
3108 * 64-bit shifts
3109 **********************************/
3110
3111 #if defined (L_ashrdi3)
3112
3113 #define SS __zero_reg__
3114
3115 ;; Arithmetic shift right
3116 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
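;; SS (__zero_reg__) supplies the bits shifted in from the top: it stays
;; 0x00 for a logical shift and is set to 0xFF below for an arithmetic
;; shift of a negative value; __lshrdi3 restores it to 0 before returning.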
3117 DEFUN __ashrdi3
3118 sbrc r25, 7
3119 com SS
3120 ;; FALLTHRU
3121 ENDF __ashrdi3
3122
3123 ;; Logical shift right
3124 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
3125 DEFUN __lshrdi3
3126 ;; Sign-extension bits are in SS (__zero_reg__)
3127 mov __tmp_reg__, r16
3128 0: cpi r16, 8
3129 brlo 2f
3130 subi r16, 8
3131 mov r18, r19
3132 mov r19, r20
3133 mov r20, r21
3134 mov r21, r22
3135 mov r22, r23
3136 mov r23, r24
3137 mov r24, r25
3138 mov r25, SS
3139 rjmp 0b
3140 1: asr SS
3141 ror r25
3142 ror r24
3143 ror r23
3144 ror r22
3145 ror r21
3146 ror r20
3147 ror r19
3148 ror r18
3149 2: dec r16
3150 brpl 1b
3151 clr __zero_reg__
3152 mov r16, __tmp_reg__
3153 ret
3154 ENDF __lshrdi3
3155
3156 #undef SS
3157
3158 #endif /* defined (L_ashrdi3) */
3159
3160 #if defined (L_ashldi3)
3161 ;; Shift left
3162 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
3163 ;; This function does not clobber T.
3164 DEFUN __ashldi3
3165 mov __tmp_reg__, r16
3166 0: cpi r16, 8
3167 brlo 2f
3168 mov r25, r24
3169 mov r24, r23
3170 mov r23, r22
3171 mov r22, r21
3172 mov r21, r20
3173 mov r20, r19
3174 mov r19, r18
3175 clr r18
3176 subi r16, 8
3177 rjmp 0b
3178 1: lsl r18
3179 rol r19
3180 rol r20
3181 rol r21
3182 rol r22
3183 rol r23
3184 rol r24
3185 rol r25
3186 2: dec r16
3187 brpl 1b
3188 mov r16, __tmp_reg__
3189 ret
3190 ENDF __ashldi3
3191 #endif /* defined (L_ashldi3) */
3192
3193 #if defined (L_rotldi3)
3194 ;; Rotate left
3195 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
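;; Whole-byte rotations are handled first, 8 bits at a time; the bit loop
;; then shifts the 64-bit value left and the ADC wraps the bit shifted out
;; of r25 back into bit 0 of r18, turning the shift into a rotation.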
3196 DEFUN __rotldi3
3197 push r16
3198 0: cpi r16, 8
3199 brlo 2f
3200 subi r16, 8
3201 mov __tmp_reg__, r25
3202 mov r25, r24
3203 mov r24, r23
3204 mov r23, r22
3205 mov r22, r21
3206 mov r21, r20
3207 mov r20, r19
3208 mov r19, r18
3209 mov r18, __tmp_reg__
3210 rjmp 0b
3211 1: lsl r18
3212 rol r19
3213 rol r20
3214 rol r21
3215 rol r22
3216 rol r23
3217 rol r24
3218 rol r25
3219 adc r18, __zero_reg__
3220 2: dec r16
3221 brpl 1b
3222 pop r16
3223 ret
3224 ENDF __rotldi3
3225 #endif /* defined (L_rotldi3) */
3226
3227 \f
3228 .section .text.libgcc.fmul, "ax", @progbits
3229
3230 /***********************************************************/
3231 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3232 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3233 /***********************************************************/
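;; The hardware FMUL multiplies two unsigned 1.7 fixed-point operands and
;; returns the 16-bit product shifted left by one, i.e. a 1.15 result.
;; Roughly, in C (illustrative sketch only, not part of the build):
;;
;;     uint16_t fmul (uint8_t a, uint8_t b)
;;     {
;;         return (uint16_t) (((uint16_t) a * b) << 1);
;;     }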
3234
3235 #define A1 24
3236 #define B1 25
3237 #define C0 22
3238 #define C1 23
3239 #define A0 __tmp_reg__
3240
3241 #ifdef L_fmuls
3242 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
3243 ;;; Clobbers: r24, r25, __tmp_reg__
3244 DEFUN __fmuls
3245 ;; A0.7 = negate result?
3246 mov A0, A1
3247 eor A0, B1
3248 ;; B1 = |B1|
3249 sbrc B1, 7
3250 neg B1
3251 XJMP __fmulsu_exit
3252 ENDF __fmuls
3253 #endif /* L_fmuls */
3254
3255 #ifdef L_fmulsu
3256 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
3257 ;;; Clobbers: r24, r25, __tmp_reg__
3258 DEFUN __fmulsu
3259 ;; A0.7 = negate result?
3260 mov A0, A1
3261 ;; FALLTHRU
3262 ENDF __fmulsu
3263
3264 ;; Helper for __fmuls and __fmulsu
3265 DEFUN __fmulsu_exit
3266 ;; A1 = |A1|
3267 sbrc A1, 7
3268 neg A1
3269 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
3270 ;; Some cores have problems skipping a 2-word instruction
3271 tst A0
3272 brmi 1f
3273 #else
3274 sbrs A0, 7
3275 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
3276 XJMP __fmul
3277 1: XCALL __fmul
3278 ;; C = -C iff A0.7 = 1
3279 NEG2 C0
3280 ret
3281 ENDF __fmulsu_exit
3282 #endif /* L_fmulsu */
3283
3284
3285 #ifdef L_fmul
3286 ;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
3287 ;;; Clobbers: r24, r25, __tmp_reg__
3288 DEFUN __fmul
3289 ; clear result
3290 clr C0
3291 clr C1
3292 clr A0
3293 1: tst B1
3294 ;; 1.0 = 0x80, so test bit 7 of B to see whether A must be added to C.
3295 2: brpl 3f
3296 ;; C += A
3297 add C0, A0
3298 adc C1, A1
3299 3: ;; A >>= 1
3300 lsr A1
3301 ror A0
3302 ;; B <<= 1
3303 lsl B1
3304 brne 2b
3305 ret
3306 ENDF __fmul
3307 #endif /* L_fmul */
3308
3309 #undef A0
3310 #undef A1
3311 #undef B1
3312 #undef C0
3313 #undef C1
3314
3315 #include "lib1funcs-fixed.S"