1 /* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998-2020 Free Software Foundation, Inc.
3 Contributed by Denis Chertykov <chertykov@gmail.com>
4
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
9
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #if defined (__AVR_TINY__)
25 #define __zero_reg__ r17
26 #define __tmp_reg__ r16
27 #else
28 #define __zero_reg__ r1
29 #define __tmp_reg__ r0
30 #endif
31 #define __SREG__ 0x3f
32 #if defined (__AVR_HAVE_SPH__)
33 #define __SP_H__ 0x3e
34 #endif
35 #define __SP_L__ 0x3d
36 #define __RAMPZ__ 0x3B
37 #define __EIND__ 0x3C
38
39 /* Most of the functions here are called directly from avr.md
40 patterns, instead of using the standard libcall mechanisms.
41 This can make better code because GCC knows exactly which
42 of the call-used registers (not all of them) are clobbered. */
43
44 /* FIXME: At present, there is no SORT directive in the linker
45          script, so we must not assume that different modules
46 in the same input section like .libgcc.text.mul will be
47 located close together. Therefore, we cannot use
48 RCALL/RJMP to call a function like __udivmodhi4 from
49 __divmodhi4 and have to use lengthy XCALL/XJMP even
50          though they are in the same input section and all such
51 input sections together are small enough to reach every
52 location with a RCALL/RJMP instruction. */
53
54 #if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
55 #error device not supported
56 #endif
57
58 .macro mov_l r_dest, r_src
59 #if defined (__AVR_HAVE_MOVW__)
60 movw \r_dest, \r_src
61 #else
62 mov \r_dest, \r_src
63 #endif
64 .endm
65
66 .macro mov_h r_dest, r_src
67 #if defined (__AVR_HAVE_MOVW__)
68 ; empty
69 #else
70 mov \r_dest, \r_src
71 #endif
72 .endm
73
74 .macro wmov r_dest, r_src
75 #if defined (__AVR_HAVE_MOVW__)
76 movw \r_dest, \r_src
77 #else
78 mov \r_dest, \r_src
79 mov \r_dest+1, \r_src+1
80 #endif
81 .endm
82
83 #if defined (__AVR_HAVE_JMP_CALL__)
84 #define XCALL call
85 #define XJMP jmp
86 #else
87 #define XCALL rcall
88 #define XJMP rjmp
89 #endif
90
91 #if defined (__AVR_HAVE_EIJMP_EICALL__)
92 #define XICALL eicall
93 #define XIJMP eijmp
94 #else
95 #define XICALL icall
96 #define XIJMP ijmp
97 #endif
98
99 ;; Prologue stuff
100
101 .macro do_prologue_saves n_pushed n_frame=0
102 ldi r26, lo8(\n_frame)
103 ldi r27, hi8(\n_frame)
104 ldi r30, lo8(gs(.L_prologue_saves.\@))
105 ldi r31, hi8(gs(.L_prologue_saves.\@))
106 XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
107 .L_prologue_saves.\@:
108 .endm
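;; Note: the XJMP above enters __prologue_saves__ part-way through its PUSH
;; sequence.  __prologue_saves__ pushes the 18 registers R2..R17, R28, R29;
;; as each PUSH occupies one 2-byte word, starting at byte offset
;; (18 - n_pushed) * 2 skips just enough pushes so that exactly n_pushed
;; registers are saved.  R27:R26 carries the frame size and Z the address
;; that the final indirect jump returns to.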
109
110 ;; Epilogue stuff
111
112 .macro do_epilogue_restores n_pushed n_frame=0
113 in r28, __SP_L__
114 #ifdef __AVR_HAVE_SPH__
115 in r29, __SP_H__
116 .if \n_frame > 63
117 subi r28, lo8(-\n_frame)
118 sbci r29, hi8(-\n_frame)
119 .elseif \n_frame > 0
120 adiw r28, \n_frame
121 .endif
122 #else
123 clr r29
124 .if \n_frame > 0
125 subi r28, lo8(-\n_frame)
126 .endif
127 #endif /* HAVE SPH */
128 ldi r30, \n_pushed
129 XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
130 .endm
131
132 ;; Support function entry and exit for convenience
133
134 .macro wsubi r_arg1, i_arg2
135 #if defined (__AVR_TINY__)
136 subi \r_arg1, lo8(\i_arg2)
137 sbci \r_arg1+1, hi8(\i_arg2)
138 #else
139 sbiw \r_arg1, \i_arg2
140 #endif
141 .endm
142
143 .macro waddi r_arg1, i_arg2
144 #if defined (__AVR_TINY__)
145 subi \r_arg1, lo8(-\i_arg2)
146 sbci \r_arg1+1, hi8(-\i_arg2)
147 #else
148 adiw \r_arg1, \i_arg2
149 #endif
150 .endm
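;; Note: AVR has no "add immediate" instruction.  On reduced cores (no
;; ADIW/SBIW) wsubi subtracts a 16-bit constant directly with SUBI/SBCI,
;; while waddi adds one by subtracting the negated constant; on all other
;; cores ADIW/SBIW operate on the upper register pairs directly.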
151
152 .macro DEFUN name
153 .global \name
154 .func \name
155 \name:
156 .endm
157
158 .macro ENDF name
159 .size \name, .-\name
160 .endfunc
161 .endm
162
163 .macro FALIAS name
164 .global \name
165 .func \name
166 \name:
167 .size \name, .-\name
168 .endfunc
169 .endm
170
171 ;; Skip next instruction, typically a jump target
172 #if defined(__AVR_TINY__)
173 #define skip cpse 0,0
174 #else
175 #define skip cpse 16,16
176 #endif
177
178 ;; Negate a 2-byte value held in consecutive registers
179 .macro NEG2 reg
180 com \reg+1
181 neg \reg
182 sbci \reg+1, -1
183 .endm
184
185 ;; Negate a 4-byte value held in consecutive registers
186 ;; Sets the V flag for signed overflow tests if REG >= 16
187 .macro NEG4 reg
188 com \reg+3
189 com \reg+2
190 com \reg+1
191 .if \reg >= 16
192 neg \reg
193 sbci \reg+1, -1
194 sbci \reg+2, -1
195 sbci \reg+3, -1
196 .else
197 com \reg
198 adc \reg, __zero_reg__
199 adc \reg+1, __zero_reg__
200 adc \reg+2, __zero_reg__
201 adc \reg+3, __zero_reg__
202 .endif
203 .endm
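;; Note on the negation idioms above: two's-complement negation is
;; -X = ~X + 1.  NEG2 and the upper-register variant of NEG4 complement the
;; high bytes, NEG the lowest byte, and propagate the "+1" with
;; SBCI \reg+n, -1 (subtracting -1 adds 1 minus the borrow).  The
;; lower-register variant of NEG4 relies on COM setting the carry flag, so
;; the following ADC chain adds the required 1 and ripples it upwards.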
204
205 #define exp_lo(N) hlo8 ((N) << 23)
206 #define exp_hi(N) hhi8 ((N) << 23)
207
208 \f
209 .section .text.libgcc.mul, "ax", @progbits
210
211 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
212 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
213 #if !defined (__AVR_HAVE_MUL__)
214 /*******************************************************
215 Multiplication 8 x 8 without MUL
216 *******************************************************/
217 #if defined (L_mulqi3)
218
219 #define r_arg2 r22 /* multiplicand */
220 #define r_arg1 r24 /* multiplier */
221 #define r_res __tmp_reg__ /* result */
222
223 DEFUN __mulqi3
224 clr r_res ; clear result
225 __mulqi3_loop:
226 sbrc r_arg1,0
227 add r_res,r_arg2
228 add r_arg2,r_arg2 ; shift multiplicand
229 breq __mulqi3_exit ; while multiplicand != 0
230 lsr r_arg1 ;
231 brne __mulqi3_loop ; exit if multiplier = 0
232 __mulqi3_exit:
233 mov r_arg1,r_res ; result to return register
234 ret
235 ENDF __mulqi3
236
237 #undef r_arg2
238 #undef r_arg1
239 #undef r_res
240
241 #endif /* defined (L_mulqi3) */
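/* Reference only (not part of the build): a rough C equivalent of the
   shift-and-add loop in __mulqi3 above; the wider mul*3 routines below
   without MUL follow the same scheme.  <stdint.h> types assumed.

       uint8_t mulqi3 (uint8_t a, uint8_t b)  // a: multiplier, b: multiplicand
       {
           uint8_t res = 0;
           for (;;)
             {
               if (a & 1)
                 res += b;        // bit n of a set --> add b << n
               b <<= 1;           // shift multiplicand
               if (b == 0)
                 break;           // remaining partial products are zero
               a >>= 1;
               if (a == 0)
                 break;           // multiplier exhausted
             }
           return res;
       }
*/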
242
243
244 /*******************************************************
245 Widening Multiplication 16 = 8 x 8 without MUL
246 Multiplication 16 x 16 without MUL
247 *******************************************************/
248
249 #define A0 22
250 #define A1 23
251 #define B0 24
252 #define BB0 20
253 #define B1 25
254 ;; Output overlaps input, thus expand result in CC0/1
255 #define C0 24
256 #define C1 25
257 #define CC0 __tmp_reg__
258 #define CC1 21
259
260 #if defined (L_umulqihi3)
261 ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
262 ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
263 ;;; Clobbers: __tmp_reg__, R21..R23
264 DEFUN __umulqihi3
265 clr A1
266 clr B1
267 XJMP __mulhi3
268 ENDF __umulqihi3
269 #endif /* L_umulqihi3 */
270
271 #if defined (L_mulqihi3)
272 ;;; R25:R24 = (signed int) R22 * (signed int) R24
273 ;;; (C1:C0) = (signed int) A0 * (signed int) B0
274 ;;; Clobbers: __tmp_reg__, R20..R23
275 DEFUN __mulqihi3
276 ;; Sign-extend B0
277 clr B1
278 sbrc B0, 7
279 com B1
280 ;; The multiplication runs twice as fast if A1 is zero, thus:
281 ;; Zero-extend A0
282 clr A1
283 #ifdef __AVR_HAVE_JMP_CALL__
284 ;; Store B0 * sign of A
285 clr BB0
286 sbrc A0, 7
287 mov BB0, B0
288 call __mulhi3
289 #else /* have no CALL */
290 ;; Skip sign-extension of A if A >= 0
291 ;; Same size as with the first alternative but avoids errata skip
292 ;; and is faster if A >= 0
293 sbrs A0, 7
294 rjmp __mulhi3
295 ;; If A < 0 store B
296 mov BB0, B0
297 rcall __mulhi3
298 #endif /* HAVE_JMP_CALL */
299 ;; 1-extend A after the multiplication
300 sub C1, BB0
301 ret
302 ENDF __mulqihi3
303 #endif /* L_mulqihi3 */
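;; Why the "sub C1, BB0" fix-up works: when A is negative its unsigned 8-bit
;; value is A + 256, so the product delivered by __mulhi3 is the desired
;; signed product plus 256*B (mod 2^16).  Subtracting B0 from the high byte
;; C1 removes that term; when A >= 0 no correction is applied (BB0 is zero,
;; resp. the fix-up is skipped by the tail jump to __mulhi3).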
304
305 #if defined (L_mulhi3)
306 ;;; R25:R24 = R23:R22 * R25:R24
307 ;;; (C1:C0) = (A1:A0) * (B1:B0)
308 ;;; Clobbers: __tmp_reg__, R21..R23
309 DEFUN __mulhi3
310
311 ;; Clear result
312 clr CC0
313 clr CC1
314 rjmp 3f
315 1:
316 ;; Bit n of A is 1 --> C += B << n
317 add CC0, B0
318 adc CC1, B1
319 2:
320 lsl B0
321 rol B1
322 3:
323 ;; If B == 0 we are ready
324 wsubi B0, 0
325 breq 9f
326
327 ;; Carry = n-th bit of A
328 lsr A1
329 ror A0
330 ;; If bit n of A is set, then go add B * 2^n to C
331 brcs 1b
332
333 ;; Carry = 0 --> The ROR above acts like CP A0, 0
334 ;; Thus, it is sufficient to CPC the high part to test A against 0
335 cpc A1, __zero_reg__
336 ;; Only proceed if A != 0
337 brne 2b
338 9:
339 ;; Move Result into place
340 mov C0, CC0
341 mov C1, CC1
342 ret
343 ENDF __mulhi3
344 #endif /* L_mulhi3 */
345
346 #undef A0
347 #undef A1
348 #undef B0
349 #undef BB0
350 #undef B1
351 #undef C0
352 #undef C1
353 #undef CC0
354 #undef CC1
355
356 \f
357 #define A0 22
358 #define A1 A0+1
359 #define A2 A0+2
360 #define A3 A0+3
361
362 #define B0 18
363 #define B1 B0+1
364 #define B2 B0+2
365 #define B3 B0+3
366
367 #define CC0 26
368 #define CC1 CC0+1
369 #define CC2 30
370 #define CC3 CC2+1
371
372 #define C0 22
373 #define C1 C0+1
374 #define C2 C0+2
375 #define C3 C0+3
376
377 /*******************************************************
378 Widening Multiplication 32 = 16 x 16 without MUL
379 *******************************************************/
380
381 #if defined (L_umulhisi3)
382 DEFUN __umulhisi3
383 wmov B0, 24
384 ;; Zero-extend B
385 clr B2
386 clr B3
387 ;; Zero-extend A
388 wmov A2, B2
389 XJMP __mulsi3
390 ENDF __umulhisi3
391 #endif /* L_umulhisi3 */
392
393 #if defined (L_mulhisi3)
394 DEFUN __mulhisi3
395 wmov B0, 24
396 ;; Sign-extend B
397 lsl r25
398 sbc B2, B2
399 mov B3, B2
400 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
401 ;; Sign-extend A
402 clr A2
403 sbrc A1, 7
404 com A2
405 mov A3, A2
406 XJMP __mulsi3
407 #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
408 ;; Zero-extend A and __mulsi3 will run at least twice as fast
409 ;; compared to a sign-extended A.
410 clr A2
411 clr A3
412 sbrs A1, 7
413 XJMP __mulsi3
414     ;; If A < 0 then account for the B * 0xffff.... term before the
415     ;; actual multiplication by initializing the high part of the
416     ;; result CC with -B.
417 wmov CC2, A2
418 sub CC2, B0
419 sbc CC3, B1
420 XJMP __mulsi3_helper
421 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
422 ENDF __mulhisi3
423 #endif /* L_mulhisi3 */
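;; The fix-up above uses the identity: for A < 0 the 32-bit sign extension of
;; A equals A + 0xFFFF * 0x10000, hence sext(A) * B = A*B - 0x10000 * B
;; (mod 2^32).  Pre-loading the high half of the accumulator CC with -B adds
;; exactly that correction term before the unsigned multiplication runs.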
424
425
426 /*******************************************************
427 Multiplication 32 x 32 without MUL
428 *******************************************************/
429
430 #if defined (L_mulsi3)
431 DEFUN __mulsi3
432 #if defined (__AVR_TINY__)
433 in r26, __SP_L__ ; safe to use X, as it is CC0/CC1
434 in r27, __SP_H__
435 subi r26, lo8(-3) ; Add 3 to point past return address
436 sbci r27, hi8(-3)
437 push B0 ; save callee saved regs
438 push B1
439 ld B0, X+ ; load from caller stack
440 ld B1, X+
441 ld B2, X+
442 ld B3, X
443 #endif
444 ;; Clear result
445 clr CC2
446 clr CC3
447 ;; FALLTHRU
448 ENDF __mulsi3
449
450 DEFUN __mulsi3_helper
451 clr CC0
452 clr CC1
453 rjmp 3f
454
455 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
456 ;; CC += B
457 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
458
459 2: ;; B <<= 1
460 lsl B0 $ rol B1 $ rol B2 $ rol B3
461
462 3: ;; A >>= 1: Carry = n-th bit of A
463 lsr A3 $ ror A2 $ ror A1 $ ror A0
464
465 brcs 1b
466 ;; Only continue if A != 0
467 sbci A1, 0
468 brne 2b
469 wsubi A2, 0
470 brne 2b
471
472 ;; All bits of A are consumed: Copy result to return register C
473 wmov C0, CC0
474 wmov C2, CC2
475 #if defined (__AVR_TINY__)
476 pop B1 ; restore callee saved regs
477 pop B0
478 #endif /* defined (__AVR_TINY__) */
479
480 ret
481 ENDF __mulsi3_helper
482 #endif /* L_mulsi3 */
483
484 #undef A0
485 #undef A1
486 #undef A2
487 #undef A3
488 #undef B0
489 #undef B1
490 #undef B2
491 #undef B3
492 #undef C0
493 #undef C1
494 #undef C2
495 #undef C3
496 #undef CC0
497 #undef CC1
498 #undef CC2
499 #undef CC3
500
501 #endif /* !defined (__AVR_HAVE_MUL__) */
502 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
503 \f
504 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
505 #if defined (__AVR_HAVE_MUL__)
506 #define A0 26
507 #define B0 18
508 #define C0 22
509
510 #define A1 A0+1
511
512 #define B1 B0+1
513 #define B2 B0+2
514 #define B3 B0+3
515
516 #define C1 C0+1
517 #define C2 C0+2
518 #define C3 C0+3
519
520 /*******************************************************
521 Widening Multiplication 32 = 16 x 16 with MUL
522 *******************************************************/
523
524 #if defined (L_mulhisi3)
525 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
526 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
527 ;;; Clobbers: __tmp_reg__
528 DEFUN __mulhisi3
529 XCALL __umulhisi3
530 ;; Sign-extend B
531 tst B1
532 brpl 1f
533 sub C2, A0
534 sbc C3, A1
535 1: ;; Sign-extend A
536 XJMP __usmulhisi3_tail
537 ENDF __mulhisi3
538 #endif /* L_mulhisi3 */
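;; Sign handling above in a nutshell: for 16-bit two's complement values,
;; signed(X) = unsigned(X) - 0x10000 when X < 0.  Therefore, modulo 2^32,
;; A*B (signed) = A*B (unsigned) - 0x10000*B [if A < 0] - 0x10000*A [if B < 0],
;; i.e. subtract the other operand from the high 16 bits of the unsigned
;; product once per negative operand -- which is what the two fix-ups do.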
539
540 #if defined (L_usmulhisi3)
541 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
542 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
543 ;;; Clobbers: __tmp_reg__
544 DEFUN __usmulhisi3
545 XCALL __umulhisi3
546 ;; FALLTHRU
547 ENDF __usmulhisi3
548
549 DEFUN __usmulhisi3_tail
550 ;; Sign-extend A
551 sbrs A1, 7
552 ret
553 sub C2, B0
554 sbc C3, B1
555 ret
556 ENDF __usmulhisi3_tail
557 #endif /* L_usmulhisi3 */
558
559 #if defined (L_umulhisi3)
560 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
561 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
562 ;;; Clobbers: __tmp_reg__
563 DEFUN __umulhisi3
564 mul A0, B0
565 movw C0, r0
566 mul A1, B1
567 movw C2, r0
568 mul A0, B1
569 #ifdef __AVR_HAVE_JMP_CALL__
570 ;; This function is used by many other routines, often multiple times.
571 ;; Therefore, if the flash size is not too limited, avoid the RCALL
572     ;; and invest 6 Bytes to speed things up.
573 add C1, r0
574 adc C2, r1
575 clr __zero_reg__
576 adc C3, __zero_reg__
577 #else
578 rcall 1f
579 #endif
580 mul A1, B0
581 1: add C1, r0
582 adc C2, r1
583 clr __zero_reg__
584 adc C3, __zero_reg__
585 ret
586 ENDF __umulhisi3
587 #endif /* L_umulhisi3 */
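/* Reference only (not part of the build): the 16 x 16 -> 32 product above is
   the schoolbook decomposition into four hardware 8 x 8 MULs.  Rough C
   equivalent, with <stdint.h> types assumed:

       uint32_t umulhisi3 (uint16_t a, uint16_t b)
       {
           uint8_t a0 = a, a1 = a >> 8;
           uint8_t b0 = b, b1 = b >> 8;
           return (uint32_t) a0 * b0             // low  partial product
                  + ((uint32_t) a0 * b1 << 8)    // cross terms, weight 2^8
                  + ((uint32_t) a1 * b0 << 8)
                  + ((uint32_t) a1 * b1 << 16);  // high partial product
       }
*/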
588
589 /*******************************************************
590 Widening Multiplication 32 = 16 x 32 with MUL
591 *******************************************************/
592
593 #if defined (L_mulshisi3)
594 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
595 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
596 ;;; Clobbers: __tmp_reg__
597 DEFUN __mulshisi3
598 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
599     ;; Some cores have problems skipping a 2-word instruction
600 tst A1
601 brmi __mulohisi3
602 #else
603 sbrs A1, 7
604 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
605 XJMP __muluhisi3
606 ;; FALLTHRU
607 ENDF __mulshisi3
608
609 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
610 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
611 ;;; Clobbers: __tmp_reg__
612 DEFUN __mulohisi3
613 XCALL __muluhisi3
614 ;; One-extend R27:R26 (A1:A0)
615 sub C2, B0
616 sbc C3, B1
617 ret
618 ENDF __mulohisi3
619 #endif /* L_mulshisi3 */
620
621 #if defined (L_muluhisi3)
622 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
623 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
624 ;;; Clobbers: __tmp_reg__
625 DEFUN __muluhisi3
626 XCALL __umulhisi3
627 mul A0, B3
628 add C3, r0
629 mul A1, B2
630 add C3, r0
631 mul A0, B2
632 add C2, r0
633 adc C3, r1
634 clr __zero_reg__
635 ret
636 ENDF __muluhisi3
637 #endif /* L_muluhisi3 */
638
639 /*******************************************************
640 Multiplication 32 x 32 with MUL
641 *******************************************************/
642
643 #if defined (L_mulsi3)
644 ;;; R25:R22 = R25:R22 * R21:R18
645 ;;; (C3:C0) = C3:C0 * B3:B0
646 ;;; Clobbers: R26, R27, __tmp_reg__
647 DEFUN __mulsi3
648 movw A0, C0
649 push C2
650 push C3
651 XCALL __muluhisi3
652 pop A1
653 pop A0
654 ;; A1:A0 now contains the high word of A
655 mul A0, B0
656 add C2, r0
657 adc C3, r1
658 mul A0, B1
659 add C3, r0
660 mul A1, B0
661 add C3, r0
662 clr __zero_reg__
663 ret
664 ENDF __mulsi3
665 #endif /* L_mulsi3 */
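;; The 32 x 32 low product above is computed as
;;     A*B mod 2^32 = Alo*B  +  2^16 * (Ahi*B mod 2^16)
;; __muluhisi3 delivers the first term (16 x 32, low 32 bits); the three
;; trailing MULs add the byte products of Ahi that still land below 2^32.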
666
667 #undef A0
668 #undef A1
669
670 #undef B0
671 #undef B1
672 #undef B2
673 #undef B3
674
675 #undef C0
676 #undef C1
677 #undef C2
678 #undef C3
679
680 #endif /* __AVR_HAVE_MUL__ */
681
682 /*******************************************************
683 Multiplication 24 x 24 with MUL
684 *******************************************************/
685
686 #if defined (L_mulpsi3)
687
688 ;; A[0..2]: In: Multiplicand; Out: Product
689 #define A0 22
690 #define A1 A0+1
691 #define A2 A0+2
692
693 ;; B[0..2]: In: Multiplier
694 #define B0 18
695 #define B1 B0+1
696 #define B2 B0+2
697
698 #if defined (__AVR_HAVE_MUL__)
699
700 ;; C[0..2]: Expand Result
701 #define C0 22
702 #define C1 C0+1
703 #define C2 C0+2
704
705 ;; R24:R22 *= R20:R18
706 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
707
708 #define AA0 26
709 #define AA2 21
710
711 DEFUN __mulpsi3
712 wmov AA0, A0
713 mov AA2, A2
714 XCALL __umulhisi3
715 mul AA2, B0 $ add C2, r0
716 mul AA0, B2 $ add C2, r0
717 clr __zero_reg__
718 ret
719 ENDF __mulpsi3
720
721 #undef AA2
722 #undef AA0
723
724 #undef C2
725 #undef C1
726 #undef C0
727
728 #else /* !HAVE_MUL */
729 ;; C[0..2]: Expand Result
730 #if defined (__AVR_TINY__)
731 #define C0 16
732 #else
733 #define C0 0
734 #endif /* defined (__AVR_TINY__) */
735 #define C1 C0+1
736 #define C2 21
737
738 ;; R24:R22 *= R20:R18
739 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
740
741 DEFUN __mulpsi3
742 #if defined (__AVR_TINY__)
743 in r26,__SP_L__
744 in r27,__SP_H__
745 subi r26, lo8(-3) ; Add 3 to point past return address
746 sbci r27, hi8(-3)
747 push B0 ; save callee saved regs
748 push B1
749 ld B0,X+ ; load from caller stack
750 ld B1,X+
751 ld B2,X+
752 #endif /* defined (__AVR_TINY__) */
753
754 ;; C[] = 0
755 clr __tmp_reg__
756 clr C2
757
758 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
759 LSR B2 $ ror B1 $ ror B0
760
761 ;; If the N-th Bit of B[] was set...
762 brcc 1f
763
764 ;; ...then add A[] * 2^N to the Result C[]
765 ADD C0,A0 $ adc C1,A1 $ adc C2,A2
766
767 1: ;; Multiply A[] by 2
768 LSL A0 $ rol A1 $ rol A2
769
770 ;; Loop until B[] is 0
771 subi B0,0 $ sbci B1,0 $ sbci B2,0
772 brne 0b
773
774 ;; Copy C[] to the return Register A[]
775 wmov A0, C0
776 mov A2, C2
777
778 clr __zero_reg__
779 #if defined (__AVR_TINY__)
780 pop B1
781 pop B0
782 #endif /* (__AVR_TINY__) */
783 ret
784 ENDF __mulpsi3
785
786 #undef C2
787 #undef C1
788 #undef C0
789
790 #endif /* HAVE_MUL */
791
792 #undef B2
793 #undef B1
794 #undef B0
795
796 #undef A2
797 #undef A1
798 #undef A0
799
800 #endif /* L_mulpsi3 */
801
802 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
803
804 ;; A[0..2]: In: Multiplicand
805 #define A0 22
806 #define A1 A0+1
807 #define A2 A0+2
808
809 ;; BB: In: Multiplier
810 #define BB 25
811
812 ;; C[0..2]: Result
813 #define C0 18
814 #define C1 C0+1
815 #define C2 C0+2
816
817 ;; C[] = A[] * sign_extend (BB)
818 DEFUN __mulsqipsi3
819 mul A0, BB
820 movw C0, r0
821 mul A2, BB
822 mov C2, r0
823 mul A1, BB
824 add C1, r0
825 adc C2, r1
826 clr __zero_reg__
827 sbrs BB, 7
828 ret
829 ;; One-extend BB
830 sub C1, A0
831 sbc C2, A1
832 ret
833 ENDF __mulsqipsi3
834
835 #undef C2
836 #undef C1
837 #undef C0
838
839 #undef BB
840
841 #undef A2
842 #undef A1
843 #undef A0
844
845 #endif /* L_mulsqipsi3 && HAVE_MUL */
846
847 /*******************************************************
848 Multiplication 64 x 64
849 *******************************************************/
850
851 ;; A[] = A[] * B[]
852
853 ;; A[0..7]: In: Multiplicand
854 ;; Out: Product
855 #define A0 18
856 #define A1 A0+1
857 #define A2 A0+2
858 #define A3 A0+3
859 #define A4 A0+4
860 #define A5 A0+5
861 #define A6 A0+6
862 #define A7 A0+7
863
864 ;; B[0..7]: In: Multiplier
865 #define B0 10
866 #define B1 B0+1
867 #define B2 B0+2
868 #define B3 B0+3
869 #define B4 B0+4
870 #define B5 B0+5
871 #define B6 B0+6
872 #define B7 B0+7
873
874 #ifndef __AVR_TINY__
875 #if defined (__AVR_HAVE_MUL__)
876 ;; Define C[] for convenience
877 ;; Notice that parts of C[] overlap A[] respective B[]
878 #define C0 16
879 #define C1 C0+1
880 #define C2 20
881 #define C3 C2+1
882 #define C4 28
883 #define C5 C4+1
884 #define C6 C4+2
885 #define C7 C4+3
886
887 #if defined (L_muldi3)
888
889 ;; A[] *= B[]
890 ;; R25:R18 *= R17:R10
891 ;; Ordinary ABI-Function
892
893 DEFUN __muldi3
894 push r29
895 push r28
896 push r17
897 push r16
898
899 ;; Counting in Words, we have to perform a 4 * 4 Multiplication
900
901 ;; 3 * 0 + 0 * 3
902 mul A7,B0 $ $ mov C7,r0
903 mul A0,B7 $ $ add C7,r0
904 mul A6,B1 $ $ add C7,r0
905 mul A6,B0 $ mov C6,r0 $ add C7,r1
906 mul B6,A1 $ $ add C7,r0
907 mul B6,A0 $ add C6,r0 $ adc C7,r1
908
909 ;; 1 * 2
910 mul A2,B4 $ add C6,r0 $ adc C7,r1
911 mul A3,B4 $ $ add C7,r0
912 mul A2,B5 $ $ add C7,r0
913
914 push A5
915 push A4
916 push B1
917 push B0
918 push A3
919 push A2
920
921 ;; 0 * 0
922 wmov 26, B0
923 XCALL __umulhisi3
924 wmov C0, 22
925 wmov C2, 24
926
927 ;; 0 * 2
928 wmov 26, B4
929 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
930
931 wmov 26, B2
932 ;; 0 * 1
933 XCALL __muldi3_6
934
935 pop A0
936 pop A1
937 ;; 1 * 1
938 wmov 26, B2
939 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
940
941 pop r26
942 pop r27
943 ;; 1 * 0
944 XCALL __muldi3_6
945
946 pop A0
947 pop A1
948 ;; 2 * 0
949 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
950
951 ;; 2 * 1
952 wmov 26, B2
953 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
954
955 ;; A[] = C[]
956 wmov A0, C0
957 ;; A2 = C2 already
958 wmov A4, C4
959 wmov A6, C6
960
961 pop r16
962 pop r17
963 pop r28
964 pop r29
965 ret
966 ENDF __muldi3
967 #endif /* L_muldi3 */
968
969 #if defined (L_muldi3_6)
970 ;; A helper for some 64-bit multiplications with MUL available
971 DEFUN __muldi3_6
972 __muldi3_6:
973 XCALL __umulhisi3
974 add C2, 22
975 adc C3, 23
976 adc C4, 24
977 adc C5, 25
978 brcc 0f
979 adiw C6, 1
980 0: ret
981 ENDF __muldi3_6
982 #endif /* L_muldi3_6 */
983
984 #undef C7
985 #undef C6
986 #undef C5
987 #undef C4
988 #undef C3
989 #undef C2
990 #undef C1
991 #undef C0
992
993 #else /* !HAVE_MUL */
994
995 #if defined (L_muldi3)
996
997 #define C0 26
998 #define C1 C0+1
999 #define C2 C0+2
1000 #define C3 C0+3
1001 #define C4 C0+4
1002 #define C5 C0+5
1003 #define C6 0
1004 #define C7 C6+1
1005
1006 #define Loop 9
1007
1008 ;; A[] *= B[]
1009 ;; R25:R18 *= R17:R10
1010 ;; Ordinary ABI-Function
1011
1012 DEFUN __muldi3
1013 push r29
1014 push r28
1015 push Loop
1016
1017 ldi C0, 64
1018 mov Loop, C0
1019
1020 ;; C[] = 0
1021 clr __tmp_reg__
1022 wmov C0, 0
1023 wmov C2, 0
1024 wmov C4, 0
1025
1026 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
1027 ;; where N = 64 - Loop.
1028 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
1029 ;; B[] will have its initial Value again.
1030 LSR B7 $ ror B6 $ ror B5 $ ror B4
1031 ror B3 $ ror B2 $ ror B1 $ ror B0
1032
1033 ;; If the N-th Bit of B[] was set then...
1034 brcc 1f
1035 ;; ...finish Rotation...
1036 ori B7, 1 << 7
1037
1038 ;; ...and add A[] * 2^N to the Result C[]
1039 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
1040 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
1041
1042 1: ;; Multiply A[] by 2
1043 LSL A0 $ rol A1 $ rol A2 $ rol A3
1044 rol A4 $ rol A5 $ rol A6 $ rol A7
1045
1046 dec Loop
1047 brne 0b
1048
1049 ;; We expanded the Result in C[]
1050 ;; Copy Result to the Return Register A[]
1051 wmov A0, C0
1052 wmov A2, C2
1053 wmov A4, C4
1054 wmov A6, C6
1055
1056 clr __zero_reg__
1057 pop Loop
1058 pop r28
1059 pop r29
1060 ret
1061 ENDF __muldi3
1062
1063 #undef Loop
1064
1065 #undef C7
1066 #undef C6
1067 #undef C5
1068 #undef C4
1069 #undef C3
1070 #undef C2
1071 #undef C1
1072 #undef C0
1073
1074 #endif /* L_muldi3 */
1075 #endif /* HAVE_MUL */
1076 #endif /* if not __AVR_TINY__ */
1077
1078 #undef B7
1079 #undef B6
1080 #undef B5
1081 #undef B4
1082 #undef B3
1083 #undef B2
1084 #undef B1
1085 #undef B0
1086
1087 #undef A7
1088 #undef A6
1089 #undef A5
1090 #undef A4
1091 #undef A3
1092 #undef A2
1093 #undef A1
1094 #undef A0
1095
1096 /*******************************************************
1097 Widening Multiplication 64 = 32 x 32 with MUL
1098 *******************************************************/
1099
1100 #if defined (__AVR_HAVE_MUL__)
1101 #define A0 r22
1102 #define A1 r23
1103 #define A2 r24
1104 #define A3 r25
1105
1106 #define B0 r18
1107 #define B1 r19
1108 #define B2 r20
1109 #define B3 r21
1110
1111 #define C0 18
1112 #define C1 C0+1
1113 #define C2 20
1114 #define C3 C2+1
1115 #define C4 28
1116 #define C5 C4+1
1117 #define C6 C4+2
1118 #define C7 C4+3
1119
1120 #if defined (L_umulsidi3)
1121
1122 ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1123
1124 ;; R18[8] = R22[4] * R18[4]
1125 ;;
1126 ;; Ordinary ABI Function, but additionally sets
1127 ;; X = R20[2] = B2[2]
1128 ;; Z = R22[2] = A0[2]
1129 DEFUN __umulsidi3
1130 clt
1131 ;; FALLTHRU
1132 ENDF __umulsidi3
1133 ;; T = sign (A)
1134 DEFUN __umulsidi3_helper
1135 push 29 $ push 28 ; Y
1136 wmov 30, A2
1137 ;; Counting in Words, we have to perform 4 Multiplications
1138 ;; 0 * 0
1139 wmov 26, A0
1140 XCALL __umulhisi3
1141 push 23 $ push 22 ; C0
1142 wmov 28, B0
1143 wmov 18, B2
1144 wmov C2, 24
1145 push 27 $ push 26 ; A0
1146 push 19 $ push 18 ; B2
1147 ;;
1148 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1149 ;; B2 C2 -- -- -- B0 A2
1150 ;; 1 * 1
1151 wmov 26, 30 ; A2
1152 XCALL __umulhisi3
1153 ;; Sign-extend A. T holds the sign of A
1154 brtc 0f
1155 ;; Subtract B from the high part of the result
1156 sub 22, 28
1157 sbc 23, 29
1158 sbc 24, 18
1159 sbc 25, 19
1160 0: wmov 18, 28 ;; B0
1161 wmov C4, 22
1162 wmov C6, 24
1163 ;;
1164 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1165 ;; B0 C2 -- -- A2 C4 C6
1166 ;;
1167 ;; 1 * 0
1168 XCALL __muldi3_6
1169 ;; 0 * 1
1170 pop 26 $ pop 27 ;; B2
1171 pop 18 $ pop 19 ;; A0
1172 XCALL __muldi3_6
1173
1174 ;; Move result C into place and save A0 in Z
1175 wmov 22, C4
1176 wmov 24, C6
1177 wmov 30, 18 ; A0
1178 pop C0 $ pop C1
1179
1180 ;; Epilogue
1181 pop 28 $ pop 29 ;; Y
1182 ret
1183 ENDF __umulsidi3_helper
1184 #endif /* L_umulsidi3 */
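;; Word decomposition used above: with A = 2^16*Ah + Al and B = 2^16*Bh + Bl,
;;     A*B = 2^32*(Ah*Bh) + 2^16*(Ah*Bl + Al*Bh) + Al*Bl
;; i.e. four 16 x 16 -> 32 partial products from __umulhisi3, accumulated at
;; byte offsets 0, 2 and 4 of the 64-bit result (__muldi3_6 performs the
;; offset-2 accumulation including the carry into the upper words).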
1185
1186
1187 #if defined (L_mulsidi3)
1188
1189 ;; Signed widening 64 = 32 * 32 Multiplication
1190 ;;
1191 ;; R18[8] = R22[4] * R18[4]
1192 ;; Ordinary ABI Function
1193 DEFUN __mulsidi3
1194 bst A3, 7
1195 sbrs B3, 7 ; Enhanced core has no skip bug
1196 XJMP __umulsidi3_helper
1197
1198 ;; B needs sign-extension
1199 push A3
1200 push A2
1201 XCALL __umulsidi3_helper
1202 ;; A0 survived in Z
1203 sub r22, r30
1204 sbc r23, r31
1205 pop r26
1206 pop r27
1207 sbc r24, r26
1208 sbc r25, r27
1209 ret
1210 ENDF __mulsidi3
1211 #endif /* L_mulsidi3 */
1212
1213 #undef A0
1214 #undef A1
1215 #undef A2
1216 #undef A3
1217 #undef B0
1218 #undef B1
1219 #undef B2
1220 #undef B3
1221 #undef C0
1222 #undef C1
1223 #undef C2
1224 #undef C3
1225 #undef C4
1226 #undef C5
1227 #undef C6
1228 #undef C7
1229 #endif /* HAVE_MUL */
1230
1231 /**********************************************************
1232 Widening Multiplication 64 = 32 x 32 without MUL
1233 **********************************************************/
1234 #ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
1235 #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1236 #define A0 18
1237 #define A1 A0+1
1238 #define A2 A0+2
1239 #define A3 A0+3
1240 #define A4 A0+4
1241 #define A5 A0+5
1242 #define A6 A0+6
1243 #define A7 A0+7
1244
1245 #define B0 10
1246 #define B1 B0+1
1247 #define B2 B0+2
1248 #define B3 B0+3
1249 #define B4 B0+4
1250 #define B5 B0+5
1251 #define B6 B0+6
1252 #define B7 B0+7
1253
1254 #define AA0 22
1255 #define AA1 AA0+1
1256 #define AA2 AA0+2
1257 #define AA3 AA0+3
1258
1259 #define BB0 18
1260 #define BB1 BB0+1
1261 #define BB2 BB0+2
1262 #define BB3 BB0+3
1263
1264 #define Mask r30
1265
1266 ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1267 ;;
1268 ;; R18[8] = R22[4] * R18[4]
1269 ;; Ordinary ABI Function
1270 DEFUN __mulsidi3
1271 set
1272 skip
1273 ;; FALLTHRU
1274 ENDF __mulsidi3
1275
1276 DEFUN __umulsidi3
1277 clt ; skipped
1278 ;; Save 10 Registers: R10..R17, R28, R29
1279 do_prologue_saves 10
1280 ldi Mask, 0xff
1281 bld Mask, 7
1282 ;; Move B into place...
1283 wmov B0, BB0
1284 wmov B2, BB2
1285 ;; ...and extend it
1286 and BB3, Mask
1287 lsl BB3
1288 sbc B4, B4
1289 mov B5, B4
1290 wmov B6, B4
1291 ;; Move A into place...
1292 wmov A0, AA0
1293 wmov A2, AA2
1294 ;; ...and extend it
1295 and AA3, Mask
1296 lsl AA3
1297 sbc A4, A4
1298 mov A5, A4
1299 wmov A6, A4
1300 XCALL __muldi3
1301 do_epilogue_restores 10
1302 ENDF __umulsidi3
1303
1304 #undef A0
1305 #undef A1
1306 #undef A2
1307 #undef A3
1308 #undef A4
1309 #undef A5
1310 #undef A6
1311 #undef A7
1312 #undef B0
1313 #undef B1
1314 #undef B2
1315 #undef B3
1316 #undef B4
1317 #undef B5
1318 #undef B6
1319 #undef B7
1320 #undef AA0
1321 #undef AA1
1322 #undef AA2
1323 #undef AA3
1324 #undef BB0
1325 #undef BB1
1326 #undef BB2
1327 #undef BB3
1328 #undef Mask
1329 #endif /* L_mulsidi3 && !HAVE_MUL */
1330 #endif /* if not __AVR_TINY__ */
1331 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1332
1333 \f
1334 .section .text.libgcc.div, "ax", @progbits
1335
1336 /*******************************************************
1337 Division 8 / 8 => (result + remainder)
1338 *******************************************************/
1339 #define r_rem r25 /* remainder */
1340 #define r_arg1 r24 /* dividend, quotient */
1341 #define r_arg2 r22 /* divisor */
1342 #define r_cnt r23 /* loop count */
1343
1344 #if defined (L_udivmodqi4)
1345 DEFUN __udivmodqi4
1346 sub r_rem,r_rem ; clear remainder and carry
1347 ldi r_cnt,9 ; init loop counter
1348 rjmp __udivmodqi4_ep ; jump to entry point
1349 __udivmodqi4_loop:
1350 rol r_rem ; shift dividend into remainder
1351 cp r_rem,r_arg2 ; compare remainder & divisor
1352         brcs    __udivmodqi4_ep ; remainder < divisor
1353 sub r_rem,r_arg2 ; restore remainder
1354 __udivmodqi4_ep:
1355 rol r_arg1 ; shift dividend (with CARRY)
1356 dec r_cnt ; decrement loop counter
1357 brne __udivmodqi4_loop
1358 com r_arg1 ; complement result
1359 ; because C flag was complemented in loop
1360 ret
1361 ENDF __udivmodqi4
1362 #endif /* defined (L_udivmodqi4) */
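/* Reference only (not part of the build): a rough C equivalent of the
   restoring shift-and-subtract division above.  The assembly keeps the
   quotient bits in the dividend register (complemented, hence the final COM)
   and rotates them in through the carry, but the arithmetic is the same.
   <stdint.h> types assumed; the wider udivmod routines follow the same scheme.

       uint8_t udivmodqi4 (uint8_t num, uint8_t den, uint8_t *rem)
       {
           uint8_t r = 0, q = 0;
           for (uint8_t i = 0; i < 8; ++i)
             {
               r = (r << 1) | (num >> 7);   // next dividend bit -> remainder
               num <<= 1;
               q <<= 1;
               if (r >= den)
                 {
                   r -= den;                // divisor fits: subtract ...
                   q |= 1;                  // ... and set this quotient bit
                 }
             }
           *rem = r;
           return q;
       }
*/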
1363
1364 #if defined (L_divmodqi4)
1365 DEFUN __divmodqi4
1366 bst r_arg1,7 ; store sign of dividend
1367 mov __tmp_reg__,r_arg1
1368 eor __tmp_reg__,r_arg2; r0.7 is sign of result
1369 sbrc r_arg1,7
1370 neg r_arg1 ; dividend negative : negate
1371 sbrc r_arg2,7
1372 neg r_arg2 ; divisor negative : negate
1373 XCALL __udivmodqi4 ; do the unsigned div/mod
1374 brtc __divmodqi4_1
1375 neg r_rem ; correct remainder sign
1376 __divmodqi4_1:
1377 sbrc __tmp_reg__,7
1378 neg r_arg1 ; correct result sign
1379 __divmodqi4_exit:
1380 ret
1381 ENDF __divmodqi4
1382 #endif /* defined (L_divmodqi4) */
1383
1384 #undef r_rem
1385 #undef r_arg1
1386 #undef r_arg2
1387 #undef r_cnt
1388
1389
1390 /*******************************************************
1391 Division 16 / 16 => (result + remainder)
1392 *******************************************************/
1393 #define r_remL r26 /* remainder Low */
1394 #define r_remH r27 /* remainder High */
1395
1396 /* return: remainder */
1397 #define r_arg1L r24 /* dividend Low */
1398 #define r_arg1H r25 /* dividend High */
1399
1400 /* return: quotient */
1401 #define r_arg2L r22 /* divisor Low */
1402 #define r_arg2H r23 /* divisor High */
1403
1404 #define r_cnt r21 /* loop count */
1405
1406 #if defined (L_udivmodhi4)
1407 DEFUN __udivmodhi4
1408 sub r_remL,r_remL
1409 sub r_remH,r_remH ; clear remainder and carry
1410 ldi r_cnt,17 ; init loop counter
1411 rjmp __udivmodhi4_ep ; jump to entry point
1412 __udivmodhi4_loop:
1413 rol r_remL ; shift dividend into remainder
1414 rol r_remH
1415 cp r_remL,r_arg2L ; compare remainder & divisor
1416 cpc r_remH,r_arg2H
1417 brcs __udivmodhi4_ep ; remainder < divisor
1418 sub r_remL,r_arg2L ; restore remainder
1419 sbc r_remH,r_arg2H
1420 __udivmodhi4_ep:
1421 rol r_arg1L ; shift dividend (with CARRY)
1422 rol r_arg1H
1423 dec r_cnt ; decrement loop counter
1424 brne __udivmodhi4_loop
1425 com r_arg1L
1426 com r_arg1H
1427 ; div/mod results to return registers, as for the div() function
1428 mov_l r_arg2L, r_arg1L ; quotient
1429 mov_h r_arg2H, r_arg1H
1430 mov_l r_arg1L, r_remL ; remainder
1431 mov_h r_arg1H, r_remH
1432 ret
1433 ENDF __udivmodhi4
1434 #endif /* defined (L_udivmodhi4) */
1435
1436 #if defined (L_divmodhi4)
1437 DEFUN __divmodhi4
1438 .global _div
1439 _div:
1440 bst r_arg1H,7 ; store sign of dividend
1441 mov __tmp_reg__,r_arg2H
1442 brtc 0f
1443 com __tmp_reg__ ; r0.7 is sign of result
1444 rcall __divmodhi4_neg1 ; dividend negative: negate
1445 0:
1446 sbrc r_arg2H,7
1447 rcall __divmodhi4_neg2 ; divisor negative: negate
1448 XCALL __udivmodhi4 ; do the unsigned div/mod
1449 sbrc __tmp_reg__,7
1450 rcall __divmodhi4_neg2 ; correct remainder sign
1451 brtc __divmodhi4_exit
1452 __divmodhi4_neg1:
1453 ;; correct dividend/remainder sign
1454 com r_arg1H
1455 neg r_arg1L
1456 sbci r_arg1H,0xff
1457 ret
1458 __divmodhi4_neg2:
1459 ;; correct divisor/result sign
1460 com r_arg2H
1461 neg r_arg2L
1462 sbci r_arg2H,0xff
1463 __divmodhi4_exit:
1464 ret
1465 ENDF __divmodhi4
1466 #endif /* defined (L_divmodhi4) */
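;; Signed division convention used above (and by the other divmod routines):
;; the quotient is negative iff the operand signs differ (r0.7 = signA ^ signB)
;; and truncates toward zero; the remainder takes the sign of the dividend
;; (kept in the T flag), so e.g. -7 / 2 = -3 with remainder -1.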
1467
1468 #undef r_remH
1469 #undef r_remL
1470
1471 #undef r_arg1H
1472 #undef r_arg1L
1473
1474 #undef r_arg2H
1475 #undef r_arg2L
1476
1477 #undef r_cnt
1478
1479 /*******************************************************
1480 Division 24 / 24 => (result + remainder)
1481 *******************************************************/
1482
1483 ;; A[0..2]: In: Dividend; Out: Quotient
1484 #define A0 22
1485 #define A1 A0+1
1486 #define A2 A0+2
1487
1488 ;; B[0..2]: In: Divisor; Out: Remainder
1489 #define B0 18
1490 #define B1 B0+1
1491 #define B2 B0+2
1492
1493 ;; C[0..2]: Expand remainder
1494 #define C0 __zero_reg__
1495 #define C1 26
1496 #define C2 25
1497
1498 ;; Loop counter
1499 #define r_cnt 21
1500
1501 #if defined (L_udivmodpsi4)
1502 ;; R24:R22 = R24:R22  udiv  R20:R18
1503 ;; R20:R18 = R24:R22 umod R20:R18
1504 ;; Clobbers: R21, R25, R26
1505
1506 DEFUN __udivmodpsi4
1507 ; init loop counter
1508 ldi r_cnt, 24+1
1509 ; Clear remainder and carry. C0 is already 0
1510 clr C1
1511 sub C2, C2
1512 ; jump to entry point
1513 rjmp __udivmodpsi4_start
1514 __udivmodpsi4_loop:
1515 ; shift dividend into remainder
1516 rol C0
1517 rol C1
1518 rol C2
1519 ; compare remainder & divisor
1520 cp C0, B0
1521 cpc C1, B1
1522 cpc C2, B2
1523     brcs    __udivmodpsi4_start ; remainder < divisor
1524 sub C0, B0 ; restore remainder
1525 sbc C1, B1
1526 sbc C2, B2
1527 __udivmodpsi4_start:
1528 ; shift dividend (with CARRY)
1529 rol A0
1530 rol A1
1531 rol A2
1532 ; decrement loop counter
1533 dec r_cnt
1534 brne __udivmodpsi4_loop
1535 com A0
1536 com A1
1537 com A2
1538 ; div/mod results to return registers
1539 ; remainder
1540 mov B0, C0
1541 mov B1, C1
1542 mov B2, C2
1543 clr __zero_reg__ ; C0
1544 ret
1545 ENDF __udivmodpsi4
1546 #endif /* defined (L_udivmodpsi4) */
1547
1548 #if defined (L_divmodpsi4)
1549 ;; R24:R22 = R24:R22 div R20:R18
1550 ;; R20:R18 = R24:R22 mod R20:R18
1551 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1552
1553 DEFUN __divmodpsi4
1554 ; R0.7 will contain the sign of the result:
1555 ; R0.7 = A.sign ^ B.sign
1556 mov __tmp_reg__, B2
1557 ; T-flag = sign of dividend
1558 bst A2, 7
1559 brtc 0f
1560 com __tmp_reg__
1561 ; Adjust dividend's sign
1562 rcall __divmodpsi4_negA
1563 0:
1564 ; Adjust divisor's sign
1565 sbrc B2, 7
1566 rcall __divmodpsi4_negB
1567
1568 ; Do the unsigned div/mod
1569 XCALL __udivmodpsi4
1570
1571 ; Adjust quotient's sign
1572 sbrc __tmp_reg__, 7
1573 rcall __divmodpsi4_negA
1574
1575 ; Adjust remainder's sign
1576 brtc __divmodpsi4_end
1577
1578 __divmodpsi4_negB:
1579 ; Correct divisor/remainder sign
1580 com B2
1581 com B1
1582 neg B0
1583 sbci B1, -1
1584 sbci B2, -1
1585 ret
1586
1587 ; Correct dividend/quotient sign
1588 __divmodpsi4_negA:
1589 com A2
1590 com A1
1591 neg A0
1592 sbci A1, -1
1593 sbci A2, -1
1594 __divmodpsi4_end:
1595 ret
1596
1597 ENDF __divmodpsi4
1598 #endif /* defined (L_divmodpsi4) */
1599
1600 #undef A0
1601 #undef A1
1602 #undef A2
1603
1604 #undef B0
1605 #undef B1
1606 #undef B2
1607
1608 #undef C0
1609 #undef C1
1610 #undef C2
1611
1612 #undef r_cnt
1613
1614 /*******************************************************
1615 Division 32 / 32 => (result + remainder)
1616 *******************************************************/
1617 #define r_remHH r31 /* remainder High */
1618 #define r_remHL r30
1619 #define r_remH r27
1620 #define r_remL r26 /* remainder Low */
1621
1622 /* return: remainder */
1623 #define r_arg1HH r25 /* dividend High */
1624 #define r_arg1HL r24
1625 #define r_arg1H r23
1626 #define r_arg1L r22 /* dividend Low */
1627
1628 /* return: quotient */
1629 #define r_arg2HH r21 /* divisor High */
1630 #define r_arg2HL r20
1631 #define r_arg2H r19
1632 #define r_arg2L r18 /* divisor Low */
1633
1634 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1635
1636 #if defined (L_udivmodsi4)
1637 DEFUN __udivmodsi4
1638 ldi r_remL, 33 ; init loop counter
1639 mov r_cnt, r_remL
1640 sub r_remL,r_remL
1641 sub r_remH,r_remH ; clear remainder and carry
1642 mov_l r_remHL, r_remL
1643 mov_h r_remHH, r_remH
1644 rjmp __udivmodsi4_ep ; jump to entry point
1645 __udivmodsi4_loop:
1646 rol r_remL ; shift dividend into remainder
1647 rol r_remH
1648 rol r_remHL
1649 rol r_remHH
1650 cp r_remL,r_arg2L ; compare remainder & divisor
1651 cpc r_remH,r_arg2H
1652 cpc r_remHL,r_arg2HL
1653 cpc r_remHH,r_arg2HH
1654         brcs    __udivmodsi4_ep ; remainder < divisor
1655 sub r_remL,r_arg2L ; restore remainder
1656 sbc r_remH,r_arg2H
1657 sbc r_remHL,r_arg2HL
1658 sbc r_remHH,r_arg2HH
1659 __udivmodsi4_ep:
1660 rol r_arg1L ; shift dividend (with CARRY)
1661 rol r_arg1H
1662 rol r_arg1HL
1663 rol r_arg1HH
1664 dec r_cnt ; decrement loop counter
1665 brne __udivmodsi4_loop
1666 ; __zero_reg__ now restored (r_cnt == 0)
1667 com r_arg1L
1668 com r_arg1H
1669 com r_arg1HL
1670 com r_arg1HH
1671 ; div/mod results to return registers, as for the ldiv() function
1672 mov_l r_arg2L, r_arg1L ; quotient
1673 mov_h r_arg2H, r_arg1H
1674 mov_l r_arg2HL, r_arg1HL
1675 mov_h r_arg2HH, r_arg1HH
1676 mov_l r_arg1L, r_remL ; remainder
1677 mov_h r_arg1H, r_remH
1678 mov_l r_arg1HL, r_remHL
1679 mov_h r_arg1HH, r_remHH
1680 ret
1681 ENDF __udivmodsi4
1682 #endif /* defined (L_udivmodsi4) */
1683
1684 #if defined (L_divmodsi4)
1685 DEFUN __divmodsi4
1686 mov __tmp_reg__,r_arg2HH
1687 bst r_arg1HH,7 ; store sign of dividend
1688 brtc 0f
1689 com __tmp_reg__ ; r0.7 is sign of result
1690 XCALL __negsi2 ; dividend negative: negate
1691 0:
1692 sbrc r_arg2HH,7
1693 rcall __divmodsi4_neg2 ; divisor negative: negate
1694 XCALL __udivmodsi4 ; do the unsigned div/mod
1695 sbrc __tmp_reg__, 7 ; correct quotient sign
1696 rcall __divmodsi4_neg2
1697 brtc __divmodsi4_exit ; correct remainder sign
1698 XJMP __negsi2
1699 __divmodsi4_neg2:
1700 ;; correct divisor/quotient sign
1701 com r_arg2HH
1702 com r_arg2HL
1703 com r_arg2H
1704 neg r_arg2L
1705 sbci r_arg2H,0xff
1706 sbci r_arg2HL,0xff
1707 sbci r_arg2HH,0xff
1708 __divmodsi4_exit:
1709 ret
1710 ENDF __divmodsi4
1711 #endif /* defined (L_divmodsi4) */
1712
1713 #if defined (L_negsi2)
1714 ;; (set (reg:SI 22)
1715 ;; (neg:SI (reg:SI 22)))
1716 ;; Sets the V flag for signed overflow tests
1717 DEFUN __negsi2
1718 NEG4 22
1719 ret
1720 ENDF __negsi2
1721 #endif /* L_negsi2 */
1722
1723 #undef r_remHH
1724 #undef r_remHL
1725 #undef r_remH
1726 #undef r_remL
1727 #undef r_arg1HH
1728 #undef r_arg1HL
1729 #undef r_arg1H
1730 #undef r_arg1L
1731 #undef r_arg2HH
1732 #undef r_arg2HL
1733 #undef r_arg2H
1734 #undef r_arg2L
1735 #undef r_cnt
1736
1737 /* *di routines use registers below R19 and won't work with tiny arch
1738 right now. */
1739
1740 #if !defined (__AVR_TINY__)
1741 /*******************************************************
1742 Division 64 / 64
1743 Modulo 64 % 64
1744 *******************************************************/
1745
1746 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1747 ;; at least 16k of Program Memory.  For smaller Devices, the choice
1748 ;; depends on MOVW and SP Size.  There is a Connection between SP Size
1749 ;; and Flash Size, so SP Size can be used to test for Flash Size.
1750
1751 #if defined (__AVR_HAVE_JMP_CALL__)
1752 # define SPEED_DIV 8
1753 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1754 # define SPEED_DIV 16
1755 #else
1756 # define SPEED_DIV 0
1757 #endif
1758
1759 ;; A[0..7]: In: Dividend;
1760 ;; Out: Quotient (T = 0)
1761 ;; Out: Remainder (T = 1)
1762 #define A0 18
1763 #define A1 A0+1
1764 #define A2 A0+2
1765 #define A3 A0+3
1766 #define A4 A0+4
1767 #define A5 A0+5
1768 #define A6 A0+6
1769 #define A7 A0+7
1770
1771 ;; B[0..7]: In: Divisor; Out: Clobber
1772 #define B0 10
1773 #define B1 B0+1
1774 #define B2 B0+2
1775 #define B3 B0+3
1776 #define B4 B0+4
1777 #define B5 B0+5
1778 #define B6 B0+6
1779 #define B7 B0+7
1780
1781 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1782 #define C0 8
1783 #define C1 C0+1
1784 #define C2 30
1785 #define C3 C2+1
1786 #define C4 28
1787 #define C5 C4+1
1788 #define C6 26
1789 #define C7 C6+1
1790
1791 ;; Holds Signs during Division Routine
1792 #define SS __tmp_reg__
1793
1794 ;; Bit-Counter in Division Routine
1795 #define R_cnt __zero_reg__
1796
1797 ;; Scratch Register for Negation
1798 #define NN r31
1799
1800 #if defined (L_udivdi3)
1801
1802 ;; R25:R18 = R25:R18  umod  R17:R10
1803 ;; Ordinary ABI-Function
1804
1805 DEFUN __umoddi3
1806 set
1807 rjmp __udivdi3_umoddi3
1808 ENDF __umoddi3
1809
1810 ;; R25:R18 = R25:R18  udiv  R17:R10
1811 ;; Ordinary ABI-Function
1812
1813 DEFUN __udivdi3
1814 clt
1815 ENDF __udivdi3
1816
1817 DEFUN __udivdi3_umoddi3
1818 push C0
1819 push C1
1820 push C4
1821 push C5
1822 XCALL __udivmod64
1823 pop C5
1824 pop C4
1825 pop C1
1826 pop C0
1827 ret
1828 ENDF __udivdi3_umoddi3
1829 #endif /* L_udivdi3 */
1830
1831 #if defined (L_udivmod64)
1832
1833 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1834 ;; No Registers saved/restored; the Callers will take Care.
1835 ;; Preserves B[] and T-flag
1836 ;; T = 0: Compute Quotient in A[]
1837 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1838
1839 DEFUN __udivmod64
1840
1841 ;; Clear Remainder (C6, C7 will follow)
1842 clr C0
1843 clr C1
1844 wmov C2, C0
1845 wmov C4, C0
1846 ldi C7, 64
1847
1848 #if SPEED_DIV == 0 || SPEED_DIV == 16
1849 ;; Initialize Loop-Counter
1850 mov R_cnt, C7
1851 wmov C6, C0
1852 #endif /* SPEED_DIV */
1853
1854 #if SPEED_DIV == 8
1855
1856 push A7
1857 clr C6
1858
1859 1:  ;; Compare shifted Dividend against Divisor
1860 ;; If -- even after Shifting -- it is smaller...
1861 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1862 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1863 brcc 2f
1864
1865 ;; ...then we can subtract it. Thus, it is legal to shift left
1866 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1867 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1868 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1869 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1870
1871 ;; 8 Bits are done
1872 subi C7, 8
1873 brne 1b
1874
1875 ;; Shifted 64 Bits: A7 has traveled to C7
1876 pop C7
1877 ;; Divisor is greater than Dividend. We have:
1878 ;; A[] % B[] = A[]
1879 ;; A[] / B[] = 0
1880 ;; Thus, we can return immediately
1881 rjmp 5f
1882
1883 2:  ;; Initialize Bit-Counter with Number of Bits still to be performed
1884 mov R_cnt, C7
1885
1886     ;; The pushed A7 is not needed anymore: pop it just to rebalance the stack
1887 pop C7
1888 clr C7
1889
1890 #elif SPEED_DIV == 16
1891
1892 ;; Compare shifted Dividend against Divisor
1893 cp A7, B3
1894 cpc C0, B4
1895 cpc C1, B5
1896 cpc C2, B6
1897 cpc C3, B7
1898 brcc 2f
1899
1900     ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1901 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1902 wmov C2,A6 $ wmov C0,A4
1903 wmov A6,A2 $ wmov A4,A0
1904 wmov A2,C6 $ wmov A0,C4
1905
1906 ;; Set Bit Counter to 32
1907 lsr R_cnt
1908 2:
1909 #elif SPEED_DIV
1910 #error SPEED_DIV = ?
1911 #endif /* SPEED_DIV */
1912
1913     ;; The actual Division + Remainder Routine
1914
1915 3: ;; Left-shift Dividend...
1916 lsl A0 $ rol A1 $ rol A2 $ rol A3
1917 rol A4 $ rol A5 $ rol A6 $ rol A7
1918
1919 ;; ...into Remainder
1920 rol C0 $ rol C1 $ rol C2 $ rol C3
1921 rol C4 $ rol C5 $ rol C6 $ rol C7
1922
1923 ;; Compare Remainder and Divisor
1924 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1925 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1926
1927 brcs 4f
1928
1929 ;; Divisor fits into Remainder: Subtract it from Remainder...
1930 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1931 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1932
1933 ;; ...and set according Bit in the upcoming Quotient
1934 ;; The Bit will travel to its final Position
1935 ori A0, 1
1936
1937 4: ;; This Bit is done
1938 dec R_cnt
1939 brne 3b
1940 ;; __zero_reg__ is 0 again
1941
1942 ;; T = 0: We are fine with the Quotient in A[]
1943 ;; T = 1: Copy Remainder to A[]
1944 5: brtc 6f
1945 wmov A0, C0
1946 wmov A2, C2
1947 wmov A4, C4
1948 wmov A6, C6
1949 ;; Move the Sign of the Result to SS.7
1950 lsl SS
1951
1952 6: ret
1953
1954 ENDF __udivmod64
1955 #endif /* L_udivmod64 */
1956
1957
1958 #if defined (L_divdi3)
1959
1960 ;; R25:R18 = R25:R18  mod  R17:R10
1961 ;; Ordinary ABI-Function
1962
1963 DEFUN __moddi3
1964 set
1965 rjmp __divdi3_moddi3
1966 ENDF __moddi3
1967
1968 ;; R25:R18 = R25:R18  div  R17:R10
1969 ;; Ordinary ABI-Function
1970
1971 DEFUN __divdi3
1972 clt
1973 ENDF __divdi3
1974
1975 DEFUN __divdi3_moddi3
1976 #if SPEED_DIV
1977 mov r31, A7
1978 or r31, B7
1979 brmi 0f
1980     ;; Both Signs are 0:  the following Complexity is not needed
1981 XJMP __udivdi3_umoddi3
1982 #endif /* SPEED_DIV */
1983
1984 0: ;; The Prologue
1985 ;; Save 12 Registers: Y, 17...8
1986 ;; No Frame needed
1987 do_prologue_saves 12
1988
1989 ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1990 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1991 mov SS, A7
1992 asr SS
1993 ;; Adjust Dividend's Sign as needed
1994 #if SPEED_DIV
1995 ;; Compiling for Speed we know that at least one Sign must be < 0
1996 ;; Thus, if A[] >= 0 then we know B[] < 0
1997 brpl 22f
1998 #else
1999 brpl 21f
2000 #endif /* SPEED_DIV */
2001
2002 XCALL __negdi2
2003
2004 ;; Adjust Divisor's Sign and SS.7 as needed
2005 21: tst B7
2006 brpl 3f
2007 22: ldi NN, 1 << 7
2008 eor SS, NN
2009
2010 ldi NN, -1
2011 com B4 $ com B5 $ com B6 $ com B7
2012 $ com B1 $ com B2 $ com B3
2013 NEG B0
2014 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
2015 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
2016
2017 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
2018 XCALL __udivmod64
2019
2020 ;; Adjust Result's Sign
2021 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2022 tst SS
2023 brpl 4f
2024 #else
2025 sbrc SS, 7
2026 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2027 XCALL __negdi2
2028
2029 4: ;; Epilogue: Restore 12 Registers and return
2030 do_epilogue_restores 12
2031
2032 ENDF __divdi3_moddi3
2033
2034 #endif /* L_divdi3 */
2035
2036 #undef R_cnt
2037 #undef SS
2038 #undef NN
2039
2040 .section .text.libgcc, "ax", @progbits
2041
2042 #define TT __tmp_reg__
2043
2044 #if defined (L_adddi3)
2045 ;; (set (reg:DI 18)
2046 ;; (plus:DI (reg:DI 18)
2047 ;; (reg:DI 10)))
2048 ;; Sets the V flag for signed overflow tests
2049 ;; Sets the C flag for unsigned overflow tests
2050 DEFUN __adddi3
2051 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
2052 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
2053 ret
2054 ENDF __adddi3
2055 #endif /* L_adddi3 */
2056
2057 #if defined (L_adddi3_s8)
2058 ;; (set (reg:DI 18)
2059 ;; (plus:DI (reg:DI 18)
2060 ;; (sign_extend:SI (reg:QI 26))))
2061 ;; Sets the V flag for signed overflow tests
2062 ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
2063 DEFUN __adddi3_s8
2064 clr TT
2065 sbrc r26, 7
2066 com TT
2067 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
2068 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
2069 ret
2070 ENDF __adddi3_s8
2071 #endif /* L_adddi3_s8 */
2072
2073 #if defined (L_subdi3)
2074 ;; (set (reg:DI 18)
2075 ;; (minus:DI (reg:DI 18)
2076 ;; (reg:DI 10)))
2077 ;; Sets the V flag for signed overflow tests
2078 ;; Sets the C flag for unsigned overflow tests
2079 DEFUN __subdi3
2080 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
2081 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
2082 ret
2083 ENDF __subdi3
2084 #endif /* L_subdi3 */
2085
2086 #if defined (L_cmpdi2)
2087 ;; (set (cc0)
2088 ;; (compare (reg:DI 18)
2089 ;; (reg:DI 10)))
2090 DEFUN __cmpdi2
2091 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
2092 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
2093 ret
2094 ENDF __cmpdi2
2095 #endif /* L_cmpdi2 */
2096
2097 #if defined (L_cmpdi2_s8)
2098 ;; (set (cc0)
2099 ;; (compare (reg:DI 18)
2100 ;; (sign_extend:SI (reg:QI 26))))
2101 DEFUN __cmpdi2_s8
2102 clr TT
2103 sbrc r26, 7
2104 com TT
2105 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
2106 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
2107 ret
2108 ENDF __cmpdi2_s8
2109 #endif /* L_cmpdi2_s8 */
2110
2111 #if defined (L_negdi2)
2112 ;; (set (reg:DI 18)
2113 ;; (neg:DI (reg:DI 18)))
2114 ;; Sets the V flag for signed overflow tests
2115 DEFUN __negdi2
2116
2117 com A4 $ com A5 $ com A6 $ com A7
2118 $ com A1 $ com A2 $ com A3
2119 NEG A0
2120 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
2121 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
2122 ret
2123
2124 ENDF __negdi2
2125 #endif /* L_negdi2 */
2126
2127 #undef TT
2128
2129 #undef C7
2130 #undef C6
2131 #undef C5
2132 #undef C4
2133 #undef C3
2134 #undef C2
2135 #undef C1
2136 #undef C0
2137
2138 #undef B7
2139 #undef B6
2140 #undef B5
2141 #undef B4
2142 #undef B3
2143 #undef B2
2144 #undef B1
2145 #undef B0
2146
2147 #undef A7
2148 #undef A6
2149 #undef A5
2150 #undef A4
2151 #undef A3
2152 #undef A2
2153 #undef A1
2154 #undef A0
2155
2156 #endif /* !defined (__AVR_TINY__) */
2157
2158 \f
2159 .section .text.libgcc.prologue, "ax", @progbits
2160
2161 /**********************************
2162 * This is a prologue subroutine
2163 **********************************/
2164 #if !defined (__AVR_TINY__)
2165 #if defined (L_prologue)
2166
2167 ;; This function does not clobber T-flag; 64-bit division relies on it
2168 DEFUN __prologue_saves__
2169 push r2
2170 push r3
2171 push r4
2172 push r5
2173 push r6
2174 push r7
2175 push r8
2176 push r9
2177 push r10
2178 push r11
2179 push r12
2180 push r13
2181 push r14
2182 push r15
2183 push r16
2184 push r17
2185 push r28
2186 push r29
2187 #if !defined (__AVR_HAVE_SPH__)
2188 in r28,__SP_L__
2189 sub r28,r26
2190 out __SP_L__,r28
2191 clr r29
2192 #elif defined (__AVR_XMEGA__)
2193 in r28,__SP_L__
2194 in r29,__SP_H__
2195 sub r28,r26
2196 sbc r29,r27
2197 out __SP_L__,r28
2198 out __SP_H__,r29
2199 #else
2200 in r28,__SP_L__
2201 in r29,__SP_H__
2202 sub r28,r26
2203 sbc r29,r27
2204 in __tmp_reg__,__SREG__
2205 cli
2206 out __SP_H__,r29
2207 out __SREG__,__tmp_reg__
2208 out __SP_L__,r28
2209 #endif /* #SP = 8/16 */
2210
2211 XIJMP
2212
2213 ENDF __prologue_saves__
2214 #endif /* defined (L_prologue) */
2215
2216 /*
2217 * This is an epilogue subroutine
2218 */
2219 #if defined (L_epilogue)
2220
2221 DEFUN __epilogue_restores__
2222 ldd r2,Y+18
2223 ldd r3,Y+17
2224 ldd r4,Y+16
2225 ldd r5,Y+15
2226 ldd r6,Y+14
2227 ldd r7,Y+13
2228 ldd r8,Y+12
2229 ldd r9,Y+11
2230 ldd r10,Y+10
2231 ldd r11,Y+9
2232 ldd r12,Y+8
2233 ldd r13,Y+7
2234 ldd r14,Y+6
2235 ldd r15,Y+5
2236 ldd r16,Y+4
2237 ldd r17,Y+3
2238 ldd r26,Y+2
2239 #if !defined (__AVR_HAVE_SPH__)
2240 ldd r29,Y+1
2241 add r28,r30
2242 out __SP_L__,r28
2243 mov r28, r26
2244 #elif defined (__AVR_XMEGA__)
2245 ldd r27,Y+1
2246 add r28,r30
2247 adc r29,__zero_reg__
2248 out __SP_L__,r28
2249 out __SP_H__,r29
2250 wmov 28, 26
2251 #else
2252 ldd r27,Y+1
2253 add r28,r30
2254 adc r29,__zero_reg__
2255 in __tmp_reg__,__SREG__
2256 cli
2257 out __SP_H__,r29
2258 out __SREG__,__tmp_reg__
2259 out __SP_L__,r28
2260 mov_l r28, r26
2261 mov_h r29, r27
2262 #endif /* #SP = 8/16 */
2263 ret
2264 ENDF __epilogue_restores__
2265 #endif /* defined (L_epilogue) */
2266 #endif /* !defined (__AVR_TINY__) */
2267
2268 #ifdef L_exit
2269 .section .fini9,"ax",@progbits
2270 DEFUN _exit
2271 .weak exit
2272 exit:
2273 ENDF _exit
2274
2275 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
2276
2277 .section .fini0,"ax",@progbits
2278 cli
2279 __stop_program:
2280 rjmp __stop_program
2281 #endif /* defined (L_exit) */
2282
2283 #ifdef L_cleanup
2284 .weak _cleanup
2285 .func _cleanup
2286 _cleanup:
2287 ret
2288 .endfunc
2289 #endif /* defined (L_cleanup) */
2290
2291 \f
2292 .section .text.libgcc, "ax", @progbits
2293
2294 #ifdef L_tablejump2
2295 DEFUN __tablejump2__
2296 lsl r30
2297 rol r31
2298 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2299 ;; Word address of gs() jumptable entry in R24:Z
2300 rol r24
2301 out __RAMPZ__, r24
2302 #elif defined (__AVR_HAVE_ELPM__)
2303 ;; Word address of jumptable entry in Z
2304 clr __tmp_reg__
2305 rol __tmp_reg__
2306 out __RAMPZ__, __tmp_reg__
2307 #endif
2308
2309 ;; Read word address from jumptable and jump
2310
2311 #if defined (__AVR_HAVE_ELPMX__)
2312 elpm __tmp_reg__, Z+
2313 elpm r31, Z
2314 mov r30, __tmp_reg__
2315 #ifdef __AVR_HAVE_RAMPD__
2316 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2317 out __RAMPZ__, __zero_reg__
2318 #endif /* RAMPD */
2319 XIJMP
2320 #elif defined (__AVR_HAVE_ELPM__)
2321 elpm
2322 push r0
2323 adiw r30, 1
2324 elpm
2325 push r0
2326 ret
2327 #elif defined (__AVR_HAVE_LPMX__)
2328 lpm __tmp_reg__, Z+
2329 lpm r31, Z
2330 mov r30, __tmp_reg__
2331 ijmp
2332 #elif defined (__AVR_TINY__)
2333 wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
2334 ld __tmp_reg__, Z+
2335 ld r31, Z ; Use ld instead of lpm to load Z
2336 mov r30, __tmp_reg__
2337 ijmp
2338 #else
2339 lpm
2340 push r0
2341 adiw r30, 1
2342 lpm
2343 push r0
2344 ret
2345 #endif
2346 ENDF __tablejump2__
2347 #endif /* L_tablejump2 */
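;; Summary of __tablejump2__: on entry Z (extended by R24 on EIJMP devices)
;; holds the *word* address of a jump-table entry, so the initial LSL/ROL
;; converts it into a byte address for (E)LPM (the bit shifted out lands in
;; RAMPZ where needed).  The entry itself is the word address of the jump
;; target; it is either loaded into Z and reached with IJMP/EIJMP, or pushed
;; onto the stack so that RET "returns" to it.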
2348
2349 #if defined(__AVR_TINY__)
2350 #ifdef L_copy_data
2351 .section .init4,"ax",@progbits
2352 .global __do_copy_data
2353 __do_copy_data:
2354 ldi r18, hi8(__data_end)
2355 ldi r26, lo8(__data_start)
2356 ldi r27, hi8(__data_start)
2357 ldi r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2358 ldi r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2359 rjmp .L__do_copy_data_start
2360 .L__do_copy_data_loop:
2361 ld r19, z+
2362 st X+, r19
2363 .L__do_copy_data_start:
2364 cpi r26, lo8(__data_end)
2365 cpc r27, r18
2366 brne .L__do_copy_data_loop
2367 #endif
2368 #else
2369 #ifdef L_copy_data
2370 .section .init4,"ax",@progbits
2371 DEFUN __do_copy_data
2372 #if defined(__AVR_HAVE_ELPMX__)
2373 ldi r17, hi8(__data_end)
2374 ldi r26, lo8(__data_start)
2375 ldi r27, hi8(__data_start)
2376 ldi r30, lo8(__data_load_start)
2377 ldi r31, hi8(__data_load_start)
2378 ldi r16, hh8(__data_load_start)
2379 out __RAMPZ__, r16
2380 rjmp .L__do_copy_data_start
2381 .L__do_copy_data_loop:
2382 elpm r0, Z+
2383 st X+, r0
2384 .L__do_copy_data_start:
2385 cpi r26, lo8(__data_end)
2386 cpc r27, r17
2387 brne .L__do_copy_data_loop
2388 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
2389 ldi r17, hi8(__data_end)
2390 ldi r26, lo8(__data_start)
2391 ldi r27, hi8(__data_start)
2392 ldi r30, lo8(__data_load_start)
2393 ldi r31, hi8(__data_load_start)
2394 ldi r16, hh8(__data_load_start - 0x10000)
2395 .L__do_copy_data_carry:
2396 inc r16
2397 out __RAMPZ__, r16
2398 rjmp .L__do_copy_data_start
2399 .L__do_copy_data_loop:
2400 elpm
2401 st X+, r0
2402 adiw r30, 1
2403 brcs .L__do_copy_data_carry
2404 .L__do_copy_data_start:
2405 cpi r26, lo8(__data_end)
2406 cpc r27, r17
2407 brne .L__do_copy_data_loop
2408 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2409 ldi r17, hi8(__data_end)
2410 ldi r26, lo8(__data_start)
2411 ldi r27, hi8(__data_start)
2412 ldi r30, lo8(__data_load_start)
2413 ldi r31, hi8(__data_load_start)
2414 rjmp .L__do_copy_data_start
2415 .L__do_copy_data_loop:
2416 #if defined (__AVR_HAVE_LPMX__)
2417 lpm r0, Z+
2418 #else
2419 lpm
2420 adiw r30, 1
2421 #endif
2422 st X+, r0
2423 .L__do_copy_data_start:
2424 cpi r26, lo8(__data_end)
2425 cpc r27, r17
2426 brne .L__do_copy_data_loop
2427 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2428 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2429 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2430 out __RAMPZ__, __zero_reg__
2431 #endif /* ELPM && RAMPD */
2432 ENDF __do_copy_data
2433 #endif /* L_copy_data */
2434 #endif /* !defined (__AVR_TINY__) */
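/* For reference, a rough C sketch of the initialization __do_copy_data
   performs (illustrative only, not assembled; data_start, data_end,
   data_load and read_flash_byte are hypothetical stand-ins for the linker
   symbols __data_start, __data_end, __data_load_start and for a flash read):

       #include <stdint.h>
       #include <stddef.h>

       extern uint8_t data_start[], data_end[];
       extern uint32_t data_load;
       extern uint8_t read_flash_byte (uint32_t byte_address);

       // Copy the .data initializers from flash into RAM before main().
       static void
       do_copy_data_model (void)
       {
         size_t n = (size_t) (data_end - data_start);
         for (size_t i = 0; i < n; i++)
           data_start[i] = read_flash_byte (data_load + i);
       }
*/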
2435
2436 /* __do_clear_bss is only necessary if there is anything in the .bss section. */
2437
2438 #ifdef L_clear_bss
2439 .section .init4,"ax",@progbits
2440 DEFUN __do_clear_bss
2441 ldi r18, hi8(__bss_end)
2442 ldi r26, lo8(__bss_start)
2443 ldi r27, hi8(__bss_start)
2444 rjmp .do_clear_bss_start
2445 .do_clear_bss_loop:
2446 st X+, __zero_reg__
2447 .do_clear_bss_start:
2448 cpi r26, lo8(__bss_end)
2449 cpc r27, r18
2450 brne .do_clear_bss_loop
2451 ENDF __do_clear_bss
2452 #endif /* L_clear_bss */
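/* For reference, a rough C sketch of what __do_clear_bss does (illustrative
   only; bss_start and bss_end are hypothetical stand-ins for the linker
   symbols __bss_start and __bss_end):

       #include <stdint.h>

       extern uint8_t bss_start[], bss_end[];

       // Zero the .bss region before main() runs.
       static void
       do_clear_bss_model (void)
       {
         for (uint8_t *p = bss_start; p != bss_end; p++)
           *p = 0;
       }
*/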
2453
2454 /* __do_global_ctors and __do_global_dtors are only necessary
2455 if there are any constructors/destructors. */
2456
2457 #if defined(__AVR_TINY__)
2458 #define cdtors_tst_reg r18
2459 #else
2460 #define cdtors_tst_reg r17
2461 #endif
2462
2463 #ifdef L_ctors
2464 .section .init6,"ax",@progbits
2465 DEFUN __do_global_ctors
2466 ldi cdtors_tst_reg, pm_hi8(__ctors_start)
2467 ldi r28, pm_lo8(__ctors_end)
2468 ldi r29, pm_hi8(__ctors_end)
2469 #ifdef __AVR_HAVE_EIJMP_EICALL__
2470 ldi r16, pm_hh8(__ctors_end)
2471 #endif /* HAVE_EIJMP */
2472 rjmp .L__do_global_ctors_start
2473 .L__do_global_ctors_loop:
2474 wsubi 28, 1
2475 #ifdef __AVR_HAVE_EIJMP_EICALL__
2476 sbc r16, __zero_reg__
2477 mov r24, r16
2478 #endif /* HAVE_EIJMP */
2479 mov_h r31, r29
2480 mov_l r30, r28
2481 XCALL __tablejump2__
2482 .L__do_global_ctors_start:
2483 cpi r28, pm_lo8(__ctors_start)
2484 cpc r29, cdtors_tst_reg
2485 #ifdef __AVR_HAVE_EIJMP_EICALL__
2486 ldi r24, pm_hh8(__ctors_start)
2487 cpc r16, r24
2488 #endif /* HAVE_EIJMP */
2489 brne .L__do_global_ctors_loop
2490 ENDF __do_global_ctors
2491 #endif /* L_ctors */
2492
2493 #ifdef L_dtors
2494 .section .fini6,"ax",@progbits
2495 DEFUN __do_global_dtors
2496 ldi cdtors_tst_reg, pm_hi8(__dtors_end)
2497 ldi r28, pm_lo8(__dtors_start)
2498 ldi r29, pm_hi8(__dtors_start)
2499 #ifdef __AVR_HAVE_EIJMP_EICALL__
2500 ldi r16, pm_hh8(__dtors_start)
2501 #endif /* HAVE_EIJMP */
2502 rjmp .L__do_global_dtors_start
2503 .L__do_global_dtors_loop:
2504 #ifdef __AVR_HAVE_EIJMP_EICALL__
2505 mov r24, r16
2506 #endif /* HAVE_EIJMP */
2507 mov_h r31, r29
2508 mov_l r30, r28
2509 XCALL __tablejump2__
2510 waddi 28, 1
2511 #ifdef __AVR_HAVE_EIJMP_EICALL__
2512 adc r16, __zero_reg__
2513 #endif /* HAVE_EIJMP */
2514 .L__do_global_dtors_start:
2515 cpi r28, pm_lo8(__dtors_end)
2516 cpc r29, cdtors_tst_reg
2517 #ifdef __AVR_HAVE_EIJMP_EICALL__
2518 ldi r24, pm_hh8(__dtors_end)
2519 cpc r16, r24
2520 #endif /* HAVE_EIJMP */
2521 brne .L__do_global_dtors_loop
2522 ENDF __do_global_dtors
2523 #endif /* L_dtors */
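/* For reference, a rough C sketch of the iteration order implemented above
   (illustrative only; ctors_start, ctors_end, dtors_start and dtors_end are
   hypothetical stand-ins for the corresponding linker symbols, and the
   indirect calls stand in for the dispatch through __tablejump2__):

       typedef void (*func_t) (void);

       extern func_t ctors_start[], ctors_end[];
       extern func_t dtors_start[], dtors_end[];

       // Constructors run from the end of the table down to its start ...
       static void
       do_global_ctors_model (void)
       {
         for (func_t *p = ctors_end; p != ctors_start; )
           (*--p) ();
       }

       // ... destructors run from the start of the table up to its end.
       static void
       do_global_dtors_model (void)
       {
         for (func_t *p = dtors_start; p != dtors_end; p++)
           (*p) ();
       }
*/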
2524
2525 #undef cdtors_tst_reg
2526
2527 .section .text.libgcc, "ax", @progbits
2528
2529 #if !defined (__AVR_TINY__)
2530 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2531 ;; Loading n bytes from Flash; n = 3,4
2532 ;; R22... = Flash[Z]
2533 ;; Clobbers: __tmp_reg__
2534
2535 #if (defined (L_load_3) \
2536 || defined (L_load_4)) \
2537 && !defined (__AVR_HAVE_LPMX__)
2538
2539 ;; Destination
2540 #define D0 22
2541 #define D1 D0+1
2542 #define D2 D0+2
2543 #define D3 D0+3
2544
2545 .macro .load dest, n
2546 lpm
2547 mov \dest, r0
2548 .if \dest != D0+\n-1
2549 adiw r30, 1
2550 .else
2551 sbiw r30, \n-1
2552 .endif
2553 .endm
2554
2555 #if defined (L_load_3)
2556 DEFUN __load_3
2557 push D3
2558 XCALL __load_4
2559 pop D3
2560 ret
2561 ENDF __load_3
2562 #endif /* L_load_3 */
2563
2564 #if defined (L_load_4)
2565 DEFUN __load_4
2566 .load D0, 4
2567 .load D1, 4
2568 .load D2, 4
2569 .load D3, 4
2570 ret
2571 ENDF __load_4
2572 #endif /* L_load_4 */
2573
2574 #endif /* L_load_3 || L_load_4 */
2575 #endif /* !defined (__AVR_TINY__) */
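/* For reference, a rough C sketch of what __load_4 returns (illustrative
   only; read_flash_byte is a hypothetical stand-in for LPM):

       #include <stdint.h>

       extern uint8_t read_flash_byte (uint16_t byte_address);

       // Four bytes are read from flash at Z, least significant byte first,
       // into R22..R25; Z is left unchanged on return.
       static uint32_t
       load_4_model (uint16_t z)
       {
         uint32_t v = 0;
         for (uint8_t i = 0; i < 4; i++)
           v |= (uint32_t) read_flash_byte (z + i) << (8 * i);
         return v;
       }
*/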
2576
2577 #if !defined (__AVR_TINY__)
2578 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2579 ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2580 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2581 ;; Clobbers: __tmp_reg__, R21, R30, R31
2582
2583 #if (defined (L_xload_1) \
2584 || defined (L_xload_2) \
2585 || defined (L_xload_3) \
2586 || defined (L_xload_4))
2587
2588 ;; Destination
2589 #define D0 22
2590 #define D1 D0+1
2591 #define D2 D0+2
2592 #define D3 D0+3
2593
2594 ;; Register containing bits 16+ of the address
2595
2596 #define HHI8 21
2597
2598 .macro .xload dest, n
2599 #if defined (__AVR_HAVE_ELPMX__)
2600 elpm \dest, Z+
2601 #elif defined (__AVR_HAVE_ELPM__)
2602 elpm
2603 mov \dest, r0
2604 .if \dest != D0+\n-1
2605 adiw r30, 1
2606 adc HHI8, __zero_reg__
2607 out __RAMPZ__, HHI8
2608 .endif
2609 #elif defined (__AVR_HAVE_LPMX__)
2610 lpm \dest, Z+
2611 #else
2612 lpm
2613 mov \dest, r0
2614 .if \dest != D0+\n-1
2615 adiw r30, 1
2616 .endif
2617 #endif
2618 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2619 .if \dest == D0+\n-1
2620 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2621 out __RAMPZ__, __zero_reg__
2622 .endif
2623 #endif
2624 .endm ; .xload
2625
2626 #if defined (L_xload_1)
2627 DEFUN __xload_1
2628 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2629 sbrc HHI8, 7
2630 ld D0, Z
2631 sbrs HHI8, 7
2632 lpm D0, Z
2633 ret
2634 #else
2635 sbrc HHI8, 7
2636 rjmp 1f
2637 #if defined (__AVR_HAVE_ELPM__)
2638 out __RAMPZ__, HHI8
2639 #endif /* __AVR_HAVE_ELPM__ */
2640 .xload D0, 1
2641 ret
2642 1: ld D0, Z
2643 ret
2644 #endif /* LPMx && ! ELPM */
2645 ENDF __xload_1
2646 #endif /* L_xload_1 */
2647
2648 #if defined (L_xload_2)
2649 DEFUN __xload_2
2650 sbrc HHI8, 7
2651 rjmp 1f
2652 #if defined (__AVR_HAVE_ELPM__)
2653 out __RAMPZ__, HHI8
2654 #endif /* __AVR_HAVE_ELPM__ */
2655 .xload D0, 2
2656 .xload D1, 2
2657 ret
2658 1: ld D0, Z+
2659 ld D1, Z+
2660 ret
2661 ENDF __xload_2
2662 #endif /* L_xload_2 */
2663
2664 #if defined (L_xload_3)
2665 DEFUN __xload_3
2666 sbrc HHI8, 7
2667 rjmp 1f
2668 #if defined (__AVR_HAVE_ELPM__)
2669 out __RAMPZ__, HHI8
2670 #endif /* __AVR_HAVE_ELPM__ */
2671 .xload D0, 3
2672 .xload D1, 3
2673 .xload D2, 3
2674 ret
2675 1: ld D0, Z+
2676 ld D1, Z+
2677 ld D2, Z+
2678 ret
2679 ENDF __xload_3
2680 #endif /* L_xload_3 */
2681
2682 #if defined (L_xload_4)
2683 DEFUN __xload_4
2684 sbrc HHI8, 7
2685 rjmp 1f
2686 #if defined (__AVR_HAVE_ELPM__)
2687 out __RAMPZ__, HHI8
2688 #endif /* __AVR_HAVE_ELPM__ */
2689 .xload D0, 4
2690 .xload D1, 4
2691 .xload D2, 4
2692 .xload D3, 4
2693 ret
2694 1: ld D0, Z+
2695 ld D1, Z+
2696 ld D2, Z+
2697 ld D3, Z+
2698 ret
2699 ENDF __xload_4
2700 #endif /* L_xload_4 */
2701
2702 #endif /* L_xload_{1|2|3|4} */
2703 #endif /* if !defined (__AVR_TINY__) */
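/* For reference, a rough C sketch of the 4-byte case (illustrative only;
   read_flash_byte and read_ram_byte are hypothetical stand-ins for
   LPM/ELPM and LD):

       #include <stdint.h>

       extern uint8_t read_flash_byte (uint32_t byte_address);
       extern uint8_t read_ram_byte (uint16_t address);

       // R21:Z holds a 24-bit address; if bit 23 (R21.7) is set, the low
       // 16 bits address RAM, otherwise the value addresses flash.
       static uint32_t
       xload_4_model (uint32_t addr24)
       {
         uint32_t v = 0;
         for (uint8_t i = 0; i < 4; i++)
           {
             uint8_t b = (addr24 & 0x800000UL)
                         ? read_ram_byte ((uint16_t) (addr24 + i))
                         : read_flash_byte (addr24 + i);
             v |= (uint32_t) b << (8 * i);
           }
         return v;
       }
*/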
2704
2705 #if !defined (__AVR_TINY__)
2706 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2707 ;; Memory copy from address space __pgmx to RAM
2708 ;; R23:Z = Source Address
2709 ;; X = Destination Address
2710 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2711
2712 #if defined (L_movmemx)
2713
2714 #define HHI8 23
2715 #define LOOP 24
2716
2717 DEFUN __movmemx_qi
2718 ;; #Bytes to copy fits in 8 Bits (1..255)
2719 ;; Zero-extend Loop Counter
2720 clr LOOP+1
2721 ;; FALLTHRU
2722 ENDF __movmemx_qi
2723
2724 DEFUN __movmemx_hi
2725
2726 ;; Read from where?
2727 sbrc HHI8, 7
2728 rjmp 1f
2729
2730 ;; Read from Flash
2731
2732 #if defined (__AVR_HAVE_ELPM__)
2733 out __RAMPZ__, HHI8
2734 #endif
2735
2736 0: ;; Load 1 Byte from Flash...
2737
2738 #if defined (__AVR_HAVE_ELPMX__)
2739 elpm r0, Z+
2740 #elif defined (__AVR_HAVE_ELPM__)
2741 elpm
2742 adiw r30, 1
2743 adc HHI8, __zero_reg__
2744 out __RAMPZ__, HHI8
2745 #elif defined (__AVR_HAVE_LPMX__)
2746 lpm r0, Z+
2747 #else
2748 lpm
2749 adiw r30, 1
2750 #endif
2751
2752 ;; ...and store that Byte to RAM Destination
2753 st X+, r0
2754 sbiw LOOP, 1
2755 brne 0b
2756 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2757 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2758 out __RAMPZ__, __zero_reg__
2759 #endif /* ELPM && RAMPD */
2760 ret
2761
2762 ;; Read from RAM
2763
2764 1: ;; Read 1 Byte from RAM...
2765 ld r0, Z+
2766 ;; and store that Byte to RAM Destination
2767 st X+, r0
2768 sbiw LOOP, 1
2769 brne 1b
2770 ret
2771 ENDF __movmemx_hi
2772
2773 #undef HHI8
2774 #undef LOOP
2775
2776 #endif /* L_movmemx */
2777 #endif /* !defined (__AVR_TINY__) */
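/* For reference, a rough C sketch of the copy performed above (illustrative
   only; read_flash_byte and read_ram_byte are hypothetical stand-ins for
   LPM/ELPM and LD, and len is assumed non-zero as the callers guarantee):

       #include <stdint.h>

       extern uint8_t read_flash_byte (uint32_t byte_address);
       extern uint8_t read_ram_byte (uint16_t address);

       // Copy len bytes from the 24-bit source address (bit 23 selects RAM,
       // otherwise flash) to the RAM destination.
       static void
       movmemx_model (uint8_t *dest, uint32_t src24, uint16_t len)
       {
         for (uint16_t i = 0; i < len; i++)
           dest[i] = (src24 & 0x800000UL)
                     ? read_ram_byte ((uint16_t) (src24 + i))
                     : read_flash_byte (src24 + i);
       }
*/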
2778
2779 \f
2780 .section .text.libgcc.builtins, "ax", @progbits
2781
2782 /**********************************
2783 * Find first set Bit (ffs)
2784 **********************************/
2785
2786 #if defined (L_ffssi2)
2787 ;; find first set bit
2788 ;; r25:r24 = ffs32 (r25:r22)
2789 ;; clobbers: r22, r26
2790 DEFUN __ffssi2
2791 clr r26
2792 tst r22
2793 brne 1f
2794 subi r26, -8
2795 or r22, r23
2796 brne 1f
2797 subi r26, -8
2798 or r22, r24
2799 brne 1f
2800 subi r26, -8
2801 or r22, r25
2802 brne 1f
2803 ret
2804 1: mov r24, r22
2805 XJMP __loop_ffsqi2
2806 ENDF __ffssi2
2807 #endif /* defined (L_ffssi2) */
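/* For reference, a C sketch of the value computed (illustrative only):

       #include <stdint.h>

       // Same convention as GCC's __builtin_ffs: 1 + index of the least
       // significant 1-bit, or 0 if the argument is zero.  The assembly
       // returns the result zero-extended to 16 bits in R25:R24.
       static uint8_t
       ffs32_model (uint32_t x)
       {
         if (x == 0)
           return 0;
         uint8_t n = 1;
         while ((x & 1) == 0)
           {
             x >>= 1;
             n++;
           }
         return n;
       }
*/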
2808
2809 #if defined (L_ffshi2)
2810 ;; find first set bit
2811 ;; r25:r24 = ffs16 (r25:r24)
2812 ;; clobbers: r26
2813 DEFUN __ffshi2
2814 clr r26
2815 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2816 ;; Some cores have problems skipping a 2-word instruction
2817 tst r24
2818 breq 2f
2819 #else
2820 cpse r24, __zero_reg__
2821 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2822 1: XJMP __loop_ffsqi2
2823 2: ldi r26, 8
2824 or r24, r25
2825 brne 1b
2826 ret
2827 ENDF __ffshi2
2828 #endif /* defined (L_ffshi2) */
2829
2830 #if defined (L_loop_ffsqi2)
2831 ;; Helper for ffshi2, ffssi2
2832 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2833 ;; r24 must be != 0
2834 ;; clobbers: r26
2835 DEFUN __loop_ffsqi2
2836 inc r26
2837 lsr r24
2838 brcc __loop_ffsqi2
2839 mov r24, r26
2840 clr r25
2841 ret
2842 ENDF __loop_ffsqi2
2843 #endif /* defined (L_loop_ffsqi2) */
2844
2845 \f
2846 /**********************************
2847 * Count trailing Zeros (ctz)
2848 **********************************/
2849
2850 #if defined (L_ctzsi2)
2851 ;; count trailing zeros
2852 ;; r25:r24 = ctz32 (r25:r22)
2853 ;; clobbers: r26, r22
2854 ;; ctz(0) = 255
2855 ;; Note that ctz(0) is undefined for GCC
2856 DEFUN __ctzsi2
2857 XCALL __ffssi2
2858 dec r24
2859 ret
2860 ENDF __ctzsi2
2861 #endif /* defined (L_ctzsi2) */
2862
2863 #if defined (L_ctzhi2)
2864 ;; count trailing zeros
2865 ;; r25:r24 = ctz16 (r25:r24)
2866 ;; clobbers: r26
2867 ;; ctz(0) = 255
2868 ;; Note that ctz(0) is undefined for GCC
2869 DEFUN __ctzhi2
2870 XCALL __ffshi2
2871 dec r24
2872 ret
2873 ENDF __ctzhi2
2874 #endif /* defined (L_ctzhi2) */
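/* For reference, a C sketch of the value computed; ctz here is simply
   ffs - 1, so the all-zero input yields 255 (illustrative only):

       #include <stdint.h>

       static uint8_t
       ctz16_model (uint16_t x)
       {
         if (x == 0)
           return 255;   // matches the comment above; undefined for GCC
         uint8_t n = 0;
         while ((x & 1) == 0)
           {
             x >>= 1;
             n++;
           }
         return n;
       }
*/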
2875
2876 \f
2877 /**********************************
2878 * Count leading Zeros (clz)
2879 **********************************/
2880
2881 #if defined (L_clzdi2)
2882 ;; count leading zeros
2883 ;; r25:r24 = clz64 (r25:r18)
2884 ;; clobbers: r22, r23, r26
2885 DEFUN __clzdi2
2886 XCALL __clzsi2
2887 sbrs r24, 5
2888 ret
2889 mov_l r22, r18
2890 mov_h r23, r19
2891 mov_l r24, r20
2892 mov_h r25, r21
2893 XCALL __clzsi2
2894 subi r24, -32
2895 ret
2896 ENDF __clzdi2
2897 #endif /* defined (L_clzdi2) */
2898
2899 #if defined (L_clzsi2)
2900 ;; count leading zeros
2901 ;; r25:r24 = clz32 (r25:r22)
2902 ;; clobbers: r26
2903 DEFUN __clzsi2
2904 XCALL __clzhi2
2905 sbrs r24, 4
2906 ret
2907 mov_l r24, r22
2908 mov_h r25, r23
2909 XCALL __clzhi2
2910 subi r24, -16
2911 ret
2912 ENDF __clzsi2
2913 #endif /* defined (L_clzsi2) */
2914
2915 #if defined (L_clzhi2)
2916 ;; count leading zeros
2917 ;; r25:r24 = clz16 (r25:r24)
2918 ;; clobbers: r26
2919 DEFUN __clzhi2
2920 clr r26
2921 tst r25
2922 brne 1f
2923 subi r26, -8
2924 or r25, r24
2925 brne 1f
2926 ldi r24, 16
2927 ret
2928 1: cpi r25, 16
2929 brsh 3f
2930 subi r26, -3
2931 swap r25
2932 2: inc r26
2933 3: lsl r25
2934 brcc 2b
2935 mov r24, r26
2936 clr r25
2937 ret
2938 ENDF __clzhi2
2939 #endif /* defined (L_clzhi2) */
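/* For reference, a C sketch of the 16-bit case (illustrative only):

       #include <stdint.h>

       static uint8_t
       clz16_model (uint16_t x)
       {
         if (x == 0)
           return 16;    // value produced by the code above; undefined for GCC
         uint8_t n = 0;
         while ((x & 0x8000u) == 0)
           {
             x <<= 1;
             n++;
           }
         return n;
       }
*/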
2940
2941 \f
2942 /**********************************
2943 * Parity
2944 **********************************/
2945
2946 #if defined (L_paritydi2)
2947 ;; r25:r24 = parity64 (r25:r18)
2948 ;; clobbers: __tmp_reg__
2949 DEFUN __paritydi2
2950 eor r24, r18
2951 eor r24, r19
2952 eor r24, r20
2953 eor r24, r21
2954 XJMP __paritysi2
2955 ENDF __paritydi2
2956 #endif /* defined (L_paritydi2) */
2957
2958 #if defined (L_paritysi2)
2959 ;; r25:r24 = parity32 (r25:r22)
2960 ;; clobbers: __tmp_reg__
2961 DEFUN __paritysi2
2962 eor r24, r22
2963 eor r24, r23
2964 XJMP __parityhi2
2965 ENDF __paritysi2
2966 #endif /* defined (L_paritysi2) */
2967
2968 #if defined (L_parityhi2)
2969 ;; r25:r24 = parity16 (r25:r24)
2970 ;; clobbers: __tmp_reg__
2971 DEFUN __parityhi2
2972 eor r24, r25
2973 ;; FALLTHRU
2974 ENDF __parityhi2
2975
2976 ;; r25:r24 = parity8 (r24)
2977 ;; clobbers: __tmp_reg__
2978 DEFUN __parityqi2
2979 ;; parity is in r24[0..7]
2980 mov __tmp_reg__, r24
2981 swap __tmp_reg__
2982 eor r24, __tmp_reg__
2983 ;; parity is in r24[0..3]
2984 subi r24, -4
2985 andi r24, -5
2986 subi r24, -6
2987 ;; parity is in r24[0,3]
2988 sbrc r24, 3
2989 inc r24
2990 ;; parity is in r24[0]
2991 andi r24, 1
2992 clr r25
2993 ret
2994 ENDF __parityqi2
2995 #endif /* defined (L_parityhi2) */
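/* For reference, a C sketch of the 8-bit kernel; the wider variants first
   fold the upper bytes into the low byte with EOR, which preserves parity
   (illustrative only):

       #include <stdint.h>

       static uint8_t
       parity8_model (uint8_t x)
       {
         uint8_t p = 0;
         while (x)
           {
             p ^= x & 1;
             x >>= 1;
           }
         return p;        // 0 for an even number of 1-bits, 1 for odd
       }
*/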
2996
2997 \f
2998 /**********************************
2999 * Population Count
3000 **********************************/
3001
3002 #if defined (L_popcounthi2)
3003 ;; population count
3004 ;; r25:r24 = popcount16 (r25:r24)
3005 ;; clobbers: __tmp_reg__
3006 DEFUN __popcounthi2
3007 XCALL __popcountqi2
3008 push r24
3009 mov r24, r25
3010 XCALL __popcountqi2
3011 clr r25
3012 ;; FALLTHRU
3013 ENDF __popcounthi2
3014
3015 DEFUN __popcounthi2_tail
3016 pop __tmp_reg__
3017 add r24, __tmp_reg__
3018 ret
3019 ENDF __popcounthi2_tail
3020 #endif /* defined (L_popcounthi2) */
3021
3022 #if defined (L_popcountsi2)
3023 ;; population count
3024 ;; r25:r24 = popcount32 (r25:r22)
3025 ;; clobbers: __tmp_reg__
3026 DEFUN __popcountsi2
3027 XCALL __popcounthi2
3028 push r24
3029 mov_l r24, r22
3030 mov_h r25, r23
3031 XCALL __popcounthi2
3032 XJMP __popcounthi2_tail
3033 ENDF __popcountsi2
3034 #endif /* defined (L_popcountsi2) */
3035
3036 #if defined (L_popcountdi2)
3037 ;; population count
3038 ;; r25:r24 = popcount64 (r25:r18)
3039 ;; clobbers: r22, r23, __tmp_reg__
3040 DEFUN __popcountdi2
3041 XCALL __popcountsi2
3042 push r24
3043 mov_l r22, r18
3044 mov_h r23, r19
3045 mov_l r24, r20
3046 mov_h r25, r21
3047 XCALL __popcountsi2
3048 XJMP __popcounthi2_tail
3049 ENDF __popcountdi2
3050 #endif /* defined (L_popcountdi2) */
3051
3052 #if defined (L_popcountqi2)
3053 ;; population count
3054 ;; r24 = popcount8 (r24)
3055 ;; clobbers: __tmp_reg__
3056 DEFUN __popcountqi2
3057 mov __tmp_reg__, r24
3058 andi r24, 1
3059 lsr __tmp_reg__
3060 lsr __tmp_reg__
3061 adc r24, __zero_reg__
3062 lsr __tmp_reg__
3063 adc r24, __zero_reg__
3064 lsr __tmp_reg__
3065 adc r24, __zero_reg__
3066 lsr __tmp_reg__
3067 adc r24, __zero_reg__
3068 lsr __tmp_reg__
3069 adc r24, __zero_reg__
3070 lsr __tmp_reg__
3071 adc r24, __tmp_reg__
3072 ret
3073 ENDF __popcountqi2
3074 #endif /* defined (L_popcountqi2) */
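/* For reference, a C sketch of the 8-bit kernel; the wider variants add up
   the byte-wise results (illustrative only):

       #include <stdint.h>

       static uint8_t
       popcount8_model (uint8_t x)
       {
         uint8_t n = 0;
         while (x)
           {
             n += x & 1;
             x >>= 1;
           }
         return n;
       }
*/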
3075
3076 \f
3077 /**********************************
3078 * Swap bytes
3079 **********************************/
3080
3081 ;; swap two registers with different register numbers
3082 .macro bswap a, b
3083 eor \a, \b
3084 eor \b, \a
3085 eor \a, \b
3086 .endm
3087
3088 #if defined (L_bswapsi2)
3089 ;; swap bytes
3090 ;; r25:r22 = bswap32 (r25:r22)
3091 DEFUN __bswapsi2
3092 bswap r22, r25
3093 bswap r23, r24
3094 ret
3095 ENDF __bswapsi2
3096 #endif /* defined (L_bswapsi2) */
3097
3098 #if defined (L_bswapdi2)
3099 ;; swap bytes
3100 ;; r25:r18 = bswap64 (r25:r18)
3101 DEFUN __bswapdi2
3102 bswap r18, r25
3103 bswap r19, r24
3104 bswap r20, r23
3105 bswap r21, r22
3106 ret
3107 ENDF __bswapdi2
3108 #endif /* defined (L_bswapdi2) */
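/* For reference, a C sketch of the 32-bit case; the 64-bit variant swaps
   the corresponding bytes of R25:R18 the same way (illustrative only):

       #include <stdint.h>

       static uint32_t
       bswap32_model (uint32_t x)
       {
         return (x >> 24)
                | ((x >> 8) & 0x0000ff00UL)
                | ((x << 8) & 0x00ff0000UL)
                | (x << 24);
       }
*/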
3109
3110 \f
3111 /**********************************
3112 * 64-bit shifts
3113 **********************************/
3114
3115 #if defined (L_ashrdi3)
3116
3117 #define SS __zero_reg__
3118
3119 ;; Arithmetic shift right
3120 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
3121 DEFUN __ashrdi3
3122 sbrc r25, 7
3123 com SS
3124 ;; FALLTHRU
3125 ENDF __ashrdi3
3126
3127 ;; Logical shift right
3128 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
3129 DEFUN __lshrdi3
3130 ;; Sign-fill byte is in SS (zero_reg): 0x00 for lshr, 0xFF for negative ashr
3131 mov __tmp_reg__, r16
3132 0: cpi r16, 8
3133 brlo 2f
3134 subi r16, 8
3135 mov r18, r19
3136 mov r19, r20
3137 mov r20, r21
3138 mov r21, r22
3139 mov r22, r23
3140 mov r23, r24
3141 mov r24, r25
3142 mov r25, SS
3143 rjmp 0b
3144 1: asr SS
3145 ror r25
3146 ror r24
3147 ror r23
3148 ror r22
3149 ror r21
3150 ror r20
3151 ror r19
3152 ror r18
3153 2: dec r16
3154 brpl 1b
3155 clr __zero_reg__
3156 mov r16, __tmp_reg__
3157 ret
3158 ENDF __lshrdi3
3159
3160 #undef SS
3161
3162 #endif /* defined (L_ashrdi3) */
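/* For reference, a C sketch of the two entry points above; only the low
   byte of the count is used, and counts of 64 or more shift every bit out,
   leaving zeros (logical) or copies of the sign (arithmetic).  Illustrative
   only:

       #include <stdint.h>

       static uint64_t
       lshr64_model (uint64_t x, uint8_t count)
       {
         return count < 64 ? x >> count : 0;
       }

       static int64_t
       ashr64_model (int64_t x, uint8_t count)
       {
         if (count >= 64)
           return x < 0 ? -1 : 0;
         return x >> count;   // arithmetic shift, as on two's-complement GCC targets
       }
*/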
3163
3164 #if defined (L_ashldi3)
3165 ;; Shift left
3166 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
3167 ;; This function does not clobber T.
3168 DEFUN __ashldi3
3169 mov __tmp_reg__, r16
3170 0: cpi r16, 8
3171 brlo 2f
3172 mov r25, r24
3173 mov r24, r23
3174 mov r23, r22
3175 mov r22, r21
3176 mov r21, r20
3177 mov r20, r19
3178 mov r19, r18
3179 clr r18
3180 subi r16, 8
3181 rjmp 0b
3182 1: lsl r18
3183 rol r19
3184 rol r20
3185 rol r21
3186 rol r22
3187 rol r23
3188 rol r24
3189 rol r25
3190 2: dec r16
3191 brpl 1b
3192 mov r16, __tmp_reg__
3193 ret
3194 ENDF __ashldi3
3195 #endif /* defined (L_ashldi3) */
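/* For reference, a C sketch (illustrative only); as above, only the low
   byte of the count matters and counts of 64 or more give zero:

       #include <stdint.h>

       static uint64_t
       ashl64_model (uint64_t x, uint8_t count)
       {
         return count < 64 ? x << count : 0;
       }
*/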
3196
3197 #if defined (L_rotldi3)
3198 ;; Rotate left
3199 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
3200 DEFUN __rotldi3
3201 push r16
3202 0: cpi r16, 8
3203 brlo 2f
3204 subi r16, 8
3205 mov __tmp_reg__, r25
3206 mov r25, r24
3207 mov r24, r23
3208 mov r23, r22
3209 mov r22, r21
3210 mov r21, r20
3211 mov r20, r19
3212 mov r19, r18
3213 mov r18, __tmp_reg__
3214 rjmp 0b
3215 1: lsl r18
3216 rol r19
3217 rol r20
3218 rol r21
3219 rol r22
3220 rol r23
3221 rol r24
3222 rol r25
3223 adc r18, __zero_reg__
3224 2: dec r16
3225 brpl 1b
3226 pop r16
3227 ret
3228 ENDF __rotldi3
3229 #endif /* defined (L_rotldi3) */
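/* For reference, a C sketch (illustrative only); rotation is periodic, so
   the count is effectively taken modulo 64:

       #include <stdint.h>

       static uint64_t
       rotl64_model (uint64_t x, uint8_t count)
       {
         count &= 63;
         return count ? (x << count) | (x >> (64 - count)) : x;
       }
*/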
3230
3231 \f
3232 .section .text.libgcc.fmul, "ax", @progbits
3233
3234 /***********************************************************/
3235 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3236 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3237 /***********************************************************/
3238
3239 #define A1 24
3240 #define B1 25
3241 #define C0 22
3242 #define C1 23
3243 #define A0 __tmp_reg__
3244
3245 #ifdef L_fmuls
3246 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
3247 ;;; Clobbers: r24, r25, __tmp_reg__
3248 DEFUN __fmuls
3249 ;; A0.7 = negate result?
3250 mov A0, A1
3251 eor A0, B1
3252 ;; B1 = |B1|
3253 sbrc B1, 7
3254 neg B1
3255 XJMP __fmulsu_exit
3256 ENDF __fmuls
3257 #endif /* L_fmuls */
3258
3259 #ifdef L_fmulsu
3260 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
3261 ;;; Clobbers: r24, r25, __tmp_reg__
3262 DEFUN __fmulsu
3263 ;; A0.7 = negate result?
3264 mov A0, A1
3265 ;; FALLTHRU
3266 ENDF __fmulsu
3267
3268 ;; Helper for __fmuls and __fmulsu
3269 DEFUN __fmulsu_exit
3270 ;; A1 = |A1|
3271 sbrc A1, 7
3272 neg A1
3273 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
3274 ;; Some cores have problems skipping a 2-word instruction
3275 tst A0
3276 brmi 1f
3277 #else
3278 sbrs A0, 7
3279 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
3280 XJMP __fmul
3281 1: XCALL __fmul
3282 ;; C = -C iff A0.7 = 1
3283 NEG2 C0
3284 ret
3285 ENDF __fmulsu_exit
3286 #endif /* L_fmulsu */
3287
3288
3289 #ifdef L_fmul
3290 ;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
3291 ;;; Clobbers: r24, r25, __tmp_reg__
3292 DEFUN __fmul
3293 ; clear result
3294 clr C0
3295 clr C1
3296 clr A0
3297 1: tst B1
3298 ;; 1.0 = 0x80, so test bit 7 of B to see if A must be added to C.
3299 2: brpl 3f
3300 ;; C += A
3301 add C0, A0
3302 adc C1, A1
3303 3: ;; A >>= 1
3304 lsr A1
3305 ror A0
3306 ;; B <<= 1
3307 lsl B1
3308 brne 2b
3309 ret
3310 ENDF __fmul
3311 #endif /* L_fmul */
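/* For reference, a C sketch of the unsigned case (illustrative only): the
   FMUL family multiplies two 8-bit fractional operands (1.0 = 0x80, as
   noted above) and shifts the 16-bit product left by one:

       #include <stdint.h>

       static uint16_t
       fmul_model (uint8_t a, uint8_t b)
       {
         return (uint16_t) (((unsigned int) a * b) << 1);
       }

   __fmuls and __fmulsu compute the same product on the absolute values and
   negate the 16-bit result when the operand signs require it. */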
3312
3313 #undef A0
3314 #undef A1
3315 #undef B1
3316 #undef C0
3317 #undef C1
3318
3319 #include "lib1funcs-fixed.S"