/* -*- Mode: Asm -*-  */
/* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009
   Free Software Foundation, Inc.
   Contributed by Denis Chertykov <chertykov@gmail.com>

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#define __zero_reg__ r1
#define __tmp_reg__ r0
#define __SREG__ 0x3f
#define __SP_H__ 0x3e
#define __SP_L__ 0x3d
#define __RAMPZ__ 0x3B
#define __EIND__ 0x3C

/* Most of the functions here are called directly from avr.md
   patterns, instead of using the standard libcall mechanisms.
   This can make better code because GCC knows exactly which
   of the call-used registers (not all of them) are clobbered.  */

/* FIXME:  At present, there is no SORT directive in the linker
   script, so we must not assume that different modules in the
   same input section like .libgcc.text.mul will be located
   close together.  Therefore, we cannot use RCALL/RJMP to call
   a function like __udivmodhi4 from __divmodhi4 and have to use
   lengthy XCALL/XJMP even though they are in the same input
   section and all the input sections together are small enough
   to reach every location with an RCALL/RJMP instruction.  */

.macro mov_l  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    movw \r_dest, \r_src
#else
    mov \r_dest, \r_src
#endif
.endm

.macro mov_h  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    ; empty
#else
    mov \r_dest, \r_src
#endif
.endm

.macro wmov  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    movw \r_dest, \r_src
#else
    mov \r_dest,   \r_src
    mov \r_dest+1, \r_src+1
#endif
.endm

#if defined (__AVR_HAVE_JMP_CALL__)
#define XCALL call
#define XJMP  jmp
#else
#define XCALL rcall
#define XJMP  rjmp
#endif

.macro DEFUN name
.global \name
.func \name
\name:
.endm

.macro ENDF name
.size \name, .-\name
.endfunc
.endm


.section .text.libgcc.mul, "ax", @progbits

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
#if !defined (__AVR_HAVE_MUL__)
/*******************************************************
    Multiplication  8 x 8  without MUL
*******************************************************/
#if defined (L_mulqi3)

#define r_arg2  r22         /* multiplicand */
#define r_arg1  r24         /* multiplier */
#define r_res   __tmp_reg__ /* result */

DEFUN __mulqi3
    clr  r_res          ; clear result
__mulqi3_loop:
    sbrc r_arg1,0
    add  r_res,r_arg2   ; result += multiplicand
    add  r_arg2,r_arg2  ; shift multiplicand
    breq __mulqi3_exit  ; while multiplicand != 0
    lsr  r_arg1
    brne __mulqi3_loop  ; exit if multiplier = 0
__mulqi3_exit:
    mov  r_arg1,r_res   ; result to return register
    ret
ENDF __mulqi3

#undef r_arg2
#undef r_arg1
#undef r_res

#endif /* defined (L_mulqi3) */
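
/* The loop above is the classic shift-and-add multiply; the asm keeps
   the running product in __tmp_reg__.  A result-equivalent C sketch
   (a reference model only, not part of libgcc; assumes <stdint.h>
   types):

       static uint8_t mulqi3_model (uint8_t a, uint8_t b)
       {
           uint8_t res = 0;
           while (a != 0 && b != 0)
           {
               if (a & 1)          // sbrc r_arg1,0
                   res += b;       // add  r_res,r_arg2
               b <<= 1;            // add  r_arg2,r_arg2 (wraps mod 256)
               a >>= 1;            // lsr  r_arg1
           }
           return res;
       }

   Once either register reaches 0, no further addition can change the
   8-bit result, which is why the asm may exit on both tests.  */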

#if defined (L_mulqihi3)
DEFUN __mulqihi3
    clr  r25
    sbrc r24, 7
    dec  r25            ; sign-extend r24 into r25
    clr  r23
    sbrc r22, 7
    dec  r23            ; sign-extend r22 into r23
    XJMP __mulhi3
ENDF __mulqihi3
#endif /* defined (L_mulqihi3) */

#if defined (L_umulqihi3)
DEFUN __umulqihi3
    clr  r25
    clr  r23
    XJMP __mulhi3
ENDF __umulqihi3
#endif /* defined (L_umulqihi3) */

/*******************************************************
    Multiplication  16 x 16  without MUL
*******************************************************/
#if defined (L_mulhi3)
#define r_arg1L r24         /* multiplier Low */
#define r_arg1H r25         /* multiplier High */
#define r_arg2L r22         /* multiplicand Low */
#define r_arg2H r23         /* multiplicand High */
#define r_resL  __tmp_reg__ /* result Low */
#define r_resH  r21         /* result High */

DEFUN __mulhi3
    clr  r_resH         ; clear result
    clr  r_resL         ; clear result
__mulhi3_loop:
    sbrs r_arg1L,0
    rjmp __mulhi3_skip1
    add  r_resL,r_arg2L ; result + multiplicand
    adc  r_resH,r_arg2H
__mulhi3_skip1:
    add  r_arg2L,r_arg2L    ; shift multiplicand
    adc  r_arg2H,r_arg2H

    cp   r_arg2L,__zero_reg__
    cpc  r_arg2H,__zero_reg__
    breq __mulhi3_exit  ; while multiplicand != 0

    lsr  r_arg1H        ; gets LSB of multiplier
    ror  r_arg1L
    sbiw r_arg1L,0
    brne __mulhi3_loop  ; exit if multiplier = 0
__mulhi3_exit:
    mov  r_arg1H,r_resH ; result to return register
    mov  r_arg1L,r_resL
    ret
ENDF __mulhi3

#undef r_arg1L
#undef r_arg1H
#undef r_arg2L
#undef r_arg2H
#undef r_resL
#undef r_resH

#endif /* defined (L_mulhi3) */

/*******************************************************
    Widening Multiplication  32 = 16 x 16  without MUL
*******************************************************/

#if defined (L_mulhisi3)
DEFUN __mulhisi3
;;; FIXME: This is dead code (no one calls it)
    mov_l r18, r24
    mov_h r19, r25
    clr  r24
    sbrc r23, 7
    dec  r24
    mov  r25, r24
    clr  r20
    sbrc r19, 7
    dec  r20
    mov  r21, r20
    XJMP __mulsi3
ENDF __mulhisi3
#endif /* defined (L_mulhisi3) */

#if defined (L_umulhisi3)
DEFUN __umulhisi3
;;; FIXME: This is dead code (no one calls it)
    mov_l r18, r24
    mov_h r19, r25
    clr  r24
    clr  r25
    mov_l r20, r24
    mov_h r21, r25
    XJMP __mulsi3
ENDF __umulhisi3
#endif /* defined (L_umulhisi3) */

#if defined (L_mulsi3)
/*******************************************************
    Multiplication  32 x 32  without MUL
*******************************************************/
#define r_arg1L  r22    /* multiplier Low */
#define r_arg1H  r23
#define r_arg1HL r24
#define r_arg1HH r25    /* multiplier High */

#define r_arg2L  r18    /* multiplicand Low */
#define r_arg2H  r19
#define r_arg2HL r20
#define r_arg2HH r21    /* multiplicand High */

#define r_resL   r26    /* result Low */
#define r_resH   r27
#define r_resHL  r30
#define r_resHH  r31    /* result High */

DEFUN __mulsi3
    clr  r_resHH        ; clear result
    clr  r_resHL        ; clear result
    clr  r_resH         ; clear result
    clr  r_resL         ; clear result
__mulsi3_loop:
    sbrs r_arg1L,0
    rjmp __mulsi3_skip1
    add  r_resL,r_arg2L ; result + multiplicand
    adc  r_resH,r_arg2H
    adc  r_resHL,r_arg2HL
    adc  r_resHH,r_arg2HH
__mulsi3_skip1:
    add  r_arg2L,r_arg2L    ; shift multiplicand
    adc  r_arg2H,r_arg2H
    adc  r_arg2HL,r_arg2HL
    adc  r_arg2HH,r_arg2HH

    lsr  r_arg1HH       ; gets LSB of multiplier
    ror  r_arg1HL
    ror  r_arg1H
    ror  r_arg1L
    brne __mulsi3_loop
    sbiw r_arg1HL,0
    cpc  r_arg1H,r_arg1L
    brne __mulsi3_loop  ; exit if multiplier = 0
__mulsi3_exit:
    mov_h r_arg1HH,r_resHH  ; result to return register
    mov_l r_arg1HL,r_resHL
    mov_h r_arg1H,r_resH
    mov_l r_arg1L,r_resL
    ret
ENDF __mulsi3

#undef r_arg1L
#undef r_arg1H
#undef r_arg1HL
#undef r_arg1HH

#undef r_arg2L
#undef r_arg2H
#undef r_arg2HL
#undef r_arg2HH

#undef r_resL
#undef r_resH
#undef r_resHL
#undef r_resHH

#endif /* defined (L_mulsi3) */

#endif /* !defined (__AVR_HAVE_MUL__) */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#if defined (__AVR_HAVE_MUL__)
#define A0 26
#define B0 18
#define C0 22

#define A1 A0+1

#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

/*******************************************************
    Widening Multiplication  32 = 16 x 16
*******************************************************/

#if defined (L_mulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulhisi3
    XCALL __umulhisi3
    ;; Sign-extend B
    tst  B1
    brpl 1f
    sub  C2, A0
    sbc  C3, A1
1:  ;; Sign-extend A
    XJMP __usmulhisi3_tail
ENDF __mulhisi3
#endif /* L_mulhisi3 */

#if defined (L_usmulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __usmulhisi3
    XCALL __umulhisi3
    ;; FALLTHRU
ENDF __usmulhisi3

DEFUN __usmulhisi3_tail
    ;; Sign-extend A
    sbrs A1, 7
    ret
    sub  C2, B0
    sbc  C3, B1
    ret
ENDF __usmulhisi3_tail
#endif /* L_usmulhisi3 */

#if defined (L_umulhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __umulhisi3
    mul  A0, B0
    movw C0, r0
    mul  A1, B1
    movw C2, r0
    mul  A0, B1
    rcall 1f
    mul  A1, B0
1:  add  C1, r0
    adc  C2, r1
    clr  __zero_reg__
    adc  C3, __zero_reg__
    ret
ENDF __umulhisi3
#endif /* L_umulhisi3 */
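
/* __umulhisi3 is the schoolbook 16 x 16 -> 32 product built from four
   8 x 8 MULs; the rcall/1f trick runs the final add chain twice, once
   for each cross product.  A C reference model (a sketch only, not
   part of libgcc; assumes <stdint.h> types):

       static uint32_t umulhisi3_model (uint16_t a, uint16_t b)
       {
           uint8_t a0 = a, a1 = a >> 8;
           uint8_t b0 = b, b1 = b >> 8;
           uint32_t c = (uint32_t) (a0 * b0)           // mul A0,B0 -> C1:C0
                      | ((uint32_t) (a1 * b1) << 16);  // mul A1,B1 -> C3:C2
           c += (uint32_t) (a0 * b1) << 8;             // cross products land
           c += (uint32_t) (a1 * b0) << 8;             // at byte offset 1
           return c;
       }
*/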

/*******************************************************
    Widening Multiplication  32 = 16 x 32
*******************************************************/

#if defined (L_mulshisi3)
;;; R25:R22 = (signed long) R27:R26 * R21:R18
;;; (C3:C0) = (signed long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulshisi3
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have a problem skipping a 2-word instruction
    tst  A1
    brmi __mulohisi3
#else
    sbrs A1, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP __muluhisi3
    ;; FALLTHRU
ENDF __mulshisi3

;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulohisi3
    XCALL __muluhisi3
    ;; One-extend R27:R26 (A1:A0)
    sub  C2, B0
    sbc  C3, B1
    ret
ENDF __mulohisi3
#endif /* L_mulshisi3 */

#if defined (L_muluhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __muluhisi3
    XCALL __umulhisi3
    mul  A0, B3
    add  C3, r0
    mul  A1, B2
    add  C3, r0
    mul  A0, B2
    add  C2, r0
    adc  C3, r1
    clr  __zero_reg__
    ret
ENDF __muluhisi3
#endif /* L_muluhisi3 */

/*******************************************************
    Multiplication  32 x 32
*******************************************************/

#if defined (L_mulsi3)
;;; R25:R22 = R25:R22 * R21:R18
;;; (C3:C0) = C3:C0   * B3:B0
;;; Clobbers: R26, R27, __tmp_reg__
DEFUN __mulsi3
    movw A0, C0
    push C2
    push C3
    XCALL __muluhisi3
    pop  A1
    pop  A0
    ;; A1:A0 now contains the high word of A
    mul  A0, B0
    add  C2, r0
    adc  C3, r1
    mul  A0, B1
    add  C3, r0
    mul  A1, B0
    add  C3, r0
    clr  __zero_reg__
    ret
ENDF __mulsi3
#endif /* L_mulsi3 */

#undef A0
#undef A1

#undef B0
#undef B1
#undef B2
#undef B3

#undef C0
#undef C1
#undef C2
#undef C3

#endif /* __AVR_HAVE_MUL__ */

/*******************************************************
    Multiplication  24 x 24
*******************************************************/

#if defined (L_mulpsi3)

;; A[0..2]: In: Multiplicand; Out: Product
#define A0  22
#define A1  A0+1
#define A2  A0+2

;; B[0..2]: In: Multiplier
#define B0  18
#define B1  B0+1
#define B2  B0+2

#if defined (__AVR_HAVE_MUL__)

;; C[0..2]: Expand Result
#define C0  22
#define C1  C0+1
#define C2  C0+2

;; R24:R22 *= R20:R18
;; Clobbers: r21, r25, r26, r27, __tmp_reg__

#define AA0 26
#define AA2 21

DEFUN __mulpsi3
    wmov AA0, A0
    mov  AA2, A2
    XCALL __umulhisi3
    mul  AA2, B0  $  add C2, r0
    mul  AA0, B2  $  add C2, r0
    clr  __zero_reg__
    ret
ENDF __mulpsi3

#undef AA2
#undef AA0

#undef C2
#undef C1
#undef C0

#else /* !HAVE_MUL */

;; C[0..2]: Expand Result
#define C0  0
#define C1  C0+1
#define C2  21

;; R24:R22 *= R20:R18
;; Clobbers: __tmp_reg__, R18, R19, R20, R21

DEFUN __mulpsi3

    ;; C[] = 0
    clr  __tmp_reg__
    clr  C2

0:  ;; Shift N-th Bit of B[] into Carry.  N = 24 - Loop
    LSR  B2     $  ror B1     $  ror B0

    ;; If the N-th Bit of B[] was set...
    brcc 1f

    ;; ...then add A[] * 2^N to the Result C[]
    ADD  C0,A0  $  adc C1,A1  $  adc C2,A2

1:  ;; Multiply A[] by 2
    LSL  A0     $  rol A1     $  rol A2

    ;; Loop until B[] is 0
    subi B0,0   $  sbci B1,0  $  sbci B2,0
    brne 0b

    ;; Copy C[] to the return Register A[]
    wmov A0, C0
    mov  A2, C2

    clr  __zero_reg__
    ret
ENDF __mulpsi3

#undef C2
#undef C1
#undef C0

#endif /* HAVE_MUL */

#undef B2
#undef B1
#undef B0

#undef A2
#undef A1
#undef A0

#endif /* L_mulpsi3 */

#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)

;; A[0..2]: In: Multiplicand
#define A0  22
#define A1  A0+1
#define A2  A0+2

;; BB: In: Multiplier
#define BB  25

;; C[0..2]: Result
#define C0  18
#define C1  C0+1
#define C2  C0+2

;; C[] = A[] * sign_extend (BB)
DEFUN __mulsqipsi3
    mul  A0, BB
    movw C0, r0
    mul  A2, BB
    mov  C2, r0
    mul  A1, BB
    add  C1, r0
    adc  C2, r1
    clr  __zero_reg__
    sbrs BB, 7
    ret
    ;; One-extend BB
    sub  C1, A0
    sbc  C2, A1
    ret
ENDF __mulsqipsi3

#undef C2
#undef C1
#undef C0

#undef BB

#undef A2
#undef A1
#undef A0

#endif /* L_mulsqipsi3 && HAVE_MUL */

/*******************************************************
    Multiplication  64 x 64
*******************************************************/

#if defined (L_muldi3)

;; A[] = A[] * B[]

;; A[0..7]: In: Multiplicand
;;          Out: Product
#define A0  18
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3
#define A4  A0+4
#define A5  A0+5
#define A6  A0+6
#define A7  A0+7

;; B[0..7]: In: Multiplier
#define B0  10
#define B1  B0+1
#define B2  B0+2
#define B3  B0+3
#define B4  B0+4
#define B5  B0+5
#define B6  B0+6
#define B7  B0+7

#if defined (__AVR_HAVE_MUL__)

;; Define C[] for convenience
;; Notice that parts of C[] overlap A[] and B[], respectively
#define C0  16
#define C1  C0+1
#define C2  20
#define C3  C2+1
#define C4  28
#define C5  C4+1
#define C6  C4+2
#define C7  C4+3

;; A[] *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function

DEFUN __muldi3
    push r29
    push r28
    push r17
    push r16

    ;; Counting in Words, we have to perform a 4 * 4 Multiplication

    ;; 3 * 0  +  0 * 3
    mul A7,B0  $             $  mov C7,r0
    mul A0,B7  $             $  add C7,r0
    mul A6,B1  $             $  add C7,r0
    mul A6,B0  $  mov C6,r0  $  add C7,r1
    mul B6,A1  $             $  add C7,r0
    mul B6,A0  $  add C6,r0  $  adc C7,r1

    ;; 1 * 2
    mul A2,B4  $  add C6,r0  $  adc C7,r1
    mul A3,B4  $             $  add C7,r0
    mul A2,B5  $             $  add C7,r0

    push A5
    push A4
    push B1
    push B0
    push A3
    push A2

    ;; 0 * 0
    wmov 26, B0
    XCALL __umulhisi3
    wmov C0, 22
    wmov C2, 24

    ;; 0 * 2
    wmov 26, B4
    XCALL __umulhisi3  $  wmov C4,22  $  add C6,24  $  adc C7,25

    wmov 26, B2
    ;; 0 * 1
    rcall __muldi3_6

    pop A0
    pop A1
    ;; 1 * 1
    wmov 26, B2
    XCALL __umulhisi3  $  add C4,22  $  adc C5,23  $  adc C6,24  $  adc C7,25

    pop r26
    pop r27
    ;; 1 * 0
    rcall __muldi3_6

    pop A0
    pop A1
    ;; 2 * 0
    XCALL __umulhisi3  $  add C4,22  $  adc C5,23  $  adc C6,24  $  adc C7,25

    ;; 2 * 1
    wmov 26, B2
    XCALL __umulhisi3  $  $  add C6,22  $  adc C7,23

    ;; A[] = C[]
    wmov A0, C0
    ;; A2 = C2 already
    wmov A4, C4
    wmov A6, C6

    clr  __zero_reg__
    pop  r16
    pop  r17
    pop  r28
    pop  r29
    ret

__muldi3_6:
    XCALL __umulhisi3
    add  C2, 22
    adc  C3, 23
    adc  C4, 24
    adc  C5, 25
    brcc 0f
    adiw C6, 1
0:  ret
ENDF __muldi3

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#else /* !HAVE_MUL */

#define C0  26
#define C1  C0+1
#define C2  C0+2
#define C3  C0+3
#define C4  C0+4
#define C5  C0+5
#define C6  0
#define C7  C6+1

#define Loop 9

;; A[] *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function

DEFUN __muldi3
    push r29
    push r28
    push Loop

    ldi  C0, 64
    mov  Loop, C0

    ;; C[] = 0
    clr  __tmp_reg__
    wmov C0, 0
    wmov C2, 0
    wmov C4, 0

0:  ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
    ;; where N = 64 - Loop.
    ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
    ;; B[] will have its initial Value again.
    LSR  B7  $  ror B6  $  ror B5  $  ror B4
    ror  B3  $  ror B2  $  ror B1  $  ror B0

    ;; If the N-th Bit of B[] was set then...
    brcc 1f
    ;; ...finish Rotation...
    ori  B7, 1 << 7

    ;; ...and add A[] * 2^N to the Result C[]
    ADD  C0,A0  $  adc C1,A1  $  adc C2,A2  $  adc C3,A3
    adc  C4,A4  $  adc C5,A5  $  adc C6,A6  $  adc C7,A7

1:  ;; Multiply A[] by 2
    LSL  A0  $  rol A1  $  rol A2  $  rol A3
    rol  A4  $  rol A5  $  rol A6  $  rol A7

    dec  Loop
    brne 0b

    ;; We expanded the Result in C[]
    ;; Copy Result to the Return Register A[]
    wmov A0, C0
    wmov A2, C2
    wmov A4, C4
    wmov A6, C6

    clr  __zero_reg__
    pop  Loop
    pop  r28
    pop  r29
    ret
ENDF __muldi3

#undef Loop

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#endif /* HAVE_MUL */

#undef B7
#undef B6
#undef B5
#undef B4
#undef B3
#undef B2
#undef B1
#undef B0

#undef A7
#undef A6
#undef A5
#undef A4
#undef A3
#undef A2
#undef A1
#undef A0

#endif /* L_muldi3 */
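
/* Counting in 16-bit words, the MUL variant above performs a 4 x 4
   schoolbook multiply, keeping only the partial products that land in
   the low 64 bits.  A C reference model (a sketch only, not part of
   libgcc; assumes <stdint.h> types):

       static uint64_t muldi3_model (uint64_t a, uint64_t b)
       {
           uint64_t c = 0;
           for (int i = 0; i < 4; i++)          // words of A
               for (int j = 0; j + i < 4; j++)  // only i+j <= 3 reaches bit 63
               {
                   uint32_t p = (uint32_t) (uint16_t) (a >> 16 * i)
                                * (uint16_t) (b >> 16 * j);
                   c += (uint64_t) p << (16 * (i + j));
               }
           return c;
       }

   The non-MUL variant is the same 64-round shift-and-add loop as
   __mulpsi3 above, widened to 8 bytes.  */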

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


.section .text.libgcc.div, "ax", @progbits

/*******************************************************
       Division 8 / 8 => (result + remainder)
*******************************************************/
#define r_rem   r25     /* remainder */
#define r_arg1  r24     /* dividend, quotient */
#define r_arg2  r22     /* divisor */
#define r_cnt   r23     /* loop count */

#if defined (L_udivmodqi4)
DEFUN __udivmodqi4
    sub  r_rem,r_rem        ; clear remainder and carry
    ldi  r_cnt,9            ; init loop counter
    rjmp __udivmodqi4_ep    ; jump to entry point
__udivmodqi4_loop:
    rol  r_rem              ; shift dividend into remainder
    cp   r_rem,r_arg2       ; compare remainder & divisor
    brcs __udivmodqi4_ep    ; remainder < divisor
    sub  r_rem,r_arg2       ; subtract divisor
__udivmodqi4_ep:
    rol  r_arg1             ; shift dividend (with CARRY)
    dec  r_cnt              ; decrement loop counter
    brne __udivmodqi4_loop
    com  r_arg1             ; complement result
                            ; because C flag was complemented in loop
    ret
ENDF __udivmodqi4
#endif /* defined (L_udivmodqi4) */
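
/* This is plain restoring (shift-and-subtract) division: quotient
   bits are rotated into the dividend register as the dividend rotates
   out, and are complemented at the end because the carry produced by
   cp/sub is the inverse of the quotient bit.  A C reference model
   (a sketch only; assumes <stdint.h> types and b != 0):

       static void udivmodqi4_model (uint8_t a, uint8_t b,
                                     uint8_t *quo, uint8_t *rem)
       {
           uint8_t q = 0, r = 0;
           for (int i = 7; i >= 0; i--)
           {
               r = (r << 1) | ((a >> i) & 1);   // rol: next dividend bit
               q <<= 1;
               if (r >= b)                      // cp  r_rem,r_arg2
               {
                   r -= b;                      // sub r_rem,r_arg2
                   q |= 1;
               }
           }
           *quo = q;
           *rem = r;
       }

   The 16-, 24-, 32- and 64-bit workers below are the same algorithm
   with wider registers.  */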

#if defined (L_divmodqi4)
DEFUN __divmodqi4
    bst  r_arg1,7           ; store sign of dividend
    mov  __tmp_reg__,r_arg1
    eor  __tmp_reg__,r_arg2 ; r0.7 is sign of result
    sbrc r_arg1,7
    neg  r_arg1             ; dividend negative: negate
    sbrc r_arg2,7
    neg  r_arg2             ; divisor negative: negate
    XCALL __udivmodqi4      ; do the unsigned div/mod
    brtc __divmodqi4_1
    neg  r_rem              ; correct remainder sign
__divmodqi4_1:
    sbrc __tmp_reg__,7
    neg  r_arg1             ; correct result sign
__divmodqi4_exit:
    ret
ENDF __divmodqi4
#endif /* defined (L_divmodqi4) */

#undef r_rem
#undef r_arg1
#undef r_arg2
#undef r_cnt


/*******************************************************
       Division 16 / 16 => (result + remainder)
*******************************************************/
#define r_remL  r26     /* remainder Low */
#define r_remH  r27     /* remainder High */

/* return: remainder */
#define r_arg1L r24     /* dividend Low */
#define r_arg1H r25     /* dividend High */

/* return: quotient */
#define r_arg2L r22     /* divisor Low */
#define r_arg2H r23     /* divisor High */

#define r_cnt   r21     /* loop count */

#if defined (L_udivmodhi4)
DEFUN __udivmodhi4
    sub  r_remL,r_remL
    sub  r_remH,r_remH      ; clear remainder and carry
    ldi  r_cnt,17           ; init loop counter
    rjmp __udivmodhi4_ep    ; jump to entry point
__udivmodhi4_loop:
    rol  r_remL             ; shift dividend into remainder
    rol  r_remH
    cp   r_remL,r_arg2L     ; compare remainder & divisor
    cpc  r_remH,r_arg2H
    brcs __udivmodhi4_ep    ; remainder < divisor
    sub  r_remL,r_arg2L     ; subtract divisor
    sbc  r_remH,r_arg2H
__udivmodhi4_ep:
    rol  r_arg1L            ; shift dividend (with CARRY)
    rol  r_arg1H
    dec  r_cnt              ; decrement loop counter
    brne __udivmodhi4_loop
    com  r_arg1L            ; complement result
    com  r_arg1H            ; because C flag was complemented in loop
    ; div/mod results to return registers, as for the div() function
    mov_l r_arg2L, r_arg1L  ; quotient
    mov_h r_arg2H, r_arg1H
    mov_l r_arg1L, r_remL   ; remainder
    mov_h r_arg1H, r_remH
    ret
ENDF __udivmodhi4
#endif /* defined (L_udivmodhi4) */

#if defined (L_divmodhi4)
DEFUN __divmodhi4
    .global _div
_div:
    bst  r_arg1H,7          ; store sign of dividend
    mov  __tmp_reg__,r_arg2H
    brtc 0f
    com  __tmp_reg__        ; r0.7 is sign of result
    rcall __divmodhi4_neg1  ; dividend negative: negate
0:
    sbrc r_arg2H,7
    rcall __divmodhi4_neg2  ; divisor negative: negate
    XCALL __udivmodhi4      ; do the unsigned div/mod
    sbrc __tmp_reg__,7
    rcall __divmodhi4_neg2  ; correct quotient sign
    brtc __divmodhi4_exit
__divmodhi4_neg1:
    ;; correct dividend/remainder sign
    com  r_arg1H
    neg  r_arg1L
    sbci r_arg1H,0xff
    ret
__divmodhi4_neg2:
    ;; correct divisor/result sign
    com  r_arg2H
    neg  r_arg2L
    sbci r_arg2H,0xff
__divmodhi4_exit:
    ret
ENDF __divmodhi4
#endif /* defined (L_divmodhi4) */

#undef r_remH
#undef r_remL

#undef r_arg1H
#undef r_arg1L

#undef r_arg2H
#undef r_arg2L

#undef r_cnt

/*******************************************************
       Division 24 / 24 => (result + remainder)
*******************************************************/

;; A[0..2]: In: Dividend; Out: Quotient
#define A0  22
#define A1  A0+1
#define A2  A0+2

;; B[0..2]: In: Divisor; Out: Remainder
#define B0  18
#define B1  B0+1
#define B2  B0+2

;; C[0..2]: Expand remainder
#define C0  __zero_reg__
#define C1  26
#define C2  25

;; Loop counter
#define r_cnt   21

#if defined (L_udivmodpsi4)
;; R24:R22 = R24:R22 udiv R20:R18
;; R20:R18 = R24:R22 umod R20:R18
;; Clobbers: R21, R25, R26

DEFUN __udivmodpsi4
    ; init loop counter
    ldi  r_cnt, 24+1
    ; Clear remainder and carry.  C0 is already 0
    clr  C1
    sub  C2, C2
    ; jump to entry point
    rjmp __udivmodpsi4_start
__udivmodpsi4_loop:
    ; shift dividend into remainder
    rol  C0
    rol  C1
    rol  C2
    ; compare remainder & divisor
    cp   C0, B0
    cpc  C1, B1
    cpc  C2, B2
    brcs __udivmodpsi4_start    ; remainder < divisor
    sub  C0, B0                 ; subtract divisor
    sbc  C1, B1
    sbc  C2, B2
__udivmodpsi4_start:
    ; shift dividend (with CARRY)
    rol  A0
    rol  A1
    rol  A2
    ; decrement loop counter
    dec  r_cnt
    brne __udivmodpsi4_loop
    com  A0
    com  A1
    com  A2
    ; div/mod results to return registers
    ; remainder
    mov  B0, C0
    mov  B1, C1
    mov  B2, C2
    clr  __zero_reg__   ; C0
    ret
ENDF __udivmodpsi4
#endif /* defined (L_udivmodpsi4) */

#if defined (L_divmodpsi4)
;; R24:R22 = R24:R22 div R20:R18
;; R20:R18 = R24:R22 mod R20:R18
;; Clobbers: T, __tmp_reg__, R21, R25, R26

DEFUN __divmodpsi4
    ; R0.7 will contain the sign of the result:
    ; R0.7 = A.sign ^ B.sign
    mov  __tmp_reg__, B2
    ; T-flag = sign of dividend
    bst  A2, 7
    brtc 0f
    com  __tmp_reg__
    ; Adjust dividend's sign
    rcall __divmodpsi4_negA
0:
    ; Adjust divisor's sign
    sbrc B2, 7
    rcall __divmodpsi4_negB

    ; Do the unsigned div/mod
    XCALL __udivmodpsi4

    ; Adjust quotient's sign
    sbrc __tmp_reg__, 7
    rcall __divmodpsi4_negA

    ; Adjust remainder's sign
    brtc __divmodpsi4_end

__divmodpsi4_negB:
    ; Correct divisor/remainder sign
    com  B2
    com  B1
    neg  B0
    sbci B1, -1
    sbci B2, -1
    ret

    ; Correct dividend/quotient sign
__divmodpsi4_negA:
    com  A2
    com  A1
    neg  A0
    sbci A1, -1
    sbci A2, -1
__divmodpsi4_end:
    ret

ENDF __divmodpsi4
#endif /* defined (L_divmodpsi4) */

#undef A0
#undef A1
#undef A2

#undef B0
#undef B1
#undef B2

#undef C0
#undef C1
#undef C2

#undef r_cnt

/*******************************************************
       Division 32 / 32 => (result + remainder)
*******************************************************/
#define r_remHH  r31    /* remainder High */
#define r_remHL  r30
#define r_remH   r27
#define r_remL   r26    /* remainder Low */

/* return: remainder */
#define r_arg1HH r25    /* dividend High */
#define r_arg1HL r24
#define r_arg1H  r23
#define r_arg1L  r22    /* dividend Low */

/* return: quotient */
#define r_arg2HH r21    /* divisor High */
#define r_arg2HL r20
#define r_arg2H  r19
#define r_arg2L  r18    /* divisor Low */

#define r_cnt __zero_reg__  /* loop count (0 after the loop!) */

#if defined (L_udivmodsi4)
DEFUN __udivmodsi4
    ldi  r_remL, 33         ; init loop counter
    mov  r_cnt, r_remL
    sub  r_remL,r_remL
    sub  r_remH,r_remH      ; clear remainder and carry
    mov_l r_remHL, r_remL
    mov_h r_remHH, r_remH
    rjmp __udivmodsi4_ep    ; jump to entry point
__udivmodsi4_loop:
    rol  r_remL             ; shift dividend into remainder
    rol  r_remH
    rol  r_remHL
    rol  r_remHH
    cp   r_remL,r_arg2L     ; compare remainder & divisor
    cpc  r_remH,r_arg2H
    cpc  r_remHL,r_arg2HL
    cpc  r_remHH,r_arg2HH
    brcs __udivmodsi4_ep    ; remainder < divisor
    sub  r_remL,r_arg2L     ; subtract divisor
    sbc  r_remH,r_arg2H
    sbc  r_remHL,r_arg2HL
    sbc  r_remHH,r_arg2HH
__udivmodsi4_ep:
    rol  r_arg1L            ; shift dividend (with CARRY)
    rol  r_arg1H
    rol  r_arg1HL
    rol  r_arg1HH
    dec  r_cnt              ; decrement loop counter
    brne __udivmodsi4_loop
                            ; __zero_reg__ now restored (r_cnt == 0)
    com  r_arg1L
    com  r_arg1H
    com  r_arg1HL
    com  r_arg1HH
    ; div/mod results to return registers, as for the ldiv() function
    mov_l r_arg2L, r_arg1L  ; quotient
    mov_h r_arg2H, r_arg1H
    mov_l r_arg2HL, r_arg1HL
    mov_h r_arg2HH, r_arg1HH
    mov_l r_arg1L, r_remL   ; remainder
    mov_h r_arg1H, r_remH
    mov_l r_arg1HL, r_remHL
    mov_h r_arg1HH, r_remHH
    ret
ENDF __udivmodsi4
#endif /* defined (L_udivmodsi4) */

#if defined (L_divmodsi4)
DEFUN __divmodsi4
    mov  __tmp_reg__,r_arg2HH
    bst  r_arg1HH,7         ; store sign of dividend
    brtc 0f
    com  __tmp_reg__        ; r0.7 is sign of result
    rcall __divmodsi4_neg1  ; dividend negative: negate
0:
    sbrc r_arg2HH,7
    rcall __divmodsi4_neg2  ; divisor negative: negate
    XCALL __udivmodsi4      ; do the unsigned div/mod
    sbrc __tmp_reg__, 7     ; correct quotient sign
    rcall __divmodsi4_neg2
    brtc __divmodsi4_exit   ; correct remainder sign
__divmodsi4_neg1:
    ;; correct dividend/remainder sign
    com  r_arg1HH
    com  r_arg1HL
    com  r_arg1H
    neg  r_arg1L
    sbci r_arg1H, 0xff
    sbci r_arg1HL,0xff
    sbci r_arg1HH,0xff
    ret
__divmodsi4_neg2:
    ;; correct divisor/quotient sign
    com  r_arg2HH
    com  r_arg2HL
    com  r_arg2H
    neg  r_arg2L
    sbci r_arg2H,0xff
    sbci r_arg2HL,0xff
    sbci r_arg2HH,0xff
__divmodsi4_exit:
    ret
ENDF __divmodsi4
#endif /* defined (L_divmodsi4) */

/*******************************************************
       Division 64 / 64
       Modulo   64 % 64
*******************************************************/

;; Use the Speed-optimized Version on "big" Devices, i.e. Devices with
;; at least 16k of Program Memory.  For smaller Devices, depend
;; on MOVW and SP Size.  There is a Connection between SP Size and
;; Flash Size so that SP Size can be used to test for Flash Size.

#if defined (__AVR_HAVE_JMP_CALL__)
#   define SPEED_DIV 8
#elif defined (__AVR_HAVE_MOVW__) && !defined (__AVR_HAVE_8BIT_SP__)
#   define SPEED_DIV 16
#else
#   define SPEED_DIV 0
#endif

;; A[0..7]: In: Dividend;
;;          Out: Quotient  (T = 0)
;;          Out: Remainder (T = 1)
#define A0  18
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3
#define A4  A0+4
#define A5  A0+5
#define A6  A0+6
#define A7  A0+7

;; B[0..7]: In: Divisor; Out: Clobber
#define B0  10
#define B1  B0+1
#define B2  B0+2
#define B3  B0+3
#define B4  B0+4
#define B5  B0+5
#define B6  B0+6
#define B7  B0+7

;; C[0..7]: Expand remainder; Out: Remainder (unused)
#define C0  8
#define C1  C0+1
#define C2  30
#define C3  C2+1
#define C4  28
#define C5  C4+1
#define C6  26
#define C7  C6+1

;; Holds the Signs during the Division Routine
#define SS __tmp_reg__

;; Bit Counter in the Division Routine
#define R_cnt __zero_reg__

;; Scratch Register for Negation
#define NN r31

#if defined (L_udivdi3)

;; R25:R18 = R24:R18 umod R17:R10
;; Ordinary ABI-Function

DEFUN __umoddi3
    set
    rjmp __udivdi3_umoddi3
ENDF __umoddi3

;; R25:R18 = R24:R18 udiv R17:R10
;; Ordinary ABI-Function

DEFUN __udivdi3
    clt
ENDF __udivdi3

DEFUN __udivdi3_umoddi3
    push C0
    push C1
    push C4
    push C5
    XCALL __udivmod64
    pop  C5
    pop  C4
    pop  C1
    pop  C0
    ret
ENDF __udivdi3_umoddi3
#endif /* L_udivdi3 */

#if defined (L_udivmod64)

;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
;; No Registers saved/restored; the Callers will take Care.
;; Preserves B[] and the T-flag
;; T = 0: Compute Quotient in A[]
;; T = 1: Compute Remainder in A[] and shift SS one Bit left

DEFUN __udivmod64

    ;; Clear Remainder (C6, C7 will follow)
    clr  C0
    clr  C1
    wmov C2, C0
    wmov C4, C0
    ldi  C7, 64

#if SPEED_DIV == 0 || SPEED_DIV == 16
    ;; Initialize Loop Counter
    mov  R_cnt, C7
    wmov C6, C0
#endif /* SPEED_DIV */

#if SPEED_DIV == 8

    push A7
    clr  C6

1:  ;; Compare shifted Dividend against Divisor
    ;; If -- even after Shifting -- it is smaller...
    CP  A7,B0  $  cpc C0,B1  $  cpc C1,B2  $  cpc C2,B3
    cpc C3,B4  $  cpc C4,B5  $  cpc C5,B6  $  cpc C6,B7
    brcc 2f

    ;; ...then we can subtract it.  Thus, it is legal to shift left
               $  mov C6,C5  $  mov C5,C4  $  mov C4,C3
    mov C3,C2  $  mov C2,C1  $  mov C1,C0  $  mov C0,A7
    mov A7,A6  $  mov A6,A5  $  mov A5,A4  $  mov A4,A3
    mov A3,A2  $  mov A2,A1  $  mov A1,A0  $  clr A0

    ;; 8 Bits are done
    subi C7, 8
    brne 1b

    ;; Shifted 64 Bits:  A7 has traveled to C7
    pop  C7
    ;; Divisor is greater than Dividend.  We have:
    ;; A[] % B[] = A[]
    ;; A[] / B[] = 0
    ;; Thus, we can return immediately
    rjmp 5f

2:  ;; Initialize the Bit Counter with the Number of Bits still to be performed
    mov  R_cnt, C7

    ;; Push of A7 is not needed because C7 is still 0
    pop  C7
    clr  C7

#elif SPEED_DIV == 16

    ;; Compare shifted Dividend against Divisor
    cp   A7, B3
    cpc  C0, B4
    cpc  C1, B5
    cpc  C2, B6
    cpc  C3, B7
    brcc 2f

    ;; Divisor is greater than the shifted Dividend: We can shift the Dividend
    ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
    wmov C2,A6  $  wmov C0,A4
    wmov A6,A2  $  wmov A4,A0
    wmov A2,C6  $  wmov A0,C4

    ;; Set Bit Counter to 32
    lsr  R_cnt
2:
#elif SPEED_DIV
#error SPEED_DIV = ?
#endif /* SPEED_DIV */

    ;; The very Division + Remainder Routine

3:  ;; Left-shift Dividend...
    lsl A0  $  rol A1  $  rol A2  $  rol A3
    rol A4  $  rol A5  $  rol A6  $  rol A7

    ;; ...into Remainder
    rol C0  $  rol C1  $  rol C2  $  rol C3
    rol C4  $  rol C5  $  rol C6  $  rol C7

    ;; Compare Remainder and Divisor
    CP  C0,B0  $  cpc C1,B1  $  cpc C2,B2  $  cpc C3,B3
    cpc C4,B4  $  cpc C5,B5  $  cpc C6,B6  $  cpc C7,B7

    brcs 4f

    ;; Divisor fits into Remainder:  Subtract it from Remainder...
    SUB C0,B0  $  sbc C1,B1  $  sbc C2,B2  $  sbc C3,B3
    sbc C4,B4  $  sbc C5,B5  $  sbc C6,B6  $  sbc C7,B7

    ;; ...and set the corresponding Bit in the upcoming Quotient
    ;; The Bit will travel to its final Position
    ori A0, 1

4:  ;; This Bit is done
    dec  R_cnt
    brne 3b
    ;; __zero_reg__ is 0 again

    ;; T = 0: We are fine with the Quotient in A[]
    ;; T = 1: Copy the Remainder to A[]
5:  brtc 6f
    wmov A0, C0
    wmov A2, C2
    wmov A4, C4
    wmov A6, C6
    ;; Move the Sign of the Result to SS.7
    lsl  SS

6:  ret

ENDF __udivmod64
#endif /* L_udivmod64 */
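
/* One worker yields both results: the loop always develops Quotient
   and Remainder side by side, and the T flag merely selects which of
   the two is copied to the A[] return registers.  In C terms (a
   sketch only; assumes <stdint.h> types and b != 0):

       static uint64_t udivmod64_model (uint64_t a, uint64_t b,
                                        int t_flag)    // 1: remainder
       {
           uint64_t q = 0, r = 0;
           for (int i = 63; i >= 0; i--)
           {
               r = (r << 1) | ((a >> i) & 1);
               q <<= 1;
               if (r >= b)
               {
                   r -= b;
                   q |= 1;
               }
           }
           return t_flag ? r : q;
       }

   The SPEED_DIV == 8 / 16 paths merely pre-shift the Dividend by
   whole Bytes or one 32-bit Chunk while the Divisor is still larger,
   reducing the Number of single-bit Rounds.  */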

#if defined (L_divdi3)

;; R25:R18 = R24:R18 mod R17:R10
;; Ordinary ABI-Function

DEFUN __moddi3
    set
    rjmp __divdi3_moddi3
ENDF __moddi3

;; R25:R18 = R24:R18 div R17:R10
;; Ordinary ABI-Function

DEFUN __divdi3
    clt
ENDF __divdi3

DEFUN __divdi3_moddi3
#if SPEED_DIV
    mov  r31, A7
    or   r31, B7
    brmi 0f
    ;; Both Signs are 0: the following Complexity is not needed
    XJMP __udivdi3_umoddi3
#endif /* SPEED_DIV */

0:  ;; The Prologue
    ;; Save 12 Registers:  Y, 17...8
    ;; No Frame needed (X = 0)
    clr  r26
    clr  r27
    ldi  r30, lo8(gs(1f))
    ldi  r31, hi8(gs(1f))
    XJMP __prologue_saves__ + ((18 - 12) * 2)

1:  ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
    ;; SS.6 will contain the Sign of the Remainder (A.sign)
    mov  SS, A7
    asr  SS
    ;; Adjust the Dividend's Sign as needed
#if SPEED_DIV
    ;; Compiling for Speed we know that at least one Sign must be < 0
    ;; Thus, if A[] >= 0 then we know B[] < 0
    brpl 22f
#else
    brpl 21f
#endif /* SPEED_DIV */

    XCALL __negdi2

    ;; Adjust the Divisor's Sign and SS.7 as needed
21: tst  B7
    brpl 3f
22: ldi  NN, 1 << 7
    eor  SS, NN

    ldi NN, -1
    com B4  $  com B5  $  com B6  $  com B7
            $  com B1  $  com B2  $  com B3
    NEG B0
            $  sbc B1,NN  $  sbc B2,NN  $  sbc B3,NN
    sbc B4,NN  $  sbc B5,NN  $  sbc B6,NN  $  sbc B7,NN

3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on the T-flag)
    XCALL __udivmod64

    ;; Adjust the Result's Sign
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    tst  SS
    brpl 4f
#else
    sbrc SS, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XCALL __negdi2

4:  ;; Epilogue: Restore the Z = 12 Registers and return
    in   r28, __SP_L__
#if defined (__AVR_HAVE_8BIT_SP__)
    clr  r29
#else
    in   r29, __SP_H__
#endif /* #SP = 8/16 */
    ldi  r30, 12
    XJMP __epilogue_restores__ + ((18 - 12) * 2)

ENDF __divdi3_moddi3

#undef R_cnt
#undef SS
#undef NN

#endif /* L_divdi3 */
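
/* The Sign handling above implements C99 truncating division: the
   Quotient is negative iff the Operand Signs differ (SS.7), and the
   Remainder takes the Sign of the Dividend (SS.6).  As a C sketch
   (reference model only; assumes <stdint.h> types and b != 0):

       static int64_t divmoddi_model (int64_t a, int64_t b, int t_flag)
       {
           uint64_t ua = a < 0 ? - (uint64_t) a : (uint64_t) a;
           uint64_t ub = b < 0 ? - (uint64_t) b : (uint64_t) b;
           uint64_t q = ua / ub;                 // __udivmod64, T = 0
           uint64_t r = ua % ub;                 // __udivmod64, T = 1
           if (t_flag)                           // remainder: sign of A
               return a < 0 ? - (int64_t) r : (int64_t) r;
           return (a < 0) != (b < 0)             // quotient: A.sign ^ B.sign
                  ? - (int64_t) q : (int64_t) q;
       }
*/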

.section .text.libgcc, "ax", @progbits

#define TT __tmp_reg__

#if defined (L_adddi3)
;; (set (reg:DI 18)
;;      (plus:DI (reg:DI 18)
;;               (reg:DI 10)))
DEFUN __adddi3
    ADD A0,B0  $  adc A1,B1  $  adc A2,B2  $  adc A3,B3
    adc A4,B4  $  adc A5,B5  $  adc A6,B6  $  adc A7,B7
    ret
ENDF __adddi3
#endif /* L_adddi3 */

#if defined (L_adddi3_s8)
;; (set (reg:DI 18)
;;      (plus:DI (reg:DI 18)
;;               (sign_extend:SI (reg:QI 26))))
DEFUN __adddi3_s8
    clr  TT
    sbrc r26, 7
    com  TT
    ADD A0,r26  $  adc A1,TT  $  adc A2,TT  $  adc A3,TT
    adc A4,TT   $  adc A5,TT  $  adc A6,TT  $  adc A7,TT
    ret
ENDF __adddi3_s8
#endif /* L_adddi3_s8 */

#if defined (L_subdi3)
;; (set (reg:DI 18)
;;      (minus:DI (reg:DI 18)
;;                (reg:DI 10)))
DEFUN __subdi3
    SUB A0,B0  $  sbc A1,B1  $  sbc A2,B2  $  sbc A3,B3
    sbc A4,B4  $  sbc A5,B5  $  sbc A6,B6  $  sbc A7,B7
    ret
ENDF __subdi3
#endif /* L_subdi3 */

#if defined (L_cmpdi2)
;; (set (cc0)
;;      (compare (reg:DI 18)
;;               (reg:DI 10)))
DEFUN __cmpdi2
    CP  A0,B0  $  cpc A1,B1  $  cpc A2,B2  $  cpc A3,B3
    cpc A4,B4  $  cpc A5,B5  $  cpc A6,B6  $  cpc A7,B7
    ret
ENDF __cmpdi2
#endif /* L_cmpdi2 */

#if defined (L_cmpdi2_s8)
;; (set (cc0)
;;      (compare (reg:DI 18)
;;               (sign_extend:SI (reg:QI 26))))
DEFUN __cmpdi2_s8
    clr  TT
    sbrc r26, 7
    com  TT
    CP  A0,r26  $  cpc A1,TT  $  cpc A2,TT  $  cpc A3,TT
    cpc A4,TT   $  cpc A5,TT  $  cpc A6,TT  $  cpc A7,TT
    ret
ENDF __cmpdi2_s8
#endif /* L_cmpdi2_s8 */

#if defined (L_negdi2)
DEFUN __negdi2

    com A4  $  com A5  $  com A6  $  com A7
            $  com A1  $  com A2  $  com A3
    NEG A0
            $  sbci A1,-1  $  sbci A2,-1  $  sbci A3,-1
    sbci A4,-1  $  sbci A5,-1  $  sbci A6,-1  $  sbci A7,-1
    ret

ENDF __negdi2
#endif /* L_negdi2 */

#undef TT

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#undef B7
#undef B6
#undef B5
#undef B4
#undef B3
#undef B2
#undef B1
#undef B0

#undef A7
#undef A6
#undef A5
#undef A4
#undef A3
#undef A2
#undef A1
#undef A0


.section .text.libgcc.prologue, "ax", @progbits

/**********************************
 * This is a prologue subroutine
 **********************************/
#if defined (L_prologue)

;; This function does not clobber T-flag; 64-bit division relies on it
DEFUN __prologue_saves__
    push r2
    push r3
    push r4
    push r5
    push r6
    push r7
    push r8
    push r9
    push r10
    push r11
    push r12
    push r13
    push r14
    push r15
    push r16
    push r17
    push r28
    push r29
#if defined (__AVR_HAVE_8BIT_SP__)
    in   r28,__SP_L__
    sub  r28,r26
    out  __SP_L__,r28
    clr  r29
#elif defined (__AVR_XMEGA__)
    in   r28,__SP_L__
    in   r29,__SP_H__
    sub  r28,r26
    sbc  r29,r27
    out  __SP_L__,r28
    out  __SP_H__,r29
#else
    in   r28,__SP_L__
    in   r29,__SP_H__
    sub  r28,r26
    sbc  r29,r27
    in   __tmp_reg__,__SREG__
    cli
    out  __SP_H__,r29
    out  __SREG__,__tmp_reg__
    out  __SP_L__,r28
#endif /* #SP = 8/16 */

#if defined (__AVR_HAVE_EIJMP_EICALL__)
    eijmp
#else
    ijmp
#endif

ENDF __prologue_saves__
#endif /* defined (L_prologue) */

/*
 * This is an epilogue subroutine
 */
#if defined (L_epilogue)

DEFUN __epilogue_restores__
    ldd  r2,Y+18
    ldd  r3,Y+17
    ldd  r4,Y+16
    ldd  r5,Y+15
    ldd  r6,Y+14
    ldd  r7,Y+13
    ldd  r8,Y+12
    ldd  r9,Y+11
    ldd  r10,Y+10
    ldd  r11,Y+9
    ldd  r12,Y+8
    ldd  r13,Y+7
    ldd  r14,Y+6
    ldd  r15,Y+5
    ldd  r16,Y+4
    ldd  r17,Y+3
    ldd  r26,Y+2
#if defined (__AVR_HAVE_8BIT_SP__)
    ldd  r29,Y+1
    add  r28,r30
    out  __SP_L__,r28
    mov  r28, r26
#elif defined (__AVR_XMEGA__)
    ldd  r27,Y+1
    add  r28,r30
    adc  r29,__zero_reg__
    out  __SP_L__,r28
    out  __SP_H__,r29
    wmov 28, 26
#else
    ldd  r27,Y+1
    add  r28,r30
    adc  r29,__zero_reg__
    in   __tmp_reg__,__SREG__
    cli
    out  __SP_H__,r29
    out  __SREG__,__tmp_reg__
    out  __SP_L__,r28
    mov_l r28, r26
    mov_h r29, r27
#endif /* #SP = 8/16 */
    ret
ENDF __epilogue_restores__
#endif /* defined (L_epilogue) */

#ifdef L_exit
.section .fini9,"ax",@progbits
DEFUN _exit
    .weak exit
exit:
ENDF _exit

/* Code from .fini8 ... .fini1 sections inserted by ld script.  */

.section .fini0,"ax",@progbits
    cli
__stop_program:
    rjmp __stop_program
#endif /* defined (L_exit) */

#ifdef L_cleanup
.weak _cleanup
.func _cleanup
_cleanup:
    ret
.endfunc
#endif /* defined (L_cleanup) */

.section .text.libgcc, "ax", @progbits

#ifdef L_tablejump
DEFUN __tablejump2__
    lsl  r30    ; table entries are word addresses: convert to byte address
    rol  r31
    ;; FALLTHRU
ENDF __tablejump2__

DEFUN __tablejump__
#if defined (__AVR_HAVE_LPMX__)
    lpm  __tmp_reg__, Z+
    lpm  r31, Z
    mov  r30, __tmp_reg__
#if defined (__AVR_HAVE_EIJMP_EICALL__)
    eijmp
#else
    ijmp
#endif

#else /* !HAVE_LPMX */
    lpm
    adiw r30, 1
    push r0
    lpm
    push r0
#if defined (__AVR_HAVE_EIJMP_EICALL__)
    in   __tmp_reg__, __EIND__
    push __tmp_reg__
#endif
    ret
#endif /* !HAVE_LPMX */
ENDF __tablejump__
#endif /* defined (L_tablejump) */

#ifdef L_copy_data
.section .init4,"ax",@progbits
DEFUN __do_copy_data
#if defined(__AVR_HAVE_ELPMX__)
    ldi  r17, hi8(__data_end)
    ldi  r26, lo8(__data_start)
    ldi  r27, hi8(__data_start)
    ldi  r30, lo8(__data_load_start)
    ldi  r31, hi8(__data_load_start)
    ldi  r16, hh8(__data_load_start)
    out  __RAMPZ__, r16
    rjmp .L__do_copy_data_start
.L__do_copy_data_loop:
    elpm r0, Z+
    st   X+, r0
.L__do_copy_data_start:
    cpi  r26, lo8(__data_end)
    cpc  r27, r17
    brne .L__do_copy_data_loop
#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
    ldi  r17, hi8(__data_end)
    ldi  r26, lo8(__data_start)
    ldi  r27, hi8(__data_start)
    ldi  r30, lo8(__data_load_start)
    ldi  r31, hi8(__data_load_start)
    ldi  r16, hh8(__data_load_start - 0x10000)
.L__do_copy_data_carry:
    inc  r16
    out  __RAMPZ__, r16
    rjmp .L__do_copy_data_start
.L__do_copy_data_loop:
    elpm
    st   X+, r0
    adiw r30, 1
    brcs .L__do_copy_data_carry
.L__do_copy_data_start:
    cpi  r26, lo8(__data_end)
    cpc  r27, r17
    brne .L__do_copy_data_loop
#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
    ldi  r17, hi8(__data_end)
    ldi  r26, lo8(__data_start)
    ldi  r27, hi8(__data_start)
    ldi  r30, lo8(__data_load_start)
    ldi  r31, hi8(__data_load_start)
    rjmp .L__do_copy_data_start
.L__do_copy_data_loop:
#if defined (__AVR_HAVE_LPMX__)
    lpm  r0, Z+
#else
    lpm
    adiw r30, 1
#endif
    st   X+, r0
.L__do_copy_data_start:
    cpi  r26, lo8(__data_end)
    cpc  r27, r17
    brne .L__do_copy_data_loop
#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out  __RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
ENDF __do_copy_data
#endif /* L_copy_data */
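
/* In C terms the startup loop above is simply (reference model only;
   the symbols come from the linker script, and on real hardware the
   source bytes are fetched with LPM/ELPM rather than a plain load):

       extern char __data_start[], __data_end[], __data_load_start[];

       static void do_copy_data_model (void)
       {
           const char *src = __data_load_start;   // load address (flash)
           for (char *dst = __data_start; dst != __data_end; )
               *dst++ = *src++;                   // (e)lpm ... st X+
       }
*/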

/* __do_clear_bss is only necessary if there is anything in the .bss section.  */

#ifdef L_clear_bss
.section .init4,"ax",@progbits
DEFUN __do_clear_bss
    ldi  r17, hi8(__bss_end)
    ldi  r26, lo8(__bss_start)
    ldi  r27, hi8(__bss_start)
    rjmp .do_clear_bss_start
.do_clear_bss_loop:
    st   X+, __zero_reg__
.do_clear_bss_start:
    cpi  r26, lo8(__bss_end)
    cpc  r27, r17
    brne .do_clear_bss_loop
ENDF __do_clear_bss
#endif /* L_clear_bss */
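
/* Reference model of the clearing loop (a sketch only; the symbols
   are provided by the linker script):

       extern char __bss_start[], __bss_end[];

       static void do_clear_bss_model (void)
       {
           for (char *p = __bss_start; p != __bss_end; )
               *p++ = 0;                          // st X+, __zero_reg__
       }
*/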

/* __do_global_ctors and __do_global_dtors are only necessary
   if there are any constructors/destructors.  */

#ifdef L_ctors
.section .init6,"ax",@progbits
DEFUN __do_global_ctors
#if defined(__AVR_HAVE_ELPM__)
    ldi  r17, hi8(__ctors_start)
    ldi  r28, lo8(__ctors_end)
    ldi  r29, hi8(__ctors_end)
    ldi  r16, hh8(__ctors_end)
    rjmp .L__do_global_ctors_start
.L__do_global_ctors_loop:
    sbiw r28, 2
    sbc  r16, __zero_reg__
    mov_h r31, r29
    mov_l r30, r28
    out  __RAMPZ__, r16
    XCALL __tablejump_elpm__
.L__do_global_ctors_start:
    cpi  r28, lo8(__ctors_start)
    cpc  r29, r17
    ldi  r24, hh8(__ctors_start)
    cpc  r16, r24
    brne .L__do_global_ctors_loop
#else
    ldi  r17, hi8(__ctors_start)
    ldi  r28, lo8(__ctors_end)
    ldi  r29, hi8(__ctors_end)
    rjmp .L__do_global_ctors_start
.L__do_global_ctors_loop:
    sbiw r28, 2
    mov_h r31, r29
    mov_l r30, r28
    XCALL __tablejump__
.L__do_global_ctors_start:
    cpi  r28, lo8(__ctors_start)
    cpc  r29, r17
    brne .L__do_global_ctors_loop
#endif /* defined(__AVR_HAVE_ELPM__) */
ENDF __do_global_ctors
#endif /* L_ctors */

#ifdef L_dtors
.section .fini6,"ax",@progbits
DEFUN __do_global_dtors
#if defined(__AVR_HAVE_ELPM__)
    ldi  r17, hi8(__dtors_end)
    ldi  r28, lo8(__dtors_start)
    ldi  r29, hi8(__dtors_start)
    ldi  r16, hh8(__dtors_start)
    rjmp .L__do_global_dtors_start
.L__do_global_dtors_loop:
    sbiw r28, 2
    sbc  r16, __zero_reg__
    mov_h r31, r29
    mov_l r30, r28
    out  __RAMPZ__, r16
    XCALL __tablejump_elpm__
.L__do_global_dtors_start:
    cpi  r28, lo8(__dtors_end)
    cpc  r29, r17
    ldi  r24, hh8(__dtors_end)
    cpc  r16, r24
    brne .L__do_global_dtors_loop
#else
    ldi  r17, hi8(__dtors_end)
    ldi  r28, lo8(__dtors_start)
    ldi  r29, hi8(__dtors_start)
    rjmp .L__do_global_dtors_start
.L__do_global_dtors_loop:
    mov_h r31, r29
    mov_l r30, r28
    XCALL __tablejump__
    adiw r28, 2
.L__do_global_dtors_start:
    cpi  r28, lo8(__dtors_end)
    cpc  r29, r17
    brne .L__do_global_dtors_loop
#endif /* defined(__AVR_HAVE_ELPM__) */
ENDF __do_global_dtors
#endif /* L_dtors */

.section .text.libgcc, "ax", @progbits

#ifdef L_tablejump_elpm
DEFUN __tablejump_elpm__
#if defined (__AVR_HAVE_ELPMX__)
    elpm __tmp_reg__, Z+
    elpm r31, Z
    mov  r30, __tmp_reg__
#if defined (__AVR_HAVE_RAMPD__)
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out  __RAMPZ__, __zero_reg__
#endif /* RAMPD */
#if defined (__AVR_HAVE_EIJMP_EICALL__)
    eijmp
#else
    ijmp
#endif

#elif defined (__AVR_HAVE_ELPM__)
    elpm
    adiw r30, 1
    push r0
    elpm
    push r0
#if defined (__AVR_HAVE_EIJMP_EICALL__)
    in   __tmp_reg__, __EIND__
    push __tmp_reg__
#endif
    ret
#endif
ENDF __tablejump_elpm__
#endif /* defined (L_tablejump_elpm) */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Loading n bytes from Flash; n = 3,4
;; R22... = Flash[Z]
;; Clobbers: __tmp_reg__

#if (defined (L_load_3)        \
     || defined (L_load_4))    \
    && !defined (__AVR_HAVE_LPMX__)

;; Destination
#define D0  22
#define D1  D0+1
#define D2  D0+2
#define D3  D0+3

.macro .load dest, n
    lpm
    mov  \dest, r0
.if \dest != D0+\n-1
    adiw r30, 1
.else
    sbiw r30, \n-1
.endif
.endm

#if defined (L_load_3)
DEFUN __load_3
    push D3
    XCALL __load_4
    pop  D3
    ret
ENDF __load_3
#endif /* L_load_3 */

#if defined (L_load_4)
DEFUN __load_4
    .load D0, 4
    .load D1, 4
    .load D2, 4
    .load D3, 4
    ret
ENDF __load_4
#endif /* L_load_4 */

#endif /* L_load_3 || L_load_4 */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Loading n bytes from Flash or RAM; n = 1,2,3,4
;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
;; Clobbers: __tmp_reg__, R21, R30, R31

#if (defined (L_xload_1)       \
     || defined (L_xload_2)    \
     || defined (L_xload_3)    \
     || defined (L_xload_4))

;; Destination
#define D0  22
#define D1  D0+1
#define D2  D0+2
#define D3  D0+3

;; Register containing bits 16+ of the address

#define HHI8  21

.macro .xload dest, n
#if defined (__AVR_HAVE_ELPMX__)
    elpm \dest, Z+
#elif defined (__AVR_HAVE_ELPM__)
    elpm
    mov  \dest, r0
.if \dest != D0+\n-1
    adiw r30, 1
    adc  HHI8, __zero_reg__
    out  __RAMPZ__, HHI8
.endif
#elif defined (__AVR_HAVE_LPMX__)
    lpm  \dest, Z+
#else
    lpm
    mov  \dest, r0
.if \dest != D0+\n-1
    adiw r30, 1
.endif
#endif
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
.if \dest == D0+\n-1
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out  __RAMPZ__, __zero_reg__
.endif
#endif
.endm ; .xload

#if defined (L_xload_1)
DEFUN __xload_1
#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
    sbrc HHI8, 7
    ld   D0, Z
    sbrs HHI8, 7
    lpm  D0, Z
    ret
#else
    sbrc HHI8, 7
    rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
    out  __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload D0, 1
    ret
1:  ld   D0, Z
    ret
#endif /* LPMx && ! ELPM */
ENDF __xload_1
#endif /* L_xload_1 */

#if defined (L_xload_2)
DEFUN __xload_2
    sbrc HHI8, 7
    rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
    out  __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload D0, 2
    .xload D1, 2
    ret
1:  ld   D0, Z+
    ld   D1, Z+
    ret
ENDF __xload_2
#endif /* L_xload_2 */

#if defined (L_xload_3)
DEFUN __xload_3
    sbrc HHI8, 7
    rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
    out  __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload D0, 3
    .xload D1, 3
    .xload D2, 3
    ret
1:  ld   D0, Z+
    ld   D1, Z+
    ld   D2, Z+
    ret
ENDF __xload_3
#endif /* L_xload_3 */

#if defined (L_xload_4)
DEFUN __xload_4
    sbrc HHI8, 7
    rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
    out  __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload D0, 4
    .xload D1, 4
    .xload D2, 4
    .xload D3, 4
    ret
1:  ld   D0, Z+
    ld   D1, Z+
    ld   D2, Z+
    ld   D3, Z+
    ret
ENDF __xload_4
#endif /* L_xload_4 */

#endif /* L_xload_{1|2|3|4} */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; memcopy from Address Space __pgmx to RAM
;; R23:Z = Source Address
;; X     = Destination Address
;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z

#if defined (L_movmemx)

#define HHI8  23
#define LOOP  24

DEFUN __movmemx_qi
    ;; #Bytes to copy fits in 8 Bits (1..255)
    ;; Zero-extend Loop Counter
    clr  LOOP+1
    ;; FALLTHRU
ENDF __movmemx_qi

DEFUN __movmemx_hi

    ;; Read from where?
    sbrc HHI8, 7
    rjmp 1f

    ;; Read from Flash

#if defined (__AVR_HAVE_ELPM__)
    out  __RAMPZ__, HHI8
#endif

0:  ;; Load 1 Byte from Flash...

#if defined (__AVR_HAVE_ELPMX__)
    elpm r0, Z+
#elif defined (__AVR_HAVE_ELPM__)
    elpm
    adiw r30, 1
    adc  HHI8, __zero_reg__
    out  __RAMPZ__, HHI8
#elif defined (__AVR_HAVE_LPMX__)
    lpm  r0, Z+
#else
    lpm
    adiw r30, 1
#endif

    ;; ...and store that Byte to the RAM Destination
    st   X+, r0
    sbiw LOOP, 1
    brne 0b
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out  __RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
    ret

    ;; Read from RAM

1:  ;; Read 1 Byte from RAM...
    ld   r0, Z+
    ;; ...and store that Byte to the RAM Destination
    st   X+, r0
    sbiw LOOP, 1
    brne 1b
    ret
ENDF __movmemx_hi

#undef HHI8
#undef LOOP

#endif /* L_movmemx */
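
/* Dispatch model: Bit 7 of HHI8, i.e. bit 23 of the 24-bit __pgmx
   address, selects RAM (set) or program memory (clear).  A C sketch
   (reference model only; flash_read_byte() is a hypothetical stand-in
   for the (E)LPM sequences above, and <stdint.h> types are assumed):

       extern uint8_t flash_read_byte (uint32_t addr);  // hypothetical

       static void movmemx_model (uint8_t *dst, uint32_t src, uint16_t n)
       {
           if (src & 0x800000UL)                 // sbrc HHI8, 7
               while (n--)                       // RAM source
                   *dst++ = * (const uint8_t *) (uint16_t) src++;
           else
               while (n--)                       // flash source
                   *dst++ = flash_read_byte (src++);
       }
*/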


.section .text.libgcc.builtins, "ax", @progbits

/**********************************
 * Find first set Bit (ffs)
 **********************************/

#if defined (L_ffssi2)
;; find first set bit
;; r25:r24 = ffs32 (r25:r22)
;; clobbers: r22, r26
DEFUN __ffssi2
    clr  r26
    tst  r22
    brne 1f
    subi r26, -8
    or   r22, r23
    brne 1f
    subi r26, -8
    or   r22, r24
    brne 1f
    subi r26, -8
    or   r22, r25
    brne 1f
    ret
1:  mov  r24, r22
    XJMP __loop_ffsqi2
ENDF __ffssi2
#endif /* defined (L_ffssi2) */

#if defined (L_ffshi2)
;; find first set bit
;; r25:r24 = ffs16 (r25:r24)
;; clobbers: r26
DEFUN __ffshi2
    clr  r26
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have a problem skipping a 2-word instruction
    tst  r24
    breq 2f
#else
    cpse r24, __zero_reg__
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1:  XJMP __loop_ffsqi2
2:  ldi  r26, 8
    or   r24, r25
    brne 1b
    ret
ENDF __ffshi2
#endif /* defined (L_ffshi2) */

#if defined (L_loop_ffsqi2)
;; Helper for ffshi2, ffssi2
;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
;; r24 must be != 0
;; clobbers: r26
DEFUN __loop_ffsqi2
    inc  r26
    lsr  r24
    brcc __loop_ffsqi2
    mov  r24, r26
    clr  r25
    ret
ENDF __loop_ffsqi2
#endif /* defined (L_loop_ffsqi2) */
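
/* Taken together, __ffssi2/__ffshi2 narrow the argument to its lowest
   nonzero byte (adding 8 per skipped byte) and __loop_ffsqi2 finds
   the bit within it.  A C reference model (a sketch only; assumes
   <stdint.h> types):

       static uint16_t ffs32_model (uint32_t x)
       {
           if (x == 0)
               return 0;
           uint16_t n = 1;              // inc r26 runs before the test
           while ((x & 1) == 0)
           {
               x >>= 1;                 // lsr r24
               n++;
           }
           return n;
       }
*/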


/**********************************
 * Count trailing Zeros (ctz)
 **********************************/

#if defined (L_ctzsi2)
;; count trailing zeros
;; r25:r24 = ctz32 (r25:r22)
;; clobbers: r26, r22
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzsi2
    XCALL __ffssi2
    dec  r24
    ret
ENDF __ctzsi2
#endif /* defined (L_ctzsi2) */

#if defined (L_ctzhi2)
;; count trailing zeros
;; r25:r24 = ctz16 (r25:r24)
;; clobbers: r26
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzhi2
    XCALL __ffshi2
    dec  r24
    ret
ENDF __ctzhi2
#endif /* defined (L_ctzhi2) */

\f
/**********************************
 * Count leading Zeros (clz)
 **********************************/

#if defined (L_clzdi2)
;; count leading zeros
;; r25:r24 = clz64 (r25:r18)
;; clobbers: r22, r23, r26
DEFUN __clzdi2
    XCALL __clzsi2
    sbrs r24, 5
    ret
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21
    XCALL __clzsi2
    subi r24, -32
    ret
ENDF __clzdi2
#endif /* defined (L_clzdi2) */

#if defined (L_clzsi2)
;; count leading zeros
;; r25:r24 = clz32 (r25:r22)
;; clobbers: r26
DEFUN __clzsi2
    XCALL __clzhi2
    sbrs r24, 4
    ret
    mov_l r24, r22
    mov_h r25, r23
    XCALL __clzhi2
    subi r24, -16
    ret
ENDF __clzsi2
#endif /* defined (L_clzsi2) */

#if defined (L_clzhi2)
;; count leading zeros
;; r25:r24 = clz16 (r25:r24)
;; clobbers: r26
DEFUN __clzhi2
    clr r26
    tst r25
    brne 1f
    subi r26, -8
    or r25, r24
    brne 1f
    ldi r24, 16
    ret
1:  cpi r25, 16
    brsh 3f
    subi r26, -3
    swap r25
2:  inc r26
3:  lsl r25
    brcc 2b
    mov r24, r26
    clr r25
    ret
ENDF __clzhi2
#endif /* defined (L_clzhi2) */
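
/* __clzhi2 narrows down byte-, nibble- and then bit-wise; the 32- and
   64-bit versions split the operand and test result bit 4 resp. 5, since
   a count of 16 resp. 32 means the high half was all zero.  C sketch of
   the 16-bit worker (illustrative only; hypothetical name, <stdint.h>
   types; `n += 4' stands for the `subi r26, -3' plus the extra `inc' on
   the fall-through path):

       int clz16 (uint16_t x)
       {
           uint8_t n = 0, b = x >> 8;
           if (b == 0)
             {
               n += 8;                      // subi r26, -8
               b = x;                       // or r25, r24 (r25 known zero)
               if (b == 0)
                   return 16;               // clz16 (0) = 16
             }
           if (b < 16)                      // cpi r25, 16;  brsh 3f
             {
               n += 4;
               b = (b << 4) | (b >> 4);     // swap r25
             }
           while (!(b & 0x80))              // lsl r25;  brcc 2b
             {
               n++;
               b <<= 1;
             }
           return n;
       }
*/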

\f
/**********************************
 * Parity
 **********************************/

#if defined (L_paritydi2)
;; r25:r24 = parity64 (r25:r18)
;; clobbers: __tmp_reg__
DEFUN __paritydi2
    eor r24, r18
    eor r24, r19
    eor r24, r20
    eor r24, r21
    XJMP __paritysi2
ENDF __paritydi2
#endif /* defined (L_paritydi2) */

#if defined (L_paritysi2)
;; r25:r24 = parity32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __paritysi2
    eor r24, r22
    eor r24, r23
    XJMP __parityhi2
ENDF __paritysi2
#endif /* defined (L_paritysi2) */

#if defined (L_parityhi2)
;; r25:r24 = parity16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __parityhi2
    eor r24, r25
    ;; FALLTHRU
ENDF __parityhi2

;; r25:r24 = parity8 (r24)
;; clobbers: __tmp_reg__
DEFUN __parityqi2
    ;; parity is in r24[0..7]
    mov __tmp_reg__, r24
    swap __tmp_reg__
    eor r24, __tmp_reg__
    ;; parity is in r24[0..3]
    subi r24, -4
    andi r24, -5
    subi r24, -6
    ;; parity is in r24[0,3]
    sbrc r24, 3
    inc r24
    ;; parity is in r24[0]
    andi r24, 1
    clr r25
    ret
ENDF __parityqi2
#endif /* defined (L_parityhi2) */
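
/* The parity functions xor all argument bytes into r24 and then fold that
   byte down to a single bit.  The assembly condenses the last four bits
   with add/mask tricks; a conventional xor-fold computes the same result,
   as in this C sketch (illustrative only; hypothetical names):

       static uint8_t parity8 (uint8_t b)
       {
           b ^= (uint8_t) ((b << 4) | (b >> 4));   // swap + eor: 8 -> 4 bits
           b ^= b >> 2;                            // 4 -> 2 bits
           b ^= b >> 1;                            // 2 -> 1 bit
           return b & 1;
       }

       int parity16 (uint16_t x)
       {
           return parity8 ((uint8_t) x ^ (uint8_t) (x >> 8));  // eor r24, r25
       }
*/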

\f
/**********************************
 * Population Count
 **********************************/

#if defined (L_popcounthi2)
;; population count
;; r25:r24 = popcount16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __popcounthi2
    XCALL __popcountqi2
    push r24
    mov r24, r25
    XCALL __popcountqi2
    clr r25
    ;; FALLTHRU
ENDF __popcounthi2

DEFUN __popcounthi2_tail
    pop __tmp_reg__
    add r24, __tmp_reg__
    ret
ENDF __popcounthi2_tail
#endif /* defined (L_popcounthi2) */
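
/* __popcounthi2 counts the low byte, saves that count on the stack, counts
   the high byte and falls through to __popcounthi2_tail, which pops and
   adds the saved count.  The 32- and 64-bit versions below reuse the same
   tail to add the counts of their two halves.  In C terms (illustrative
   only; hypothetical names):

       int popcount16 (uint16_t x)
       {
           return popcount8 ((uint8_t) x)            // pushed
                  + popcount8 ((uint8_t) (x >> 8));  // added by the tail
       }
*/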

#if defined (L_popcountsi2)
;; population count
;; r25:r24 = popcount32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __popcountsi2
    XCALL __popcounthi2
    push r24
    mov_l r24, r22
    mov_h r25, r23
    XCALL __popcounthi2
    XJMP __popcounthi2_tail
ENDF __popcountsi2
#endif /* defined (L_popcountsi2) */

#if defined (L_popcountdi2)
;; population count
;; r25:r24 = popcount64 (r25:r18)
;; clobbers: r22, r23, __tmp_reg__
DEFUN __popcountdi2
    XCALL __popcountsi2
    push r24
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21
    XCALL __popcountsi2
    XJMP __popcounthi2_tail
ENDF __popcountdi2
#endif /* defined (L_popcountdi2) */

#if defined (L_popcountqi2)
;; population count
;; r24 = popcount8 (r24)
;; clobbers: __tmp_reg__
DEFUN __popcountqi2
    mov __tmp_reg__, r24
    andi r24, 1
    lsr __tmp_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __tmp_reg__
    ret
ENDF __popcountqi2
#endif /* defined (L_popcountqi2) */
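
/* __popcountqi2 counts serially: bit 0 is masked out first, bits 1..6 are
   shifted into the carry one at a time and added via `adc', and the final
   `adc r24, __tmp_reg__' picks up bit 6 (in the carry) and bit 7 (the byte
   remainder) with a single instruction.  Reference semantics in C
   (illustrative only; hypothetical name):

       static uint8_t popcount8 (uint8_t x)
       {
           uint8_t n = x & 1;                   // andi r24, 1
           for (uint8_t i = 1; i < 8; i++)
               n += (x >> i) & 1;               // lsr ... adc
           return n;
       }
*/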

\f
/**********************************
 * Swap bytes
 **********************************/

;; swap two registers with different register numbers
.macro bswap a, b
    eor \a, \b
    eor \b, \a
    eor \a, \b
.endm
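
/* This is the classic xor swap; in C:

       a ^= b;  b ^= a;  a ^= b;   // now (a, b) = (old b, old a)

   The two registers must be distinct: with a == b the first eor would
   zero both, hence the restriction noted above.  */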

#if defined (L_bswapsi2)
;; swap bytes
;; r25:r22 = bswap32 (r25:r22)
DEFUN __bswapsi2
    bswap r22, r25
    bswap r23, r24
    ret
ENDF __bswapsi2
#endif /* defined (L_bswapsi2) */

#if defined (L_bswapdi2)
;; swap bytes
;; r25:r18 = bswap64 (r25:r18)
DEFUN __bswapdi2
    bswap r18, r25
    bswap r19, r24
    bswap r20, r23
    bswap r21, r22
    ret
ENDF __bswapdi2
#endif /* defined (L_bswapdi2) */

\f
/**********************************
 * 64-bit shifts
 **********************************/

#if defined (L_ashrdi3)
;; Arithmetic shift right
;; r25:r18 = ashr64 (r25:r18, r17:r16)
DEFUN __ashrdi3
    push r16
    andi r16, 63
    breq 2f
1:  asr r25
    ror r24
    ror r23
    ror r22
    ror r21
    ror r20
    ror r19
    ror r18
    dec r16
    brne 1b
2:  pop r16
    ret
ENDF __ashrdi3
#endif /* defined (L_ashrdi3) */
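
/* All three 64-bit shifts use the same pattern: mask the shift count to
   0..63, then move one bit per loop iteration through the carry chain of
   the eight registers; the logical and left variants below differ only in
   the shift direction.  C sketch of the arithmetic variant (illustrative
   only; GCC implements signed `>>' as an arithmetic shift):

       int64_t ashr64 (int64_t x, uint8_t n)
       {
           n &= 63;           // andi r16, 63
           while (n--)        // dec r16;  brne 1b  (skipped if n == 0)
               x >>= 1;       // asr r25;  ror r24 ... ror r18
           return x;
       }
*/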

#if defined (L_lshrdi3)
;; Logical shift right
;; r25:r18 = lshr64 (r25:r18, r17:r16)
DEFUN __lshrdi3
    push r16
    andi r16, 63
    breq 2f
1:  lsr r25
    ror r24
    ror r23
    ror r22
    ror r21
    ror r20
    ror r19
    ror r18
    dec r16
    brne 1b
2:  pop r16
    ret
ENDF __lshrdi3
#endif /* defined (L_lshrdi3) */

#if defined (L_ashldi3)
;; Shift left
;; r25:r18 = ashl64 (r25:r18, r17:r16)
DEFUN __ashldi3
    push r16
    andi r16, 63
    breq 2f
1:  lsl r18
    rol r19
    rol r20
    rol r21
    rol r22
    rol r23
    rol r24
    rol r25
    dec r16
    brne 1b
2:  pop r16
    ret
ENDF __ashldi3
#endif /* defined (L_ashldi3) */

#if defined (L_rotldi3)
;; Rotate left
;; r25:r18 = rotl64 (r25:r18, r17:r16)
DEFUN __rotldi3
    push r16
    andi r16, 63
    breq 2f
1:  lsl r18
    rol r19
    rol r20
    rol r21
    rol r22
    rol r23
    rol r24
    rol r25
    adc r18, __zero_reg__
    dec r16
    brne 1b
2:  pop r16
    ret
ENDF __rotldi3
#endif /* defined (L_rotldi3) */
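
/* __rotldi3 is the shift-left loop plus one extra instruction: the bit
   that `rol r25' moves out into the carry is added back into bit 0 of r18
   by `adc r18, __zero_reg__', turning the shift into a rotation.  C sketch
   (illustrative only):

       uint64_t rotl64 (uint64_t x, uint8_t n)
       {
           n &= 63;
           while (n--)
               x = (x << 1) | (x >> 63);   // adc feeds the carry back in
           return x;
       }
*/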

\f
.section .text.libgcc.fmul, "ax", @progbits

/***********************************************************/
;;; Softmul versions of FMUL, FMULS and FMULSU to implement
;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
/***********************************************************/

#define A1 24
#define B1 25
#define C0 22
#define C1 23
#define A0 __tmp_reg__

#ifdef L_fmuls
;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmuls
    ;; A0.7 = negate result?
    mov A0, A1
    eor A0, B1
    ;; B1 = |B1|
    sbrc B1, 7
    neg B1
    XJMP __fmulsu_exit
ENDF __fmuls
#endif /* L_fmuls */

#ifdef L_fmulsu
;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmulsu
    ;; A0.7 = negate result?
    mov A0, A1
    ;; FALLTHRU
ENDF __fmulsu

;; Helper for __fmuls and __fmulsu
DEFUN __fmulsu_exit
    ;; A1 = |A1|
    sbrc A1, 7
    neg A1
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have a problem with skipping a 2-word instruction
    tst A0
    brmi 1f
#else
    sbrs A0, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP __fmul
1:  XCALL __fmul
    ;; C = -C iff A0.7 = 1
    com C1
    neg C0
    sbci C1, -1
    ret
ENDF __fmulsu_exit
#endif /* L_fmulsu */


#ifdef L_fmul
;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmul
    ;; clear result
    clr C0
    clr C1
    clr A0
1:  tst B1
    ;; 1.0 = 0x80, so test bit 7 of B to see if A must be added to C.
2:  brpl 3f
    ;; C += A
    add C0, A0
    adc C1, A1
3:  ;; A >>= 1
    lsr A1
    ror A0
    ;; B <<= 1
    lsl B1
    brne 2b
    ret
ENDF __fmul
#endif /* L_fmul */
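
/* __fmul computes the same 1.7 x 1.7 -> 1.15 fractional product as the
   FMUL instruction, i.e. the low 16 bits of (a * b) << 1, by shift-and-add:
   A sits in the high byte of a 16-bit operand that is halved each round,
   while B is shifted left so that its MSB selects whether to add.  The
   __fmuls/__fmulsu wrappers above reduce to this routine by taking absolute
   values and conditionally negating the result according to the operand
   signs.  C sketch (illustrative only; hypothetical name, <stdint.h> types):

       uint16_t fmul8 (uint8_t a, uint8_t b)
       {
           uint16_t acc = 0;
           uint16_t aa = (uint16_t) a << 8;   // A1:A0 with A0 = 0
           while (b)
             {
               if (b & 0x80)                  // brpl 3f: B's MSB selects the add
                   acc += aa;                 // add C0, A0;  adc C1, A1
               aa >>= 1;                      // lsr A1;  ror A0
               b <<= 1;                       // lsl B1;  brne 2b
             }
           return acc;                        // low 16 bits of (a * b) << 1
       }
*/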

#undef A0
#undef A1
#undef B1
#undef C0
#undef C1