1 /* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998-2023 Free Software Foundation, Inc.
3 Contributed by Denis Chertykov <chertykov@gmail.com>
4
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
9
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #if defined (__AVR_TINY__)
25 #define __zero_reg__ r17
26 #define __tmp_reg__ r16
27 #else
28 #define __zero_reg__ r1
29 #define __tmp_reg__ r0
30 #endif
31 #define __SREG__ 0x3f
32 #if defined (__AVR_HAVE_SPH__)
33 #define __SP_H__ 0x3e
34 #endif
35 #define __SP_L__ 0x3d
36 #define __RAMPZ__ 0x3B
37 #define __EIND__ 0x3C
38
39 /* Most of the functions here are called directly from avr.md
40 patterns, instead of using the standard libcall mechanisms.
41 This can make better code because GCC knows exactly which
42 of the call-used registers (not all of them) are clobbered. */
43
 44 /* FIXME: At present, there is no SORT directive in the linker
 45    script, so we must not assume that different modules in the
 46    same input section, like .libgcc.text.mul, will be located
 47    close together.  Therefore, we cannot use RCALL/RJMP to
 48    call a function like __udivmodhi4 from __divmodhi4 and have
 49    to use the lengthier XCALL/XJMP instead, even though both
 50    are in the same input section and all such input sections
 51    together are small enough to reach every location with an
 52    RCALL/RJMP instruction. */
53
54 #if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
55 #error device not supported
56 #endif
57
58 .macro mov_l r_dest, r_src
59 #if defined (__AVR_HAVE_MOVW__)
60 movw \r_dest, \r_src
61 #else
62 mov \r_dest, \r_src
63 #endif
64 .endm
65
66 .macro mov_h r_dest, r_src
67 #if defined (__AVR_HAVE_MOVW__)
68 ; empty
69 #else
70 mov \r_dest, \r_src
71 #endif
72 .endm
73
74 .macro wmov r_dest, r_src
75 #if defined (__AVR_HAVE_MOVW__)
76 movw \r_dest, \r_src
77 #else
78 mov \r_dest, \r_src
79 mov \r_dest+1, \r_src+1
80 #endif
81 .endm
82
83 #if defined (__AVR_HAVE_JMP_CALL__)
84 #define XCALL call
85 #define XJMP jmp
86 #else
87 #define XCALL rcall
88 #define XJMP rjmp
89 #endif
90
91 #if defined (__AVR_HAVE_EIJMP_EICALL__)
92 #define XICALL eicall
93 #define XIJMP eijmp
94 #else
95 #define XICALL icall
96 #define XIJMP ijmp
97 #endif
98
99 ;; Prologue stuff
100
101 .macro do_prologue_saves n_pushed n_frame=0
102 ldi r26, lo8(\n_frame)
103 ldi r27, hi8(\n_frame)
104 ldi r30, lo8(gs(.L_prologue_saves.\@))
105 ldi r31, hi8(gs(.L_prologue_saves.\@))
106 XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
107 .L_prologue_saves.\@:
108 .endm
109
110 ;; Epilogue stuff
111
112 .macro do_epilogue_restores n_pushed n_frame=0
113 in r28, __SP_L__
114 #ifdef __AVR_HAVE_SPH__
115 in r29, __SP_H__
116 .if \n_frame > 63
117 subi r28, lo8(-\n_frame)
118 sbci r29, hi8(-\n_frame)
119 .elseif \n_frame > 0
120 adiw r28, \n_frame
121 .endif
122 #else
123 clr r29
124 .if \n_frame > 0
125 subi r28, lo8(-\n_frame)
126 .endif
127 #endif /* HAVE SPH */
128 ldi r30, \n_pushed
129 XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
130 .endm
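;; Note on the computed entry point used by both macros (informal sketch
;; of the mechanism, derived from __prologue_saves__ below):  that routine
;; is a run of 18 consecutive PUSH instructions (R2..R17, R28, R29), each
;; 2 bytes long, followed by the frame/SP setup.  Jumping to
;; __prologue_saves__ + (18 - n_pushed) * 2 therefore skips the first
;; 18 - n_pushed pushes, so exactly the last n_pushed registers are saved.
;; Example: n_pushed = 10 enters 16 bytes into the run and saves
;; R10..R17, R28, R29.  __epilogue_restores__ uses the same trick with its
;; run of 2-byte LDD instructions.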
131
132 ;; Support function entry and exit for convenience
133
134 .macro wsubi r_arg1, i_arg2
135 #if defined (__AVR_TINY__)
136 subi \r_arg1, lo8(\i_arg2)
137 sbci \r_arg1+1, hi8(\i_arg2)
138 #else
139 sbiw \r_arg1, \i_arg2
140 #endif
141 .endm
142
143 .macro waddi r_arg1, i_arg2
144 #if defined (__AVR_TINY__)
145 subi \r_arg1, lo8(-\i_arg2)
146 sbci \r_arg1+1, hi8(-\i_arg2)
147 #else
148 adiw \r_arg1, \i_arg2
149 #endif
150 .endm
151
152 .macro DEFUN name
153 .global \name
154 .func \name
155 \name:
156 .endm
157
158 .macro ENDF name
159 .size \name, .-\name
160 .endfunc
161 .endm
162
163 .macro FALIAS name
164 .global \name
165 .func \name
166 \name:
167 .size \name, .-\name
168 .endfunc
169 .endm
170
171 ;; Skip next instruction, typically a jump target
172 #define skip cpse 16,16
173
174 ;; Negate a 2-byte value held in consecutive registers
175 .macro NEG2 reg
176 com \reg+1
177 neg \reg
178 sbci \reg+1, -1
179 .endm
180
181 ;; Negate a 4-byte value held in consecutive registers
182 ;; Sets the V flag for signed overflow tests if REG >= 16
183 .macro NEG4 reg
184 com \reg+3
185 com \reg+2
186 com \reg+1
187 .if \reg >= 16
188 neg \reg
189 sbci \reg+1, -1
190 sbci \reg+2, -1
191 sbci \reg+3, -1
192 .else
193 com \reg
194 adc \reg, __zero_reg__
195 adc \reg+1, __zero_reg__
196 adc \reg+2, __zero_reg__
197 adc \reg+3, __zero_reg__
198 .endif
199 .endm
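;; Both macros rely on the two's-complement identity  -x = ~x + 1:
;; every byte is complemented and the trailing +1 is rippled up through
;; the carry chain (SBCI ..., -1 adds 1 minus the borrow left by NEG; in
;; the low-register variant, COM sets Carry, which the ADC chain adds).
;; Worked example for NEG2 with x = 0x0001:  ~x = 0xfffe, +1 = 0xffff = -1.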
200
201 #define exp_lo(N) hlo8 ((N) << 23)
202 #define exp_hi(N) hhi8 ((N) << 23)
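;; exp_lo(N) and exp_hi(N) yield the two bytes of an IEEE-754 single that
;; carry an exponent field of value N:  the exponent occupies bits 30..23,
;; so (N) << 23 puts it in place and hlo8/hhi8 pick byte 2 and byte 3 of
;; that 32-bit constant (presumably for the floating-point helpers further
;; down in this file).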
203
204 \f
205 .section .text.libgcc.mul, "ax", @progbits
206
207 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
208 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
209 #if !defined (__AVR_HAVE_MUL__)
210 /*******************************************************
211 Multiplication 8 x 8 without MUL
212 *******************************************************/
213 #if defined (L_mulqi3)
214
215 #define r_arg2 r22 /* multiplicand */
216 #define r_arg1 r24 /* multiplier */
217 #define r_res __tmp_reg__ /* result */
218
219 DEFUN __mulqi3
220 clr r_res ; clear result
221 __mulqi3_loop:
222 sbrc r_arg1,0
223 add r_res,r_arg2
224 add r_arg2,r_arg2 ; shift multiplicand
225 breq __mulqi3_exit ; exit if multiplicand is now 0
226 lsr r_arg1 ; shift multiplier
227 brne __mulqi3_loop ; loop while multiplier != 0
228 __mulqi3_exit:
229 mov r_arg1,r_res ; result to return register
230 ret
231 ENDF __mulqi3
232
233 #undef r_arg2
234 #undef r_arg1
235 #undef r_res
236
237 #endif /* defined (L_mulqi3) */
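;; A rough C equivalent of the shift-and-add loop above (illustrative
;; sketch only; the function name and prototype here are made up, the
;; real entry point is __mulqi3 with the register interface shown above):
;;
;;     unsigned char mulqi3_sketch (unsigned char a, unsigned char b)
;;     {
;;         unsigned char res = 0;
;;         while (a && b)          /* stop when either operand is exhausted */
;;         {
;;             if (a & 1)
;;                 res += b;       /* bit n of A set --> add B * 2^n */
;;             b <<= 1;            /* B now holds B * 2^(n+1) (mod 256) */
;;             a >>= 1;
;;         }
;;         return res;
;;     }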
238
239
240 /*******************************************************
241 Widening Multiplication 16 = 8 x 8 without MUL
242 Multiplication 16 x 16 without MUL
243 *******************************************************/
244
245 #define A0 22
246 #define A1 23
247 #define B0 24
248 #define BB0 20
249 #define B1 25
250 ;; Output overlaps input, thus expand result in CC0/1
251 #define C0 24
252 #define C1 25
253 #define CC0 __tmp_reg__
254 #define CC1 21
255
256 #if defined (L_umulqihi3)
257 ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
258 ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
259 ;;; Clobbers: __tmp_reg__, R21..R23
260 DEFUN __umulqihi3
261 clr A1
262 clr B1
263 XJMP __mulhi3
264 ENDF __umulqihi3
265 #endif /* L_umulqihi3 */
266
267 #if defined (L_mulqihi3)
268 ;;; R25:R24 = (signed int) R22 * (signed int) R24
269 ;;; (C1:C0) = (signed int) A0 * (signed int) B0
270 ;;; Clobbers: __tmp_reg__, R20..R23
271 DEFUN __mulqihi3
272 ;; Sign-extend B0
273 clr B1
274 sbrc B0, 7
275 com B1
276 ;; The multiplication runs twice as fast if A1 is zero, thus:
277 ;; Zero-extend A0
278 clr A1
279 #ifdef __AVR_HAVE_JMP_CALL__
280 ;; Store B0 * sign of A
281 clr BB0
282 sbrc A0, 7
283 mov BB0, B0
284 call __mulhi3
285 #else /* have no CALL */
286 ;; Skip sign-extension of A if A >= 0
287 ;; Same size as with the first alternative but avoids errata skip
288 ;; and is faster if A >= 0
289 sbrs A0, 7
290 rjmp __mulhi3
291 ;; If A < 0 store B
292 mov BB0, B0
293 rcall __mulhi3
294 #endif /* HAVE_JMP_CALL */
295 ;; 1-extend A after the multiplication
296 sub C1, BB0
297 ret
298 ENDF __mulqihi3
299 #endif /* L_mulqihi3 */
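;; Why the single "sub C1, BB0" above suffices as sign correction
;; (informal sketch):  A is handed to __mulhi3 zero-extended, i.e. as
;; A + 256 whenever A < 0.  Since (A + 256) * B = A * B + 256 * B, the
;; signed product is recovered by subtracting B from the high byte of the
;; 16-bit result -- and BB0 holds B exactly when A was negative and 0
;; otherwise, so the subtraction is a no-op for A >= 0.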
300
301 #if defined (L_mulhi3)
302 ;;; R25:R24 = R23:R22 * R25:R24
303 ;;; (C1:C0) = (A1:A0) * (B1:B0)
304 ;;; Clobbers: __tmp_reg__, R21..R23
305 DEFUN __mulhi3
306
307 ;; Clear result
308 clr CC0
309 clr CC1
310 rjmp 3f
311 1:
312 ;; Bit n of A is 1 --> C += B << n
313 add CC0, B0
314 adc CC1, B1
315 2:
316 lsl B0
317 rol B1
318 3:
319 ;; If B == 0 we are ready
320 wsubi B0, 0
321 breq 9f
322
323 ;; Carry = n-th bit of A
324 lsr A1
325 ror A0
326 ;; If bit n of A is set, then go add B * 2^n to C
327 brcs 1b
328
329 ;; Carry = 0 --> The ROR above acts like CP A0, 0
330 ;; Thus, it is sufficient to CPC the high part to test A against 0
331 cpc A1, __zero_reg__
332 ;; Only proceed if A != 0
333 brne 2b
334 9:
335 ;; Move Result into place
336 mov C0, CC0
337 mov C1, CC1
338 ret
339 ENDF __mulhi3
340 #endif /* L_mulhi3 */
341
342 #undef A0
343 #undef A1
344 #undef B0
345 #undef BB0
346 #undef B1
347 #undef C0
348 #undef C1
349 #undef CC0
350 #undef CC1
351
352 \f
353 #define A0 22
354 #define A1 A0+1
355 #define A2 A0+2
356 #define A3 A0+3
357
358 #define B0 18
359 #define B1 B0+1
360 #define B2 B0+2
361 #define B3 B0+3
362
363 #define CC0 26
364 #define CC1 CC0+1
365 #define CC2 30
366 #define CC3 CC2+1
367
368 #define C0 22
369 #define C1 C0+1
370 #define C2 C0+2
371 #define C3 C0+3
372
373 /*******************************************************
374 Widening Multiplication 32 = 16 x 16 without MUL
375 *******************************************************/
376
377 #if defined (L_umulhisi3)
378 DEFUN __umulhisi3
379 wmov B0, 24
380 ;; Zero-extend B
381 clr B2
382 clr B3
383 ;; Zero-extend A
384 wmov A2, B2
385 XJMP __mulsi3
386 ENDF __umulhisi3
387 #endif /* L_umulhisi3 */
388
389 #if defined (L_mulhisi3)
390 DEFUN __mulhisi3
391 wmov B0, 24
392 ;; Sign-extend B
393 lsl r25
394 sbc B2, B2
395 mov B3, B2
396 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
397 ;; Sign-extend A
398 clr A2
399 sbrc A1, 7
400 com A2
401 mov A3, A2
402 XJMP __mulsi3
403 #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
404 ;; Zero-extend A and __mulsi3 will run at least twice as fast
405 ;; compared to a sign-extended A.
406 clr A2
407 clr A3
408 sbrs A1, 7
409 XJMP __mulsi3
410 ;; If A < 0 then account for the B * 0xffff.... part before the
411 ;; actual multiplication by initializing the high part of the
412 ;; result CC with -B.
413 wmov CC2, A2
414 sub CC2, B0
415 sbc CC3, B1
416 XJMP __mulsi3_helper
417 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
418 ENDF __mulhisi3
419 #endif /* L_mulhisi3 */
420
421
422 /*******************************************************
423 Multiplication 32 x 32 without MUL
424 *******************************************************/
425
426 #if defined (L_mulsi3)
427 DEFUN __mulsi3
428 #if defined (__AVR_TINY__)
429 in r26, __SP_L__ ; safe to use X, as it is CC0/CC1
430 in r27, __SP_H__
431 subi r26, lo8(-3) ; Add 3 to point past return address
432 sbci r27, hi8(-3)
433 push B0 ; save callee saved regs
434 push B1
435 ld B0, X+ ; load from caller stack
436 ld B1, X+
437 ld B2, X+
438 ld B3, X
439 #endif
440 ;; Clear result
441 clr CC2
442 clr CC3
443 ;; FALLTHRU
444 ENDF __mulsi3
445
446 DEFUN __mulsi3_helper
447 clr CC0
448 clr CC1
449 rjmp 3f
450
451 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
452 ;; CC += B
453 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
454
455 2: ;; B <<= 1
456 lsl B0 $ rol B1 $ rol B2 $ rol B3
457
458 3: ;; A >>= 1: Carry = n-th bit of A
459 lsr A3 $ ror A2 $ ror A1 $ ror A0
460
461 brcs 1b
462 ;; Only continue if A != 0
463 sbci A1, 0
464 brne 2b
465 wsubi A2, 0
466 brne 2b
467
468 ;; All bits of A are consumed: Copy result to return register C
469 wmov C0, CC0
470 wmov C2, CC2
471 #if defined (__AVR_TINY__)
472 pop B1 ; restore callee saved regs
473 pop B0
474 #endif /* defined (__AVR_TINY__) */
475
476 ret
477 ENDF __mulsi3_helper
478 #endif /* L_mulsi3 */
479
480 #undef A0
481 #undef A1
482 #undef A2
483 #undef A3
484 #undef B0
485 #undef B1
486 #undef B2
487 #undef B3
488 #undef C0
489 #undef C1
490 #undef C2
491 #undef C3
492 #undef CC0
493 #undef CC1
494 #undef CC2
495 #undef CC3
496
497 #endif /* !defined (__AVR_HAVE_MUL__) */
498 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
499 \f
500 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
501 #if defined (__AVR_HAVE_MUL__)
502 #define A0 26
503 #define B0 18
504 #define C0 22
505
506 #define A1 A0+1
507
508 #define B1 B0+1
509 #define B2 B0+2
510 #define B3 B0+3
511
512 #define C1 C0+1
513 #define C2 C0+2
514 #define C3 C0+3
515
516 /*******************************************************
517 Widening Multiplication 32 = 16 x 16 with MUL
518 *******************************************************/
519
520 #if defined (L_mulhisi3)
521 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
522 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
523 ;;; Clobbers: __tmp_reg__
524 DEFUN __mulhisi3
525 XCALL __umulhisi3
526 ;; Sign-extend B
527 tst B1
528 brpl 1f
529 sub C2, A0
530 sbc C3, A1
531 1: ;; Sign-extend A
532 XJMP __usmulhisi3_tail
533 ENDF __mulhisi3
534 #endif /* L_mulhisi3 */
535
536 #if defined (L_usmulhisi3)
537 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
538 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
539 ;;; Clobbers: __tmp_reg__
540 DEFUN __usmulhisi3
541 XCALL __umulhisi3
542 ;; FALLTHRU
543 ENDF __usmulhisi3
544
545 DEFUN __usmulhisi3_tail
546 ;; Sign-extend A
547 sbrs A1, 7
548 ret
549 sub C2, B0
550 sbc C3, B1
551 ret
552 ENDF __usmulhisi3_tail
553 #endif /* L_usmulhisi3 */
554
555 #if defined (L_umulhisi3)
556 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
557 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
558 ;;; Clobbers: __tmp_reg__
559 DEFUN __umulhisi3
560 mul A0, B0
561 movw C0, r0
562 mul A1, B1
563 movw C2, r0
564 mul A0, B1
565 #ifdef __AVR_HAVE_JMP_CALL__
566 ;; This function is used by many other routines, often multiple times.
567 ;; Therefore, if the flash size is not too limited, avoid the RCALL
568 ;; and invest 6 Bytes to speed things up.
569 add C1, r0
570 adc C2, r1
571 clr __zero_reg__
572 adc C3, __zero_reg__
573 #else
574 rcall 1f
575 #endif
576 mul A1, B0
577 1: add C1, r0
578 adc C2, r1
579 clr __zero_reg__
580 adc C3, __zero_reg__
581 ret
582 ENDF __umulhisi3
583 #endif /* L_umulhisi3 */
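;; The MUL-based 16 x 16 -> 32 routine above is the usual four-partial-
;; product schoolbook scheme.  A rough C sketch (names made up here;
;; assumes 16-bit int and 32-bit long, as on AVR):
;;
;;     unsigned long umulhisi3_sketch (unsigned int a, unsigned int b)
;;     {
;;         unsigned char a0 = a, a1 = a >> 8;
;;         unsigned char b0 = b, b1 = b >> 8;
;;         unsigned long c = (unsigned long) a0 * b0;   /* -> C1:C0 */
;;         c += (unsigned long) a1 * b1 << 16;          /* -> C3:C2 */
;;         c += (unsigned long) a0 * b1 << 8;           /* middle terms,    */
;;         c += (unsigned long) a1 * b0 << 8;           /* added at offset 1 */
;;         return c;
;;     }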
584
585 /*******************************************************
586 Widening Multiplication 32 = 16 x 32 with MUL
587 *******************************************************/
588
589 #if defined (L_mulshisi3)
590 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
591 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
592 ;;; Clobbers: __tmp_reg__
593 DEFUN __mulshisi3
594 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
595 ;; Some cores have a problem skipping a 2-word instruction
596 tst A1
597 brmi __mulohisi3
598 #else
599 sbrs A1, 7
600 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
601 XJMP __muluhisi3
602 ;; FALLTHRU
603 ENDF __mulshisi3
604
605 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
606 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
607 ;;; Clobbers: __tmp_reg__
608 DEFUN __mulohisi3
609 XCALL __muluhisi3
610 ;; One-extend R27:R26 (A1:A0)
611 sub C2, B0
612 sbc C3, B1
613 ret
614 ENDF __mulohisi3
615 #endif /* L_mulshisi3 */
616
617 #if defined (L_muluhisi3)
618 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
619 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
620 ;;; Clobbers: __tmp_reg__
621 DEFUN __muluhisi3
622 XCALL __umulhisi3
623 mul A0, B3
624 add C3, r0
625 mul A1, B2
626 add C3, r0
627 mul A0, B2
628 add C2, r0
629 adc C3, r1
630 clr __zero_reg__
631 ret
632 ENDF __muluhisi3
633 #endif /* L_muluhisi3 */
634
635 /*******************************************************
636 Multiplication 32 x 32 with MUL
637 *******************************************************/
638
639 #if defined (L_mulsi3)
640 ;;; R25:R22 = R25:R22 * R21:R18
641 ;;; (C3:C0) = C3:C0 * B3:B0
642 ;;; Clobbers: R26, R27, __tmp_reg__
643 DEFUN __mulsi3
644 movw A0, C0
645 push C2
646 push C3
647 XCALL __muluhisi3
648 pop A1
649 pop A0
650 ;; A1:A0 now contains the high word of A
651 mul A0, B0
652 add C2, r0
653 adc C3, r1
654 mul A0, B1
655 add C3, r0
656 mul A1, B0
657 add C3, r0
658 clr __zero_reg__
659 ret
660 ENDF __mulsi3
661 #endif /* L_mulsi3 */
662
663 #undef A0
664 #undef A1
665
666 #undef B0
667 #undef B1
668 #undef B2
669 #undef B3
670
671 #undef C0
672 #undef C1
673 #undef C2
674 #undef C3
675
676 #endif /* __AVR_HAVE_MUL__ */
677
678 /*******************************************************
679 Multiplication 24 x 24 with MUL
680 *******************************************************/
681
682 #if defined (L_mulpsi3)
683
684 ;; A[0..2]: In: Multiplicand; Out: Product
685 #define A0 22
686 #define A1 A0+1
687 #define A2 A0+2
688
689 ;; B[0..2]: In: Multiplier
690 #define B0 18
691 #define B1 B0+1
692 #define B2 B0+2
693
694 #if defined (__AVR_HAVE_MUL__)
695
696 ;; C[0..2]: Expand Result
697 #define C0 22
698 #define C1 C0+1
699 #define C2 C0+2
700
701 ;; R24:R22 *= R20:R18
702 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
703
704 #define AA0 26
705 #define AA2 21
706
707 DEFUN __mulpsi3
708 wmov AA0, A0
709 mov AA2, A2
710 XCALL __umulhisi3
711 mul AA2, B0 $ add C2, r0
712 mul AA0, B2 $ add C2, r0
713 clr __zero_reg__
714 ret
715 ENDF __mulpsi3
716
717 #undef AA2
718 #undef AA0
719
720 #undef C2
721 #undef C1
722 #undef C0
723
724 #else /* !HAVE_MUL */
725 ;; C[0..2]: Expand Result
726 #if defined (__AVR_TINY__)
727 #define C0 16
728 #else
729 #define C0 0
730 #endif /* defined (__AVR_TINY__) */
731 #define C1 C0+1
732 #define C2 21
733
734 ;; R24:R22 *= R20:R18
735 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
736
737 DEFUN __mulpsi3
738 #if defined (__AVR_TINY__)
739 in r26,__SP_L__
740 in r27,__SP_H__
741 subi r26, lo8(-3) ; Add 3 to point past return address
742 sbci r27, hi8(-3)
743 push B0 ; save callee saved regs
744 push B1
745 ld B0,X+ ; load from caller stack
746 ld B1,X+
747 ld B2,X+
748 #endif /* defined (__AVR_TINY__) */
749
750 ;; C[] = 0
751 clr __tmp_reg__
752 clr C2
753
754 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
755 LSR B2 $ ror B1 $ ror B0
756
757 ;; If the N-th Bit of B[] was set...
758 brcc 1f
759
760 ;; ...then add A[] * 2^N to the Result C[]
761 ADD C0,A0 $ adc C1,A1 $ adc C2,A2
762
763 1: ;; Multiply A[] by 2
764 LSL A0 $ rol A1 $ rol A2
765
766 ;; Loop until B[] is 0
767 subi B0,0 $ sbci B1,0 $ sbci B2,0
768 brne 0b
769
770 ;; Copy C[] to the return Register A[]
771 wmov A0, C0
772 mov A2, C2
773
774 clr __zero_reg__
775 #if defined (__AVR_TINY__)
776 pop B1
777 pop B0
778 #endif /* (__AVR_TINY__) */
779 ret
780 ENDF __mulpsi3
781
782 #undef C2
783 #undef C1
784 #undef C0
785
786 #endif /* HAVE_MUL */
787
788 #undef B2
789 #undef B1
790 #undef B0
791
792 #undef A2
793 #undef A1
794 #undef A0
795
796 #endif /* L_mulpsi3 */
797
798 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
799
800 ;; A[0..2]: In: Multiplicand
801 #define A0 22
802 #define A1 A0+1
803 #define A2 A0+2
804
805 ;; BB: In: Multiplier
806 #define BB 25
807
808 ;; C[0..2]: Result
809 #define C0 18
810 #define C1 C0+1
811 #define C2 C0+2
812
813 ;; C[] = A[] * sign_extend (BB)
814 DEFUN __mulsqipsi3
815 mul A0, BB
816 movw C0, r0
817 mul A2, BB
818 mov C2, r0
819 mul A1, BB
820 add C1, r0
821 adc C2, r1
822 clr __zero_reg__
823 sbrs BB, 7
824 ret
825 ;; One-extend BB
826 sub C1, A0
827 sbc C2, A1
828 ret
829 ENDF __mulsqipsi3
830
831 #undef C2
832 #undef C1
833 #undef C0
834
835 #undef BB
836
837 #undef A2
838 #undef A1
839 #undef A0
840
841 #endif /* L_mulsqipsi3 && HAVE_MUL */
842
843 /*******************************************************
844 Multiplication 64 x 64
845 *******************************************************/
846
847 ;; A[] = A[] * B[]
848
849 ;; A[0..7]: In: Multiplicand
850 ;; Out: Product
851 #define A0 18
852 #define A1 A0+1
853 #define A2 A0+2
854 #define A3 A0+3
855 #define A4 A0+4
856 #define A5 A0+5
857 #define A6 A0+6
858 #define A7 A0+7
859
860 ;; B[0..7]: In: Multiplier
861 #define B0 10
862 #define B1 B0+1
863 #define B2 B0+2
864 #define B3 B0+3
865 #define B4 B0+4
866 #define B5 B0+5
867 #define B6 B0+6
868 #define B7 B0+7
869
870 #ifndef __AVR_TINY__
871 #if defined (__AVR_HAVE_MUL__)
872 ;; Define C[] for convenience
873 ;; Notice that parts of C[] overlap A[] respective B[]
874 #define C0 16
875 #define C1 C0+1
876 #define C2 20
877 #define C3 C2+1
878 #define C4 28
879 #define C5 C4+1
880 #define C6 C4+2
881 #define C7 C4+3
882
883 #if defined (L_muldi3)
884
885 ;; A[] *= B[]
886 ;; R25:R18 *= R17:R10
887 ;; Ordinary ABI-Function
888
889 DEFUN __muldi3
890 push r29
891 push r28
892 push r17
893 push r16
894
895 ;; Counting in Words, we have to perform a 4 * 4 Multiplication
896
897 ;; 3 * 0 + 0 * 3
898 mul A7,B0 $ $ mov C7,r0
899 mul A0,B7 $ $ add C7,r0
900 mul A6,B1 $ $ add C7,r0
901 mul A6,B0 $ mov C6,r0 $ add C7,r1
902 mul B6,A1 $ $ add C7,r0
903 mul B6,A0 $ add C6,r0 $ adc C7,r1
904
905 ;; 1 * 2
906 mul A2,B4 $ add C6,r0 $ adc C7,r1
907 mul A3,B4 $ $ add C7,r0
908 mul A2,B5 $ $ add C7,r0
909
910 push A5
911 push A4
912 push B1
913 push B0
914 push A3
915 push A2
916
917 ;; 0 * 0
918 wmov 26, B0
919 XCALL __umulhisi3
920 wmov C0, 22
921 wmov C2, 24
922
923 ;; 0 * 2
924 wmov 26, B4
925 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
926
927 wmov 26, B2
928 ;; 0 * 1
929 XCALL __muldi3_6
930
931 pop A0
932 pop A1
933 ;; 1 * 1
934 wmov 26, B2
935 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
936
937 pop r26
938 pop r27
939 ;; 1 * 0
940 XCALL __muldi3_6
941
942 pop A0
943 pop A1
944 ;; 2 * 0
945 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
946
947 ;; 2 * 1
948 wmov 26, B2
949 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
950
951 ;; A[] = C[]
952 wmov A0, C0
953 ;; A2 = C2 already
954 wmov A4, C4
955 wmov A6, C6
956
957 pop r16
958 pop r17
959 pop r28
960 pop r29
961 ret
962 ENDF __muldi3
963 #endif /* L_muldi3 */
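;; Sketch of the decomposition used above:  with A and B split into
;; 16-bit words a0..a3 / b0..b3, only partial products with i + j <= 3
;; can contribute to the low 64 bits that are kept:
;;
;;     A * B  =  sum over i+j <= 3 of  a_i * b_j * 2^(16*(i+j))   (mod 2^64)
;;
;; Each 16 x 16 -> 32 term is one __umulhisi3 call; __muldi3_6 folds such
;; a term into the result at word offset 1 (i + j == 1), and the i+j == 3
;; terms only need their low halves, which is why they are done with
;; plain byte MULs at the top of the function.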
964
965 #if defined (L_muldi3_6)
966 ;; A helper for some 64-bit multiplications with MUL available
967 DEFUN __muldi3_6
968 __muldi3_6:
969 XCALL __umulhisi3
970 add C2, 22
971 adc C3, 23
972 adc C4, 24
973 adc C5, 25
974 brcc 0f
975 adiw C6, 1
976 0: ret
977 ENDF __muldi3_6
978 #endif /* L_muldi3_6 */
979
980 #undef C7
981 #undef C6
982 #undef C5
983 #undef C4
984 #undef C3
985 #undef C2
986 #undef C1
987 #undef C0
988
989 #else /* !HAVE_MUL */
990
991 #if defined (L_muldi3)
992
993 #define C0 26
994 #define C1 C0+1
995 #define C2 C0+2
996 #define C3 C0+3
997 #define C4 C0+4
998 #define C5 C0+5
999 #define C6 0
1000 #define C7 C6+1
1001
1002 #define Loop 9
1003
1004 ;; A[] *= B[]
1005 ;; R25:R18 *= R17:R10
1006 ;; Ordinary ABI-Function
1007
1008 DEFUN __muldi3
1009 push r29
1010 push r28
1011 push Loop
1012
1013 ldi C0, 64
1014 mov Loop, C0
1015
1016 ;; C[] = 0
1017 clr __tmp_reg__
1018 wmov C0, 0
1019 wmov C2, 0
1020 wmov C4, 0
1021
1022 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
1023 ;; where N = 64 - Loop.
1024 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
1025 ;; B[] will have its initial Value again.
1026 LSR B7 $ ror B6 $ ror B5 $ ror B4
1027 ror B3 $ ror B2 $ ror B1 $ ror B0
1028
1029 ;; If the N-th Bit of B[] was set then...
1030 brcc 1f
1031 ;; ...finish Rotation...
1032 ori B7, 1 << 7
1033
1034 ;; ...and add A[] * 2^N to the Result C[]
1035 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
1036 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
1037
1038 1: ;; Multiply A[] by 2
1039 LSL A0 $ rol A1 $ rol A2 $ rol A3
1040 rol A4 $ rol A5 $ rol A6 $ rol A7
1041
1042 dec Loop
1043 brne 0b
1044
1045 ;; We expanded the Result in C[]
1046 ;; Copy Result to the Return Register A[]
1047 wmov A0, C0
1048 wmov A2, C2
1049 wmov A4, C4
1050 wmov A6, C6
1051
1052 clr __zero_reg__
1053 pop Loop
1054 pop r28
1055 pop r29
1056 ret
1057 ENDF __muldi3
1058
1059 #undef Loop
1060
1061 #undef C7
1062 #undef C6
1063 #undef C5
1064 #undef C4
1065 #undef C3
1066 #undef C2
1067 #undef C1
1068 #undef C0
1069
1070 #endif /* L_muldi3 */
1071 #endif /* HAVE_MUL */
1072 #endif /* if not __AVR_TINY__ */
1073
1074 #undef B7
1075 #undef B6
1076 #undef B5
1077 #undef B4
1078 #undef B3
1079 #undef B2
1080 #undef B1
1081 #undef B0
1082
1083 #undef A7
1084 #undef A6
1085 #undef A5
1086 #undef A4
1087 #undef A3
1088 #undef A2
1089 #undef A1
1090 #undef A0
1091
1092 /*******************************************************
1093 Widening Multiplication 64 = 32 x 32 with MUL
1094 *******************************************************/
1095
1096 #if defined (__AVR_HAVE_MUL__)
1097 #define A0 r22
1098 #define A1 r23
1099 #define A2 r24
1100 #define A3 r25
1101
1102 #define B0 r18
1103 #define B1 r19
1104 #define B2 r20
1105 #define B3 r21
1106
1107 #define C0 18
1108 #define C1 C0+1
1109 #define C2 20
1110 #define C3 C2+1
1111 #define C4 28
1112 #define C5 C4+1
1113 #define C6 C4+2
1114 #define C7 C4+3
1115
1116 #if defined (L_umulsidi3)
1117
1118 ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1119
1120 ;; R18[8] = R22[4] * R18[4]
1121 ;;
1122 ;; Ordinary ABI Function, but additionally sets
1123 ;; X = R20[2] = B2[2]
1124 ;; Z = R22[2] = A0[2]
1125 DEFUN __umulsidi3
1126 clt
1127 ;; FALLTHRU
1128 ENDF __umulsidi3
1129 ;; T = sign (A)
1130 DEFUN __umulsidi3_helper
1131 push 29 $ push 28 ; Y
1132 wmov 30, A2
1133 ;; Counting in Words, we have to perform 4 Multiplications
1134 ;; 0 * 0
1135 wmov 26, A0
1136 XCALL __umulhisi3
1137 push 23 $ push 22 ; C0
1138 wmov 28, B0
1139 wmov 18, B2
1140 wmov C2, 24
1141 push 27 $ push 26 ; A0
1142 push 19 $ push 18 ; B2
1143 ;;
1144 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1145 ;; B2 C2 -- -- -- B0 A2
1146 ;; 1 * 1
1147 wmov 26, 30 ; A2
1148 XCALL __umulhisi3
1149 ;; Sign-extend A. T holds the sign of A
1150 brtc 0f
1151 ;; Subtract B from the high part of the result
1152 sub 22, 28
1153 sbc 23, 29
1154 sbc 24, 18
1155 sbc 25, 19
1156 0: wmov 18, 28 ;; B0
1157 wmov C4, 22
1158 wmov C6, 24
1159 ;;
1160 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1161 ;; B0 C2 -- -- A2 C4 C6
1162 ;;
1163 ;; 1 * 0
1164 XCALL __muldi3_6
1165 ;; 0 * 1
1166 pop 26 $ pop 27 ;; B2
1167 pop 18 $ pop 19 ;; A0
1168 XCALL __muldi3_6
1169
1170 ;; Move result C into place and save A0 in Z
1171 wmov 22, C4
1172 wmov 24, C6
1173 wmov 30, 18 ; A0
1174 pop C0 $ pop C1
1175
1176 ;; Epilogue
1177 pop 28 $ pop 29 ;; Y
1178 ret
1179 ENDF __umulsidi3_helper
1180 #endif /* L_umulsidi3 */
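;; Sketch of the 32 x 32 -> 64 decomposition above (AH:AL and BH:BL are
;; just informal names for the 16-bit halves):
;;
;;     A * B = AL*BL + (AL*BH + AH*BL) * 2^16 + AH*BH * 2^32
;;
;; Each term is one __umulhisi3 call, and the two middle terms are folded
;; in with __muldi3_6.  For the signed case, the operands are treated as
;; unsigned and corrected afterwards:  a negative operand X was
;; effectively used as X + 2^32, so the other operand has to be
;; subtracted from the high 32 bits of the result (T carries the sign of
;; A for the helper; __mulsidi3 handles the sign of B).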
1181
1182
1183 #if defined (L_mulsidi3)
1184
1185 ;; Signed widening 64 = 32 * 32 Multiplication
1186 ;;
1187 ;; R18[8] = R22[4] * R18[4]
1188 ;; Ordinary ABI Function
1189 DEFUN __mulsidi3
1190 bst A3, 7
1191 sbrs B3, 7 ; Enhanced core has no skip bug
1192 XJMP __umulsidi3_helper
1193
1194 ;; B needs sign-extension
1195 push A3
1196 push A2
1197 XCALL __umulsidi3_helper
1198 ;; A0 survived in Z
1199 sub r22, r30
1200 sbc r23, r31
1201 pop r26
1202 pop r27
1203 sbc r24, r26
1204 sbc r25, r27
1205 ret
1206 ENDF __mulsidi3
1207 #endif /* L_mulsidi3 */
1208
1209 #undef A0
1210 #undef A1
1211 #undef A2
1212 #undef A3
1213 #undef B0
1214 #undef B1
1215 #undef B2
1216 #undef B3
1217 #undef C0
1218 #undef C1
1219 #undef C2
1220 #undef C3
1221 #undef C4
1222 #undef C5
1223 #undef C6
1224 #undef C7
1225 #endif /* HAVE_MUL */
1226
1227 /**********************************************************
1228 Widening Multiplication 64 = 32 x 32 without MUL
1229 **********************************************************/
1230 #ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
1231 #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1232 #define A0 18
1233 #define A1 A0+1
1234 #define A2 A0+2
1235 #define A3 A0+3
1236 #define A4 A0+4
1237 #define A5 A0+5
1238 #define A6 A0+6
1239 #define A7 A0+7
1240
1241 #define B0 10
1242 #define B1 B0+1
1243 #define B2 B0+2
1244 #define B3 B0+3
1245 #define B4 B0+4
1246 #define B5 B0+5
1247 #define B6 B0+6
1248 #define B7 B0+7
1249
1250 #define AA0 22
1251 #define AA1 AA0+1
1252 #define AA2 AA0+2
1253 #define AA3 AA0+3
1254
1255 #define BB0 18
1256 #define BB1 BB0+1
1257 #define BB2 BB0+2
1258 #define BB3 BB0+3
1259
1260 #define Mask r30
1261
1262 ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1263 ;;
1264 ;; R18[8] = R22[4] * R18[4]
1265 ;; Ordinary ABI Function
1266 DEFUN __mulsidi3
1267 set
1268 skip
1269 ;; FALLTHRU
1270 ENDF __mulsidi3
1271
1272 DEFUN __umulsidi3
1273 clt ; skipped
1274 ;; Save 10 Registers: R10..R17, R28, R29
1275 do_prologue_saves 10
1276 ldi Mask, 0xff
1277 bld Mask, 7
1278 ;; Move B into place...
1279 wmov B0, BB0
1280 wmov B2, BB2
1281 ;; ...and extend it
1282 and BB3, Mask
1283 lsl BB3
1284 sbc B4, B4
1285 mov B5, B4
1286 wmov B6, B4
1287 ;; Move A into place...
1288 wmov A0, AA0
1289 wmov A2, AA2
1290 ;; ...and extend it
1291 and AA3, Mask
1292 lsl AA3
1293 sbc A4, A4
1294 mov A5, A4
1295 wmov A6, A4
1296 XCALL __muldi3
1297 do_epilogue_restores 10
1298 ENDF __umulsidi3
1299
1300 #undef A0
1301 #undef A1
1302 #undef A2
1303 #undef A3
1304 #undef A4
1305 #undef A5
1306 #undef A6
1307 #undef A7
1308 #undef B0
1309 #undef B1
1310 #undef B2
1311 #undef B3
1312 #undef B4
1313 #undef B5
1314 #undef B6
1315 #undef B7
1316 #undef AA0
1317 #undef AA1
1318 #undef AA2
1319 #undef AA3
1320 #undef BB0
1321 #undef BB1
1322 #undef BB2
1323 #undef BB3
1324 #undef Mask
1325 #endif /* L_mulsidi3 && !HAVE_MUL */
1326 #endif /* if not __AVR_TINY__ */
1327 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1328
1329 \f
1330 .section .text.libgcc.div, "ax", @progbits
1331
1332 /*******************************************************
1333 Division 8 / 8 => (result + remainder)
1334 *******************************************************/
1335 #define r_rem r25 /* remainder */
1336 #define r_arg1 r24 /* dividend, quotient */
1337 #define r_arg2 r22 /* divisor */
1338 #define r_cnt r23 /* loop count */
1339
1340 #if defined (L_udivmodqi4)
1341 DEFUN __udivmodqi4
1342 sub r_rem,r_rem ; clear remainder and carry
1343 ldi r_cnt,9 ; init loop counter
1344 rjmp __udivmodqi4_ep ; jump to entry point
1345 __udivmodqi4_loop:
1346 rol r_rem ; shift dividend into remainder
1347 cp r_rem,r_arg2 ; compare remainder & divisor
1348 brcs __udivmodqi4_ep ; remainder < divisor
1349 sub r_rem,r_arg2 ; subtract divisor
1350 __udivmodqi4_ep:
1351 rol r_arg1 ; shift dividend (with CARRY)
1352 dec r_cnt ; decrement loop counter
1353 brne __udivmodqi4_loop
1354 com r_arg1 ; complement result
1355 ; because C flag was complemented in loop
1356 ret
1357 ENDF __udivmodqi4
1358 #endif /* defined (L_udivmodqi4) */
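;; The routine above is plain binary long division: shift one dividend bit
;; into the remainder, compare against the divisor and conditionally
;; subtract.  The quotient bits are collected in complemented form (the
;; carry flag from the compare) and fixed up by the final COM.  A rough C
;; sketch of the computation (names made up; the real interface is the
;; register usage documented above):
;;
;;     unsigned char udivmodqi4_sketch (unsigned char num, unsigned char den,
;;                                      unsigned char *rem)
;;     {
;;         unsigned char quo = 0, r = 0;
;;         for (unsigned char i = 0; i < 8; i++)
;;         {
;;             r = (r << 1) | (num >> 7);  /* next dividend bit into remainder */
;;             num <<= 1;
;;             quo <<= 1;
;;             if (r >= den)               /* divisor fits: subtract, set bit */
;;             {
;;                 r -= den;
;;                 quo |= 1;
;;             }
;;         }
;;         *rem = r;                       /* remainder */
;;         return quo;                     /* quotient */
;;     }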
1359
1360 #if defined (L_divmodqi4)
1361 DEFUN __divmodqi4
1362 bst r_arg1,7 ; store sign of dividend
1363 mov __tmp_reg__,r_arg1
1364 eor __tmp_reg__,r_arg2; r0.7 is sign of result
1365 sbrc r_arg1,7
1366 neg r_arg1 ; dividend negative : negate
1367 sbrc r_arg2,7
1368 neg r_arg2 ; divisor negative : negate
1369 XCALL __udivmodqi4 ; do the unsigned div/mod
1370 brtc __divmodqi4_1
1371 neg r_rem ; correct remainder sign
1372 __divmodqi4_1:
1373 sbrc __tmp_reg__,7
1374 neg r_arg1 ; correct result sign
1375 __divmodqi4_exit:
1376 ret
1377 ENDF __divmodqi4
1378 #endif /* defined (L_divmodqi4) */
1379
1380 #undef r_rem
1381 #undef r_arg1
1382 #undef r_arg2
1383 #undef r_cnt
1384
1385
1386 /*******************************************************
1387 Division 16 / 16 => (result + remainder)
1388 *******************************************************/
1389 #define r_remL r26 /* remainder Low */
1390 #define r_remH r27 /* remainder High */
1391
1392 /* return: remainder */
1393 #define r_arg1L r24 /* dividend Low */
1394 #define r_arg1H r25 /* dividend High */
1395
1396 /* return: quotient */
1397 #define r_arg2L r22 /* divisor Low */
1398 #define r_arg2H r23 /* divisor High */
1399
1400 #define r_cnt r21 /* loop count */
1401
1402 #if defined (L_udivmodhi4)
1403 DEFUN __udivmodhi4
1404 sub r_remL,r_remL
1405 sub r_remH,r_remH ; clear remainder and carry
1406 ldi r_cnt,17 ; init loop counter
1407 rjmp __udivmodhi4_ep ; jump to entry point
1408 __udivmodhi4_loop:
1409 rol r_remL ; shift dividend into remainder
1410 rol r_remH
1411 cp r_remL,r_arg2L ; compare remainder & divisor
1412 cpc r_remH,r_arg2H
1413 brcs __udivmodhi4_ep ; remainder < divisor
1414 sub r_remL,r_arg2L ; subtract divisor
1415 sbc r_remH,r_arg2H
1416 __udivmodhi4_ep:
1417 rol r_arg1L ; shift dividend (with CARRY)
1418 rol r_arg1H
1419 dec r_cnt ; decrement loop counter
1420 brne __udivmodhi4_loop
1421 com r_arg1L
1422 com r_arg1H
1423 ; div/mod results to return registers, as for the div() function
1424 mov_l r_arg2L, r_arg1L ; quotient
1425 mov_h r_arg2H, r_arg1H
1426 mov_l r_arg1L, r_remL ; remainder
1427 mov_h r_arg1H, r_remH
1428 ret
1429 ENDF __udivmodhi4
1430 #endif /* defined (L_udivmodhi4) */
1431
1432 #if defined (L_divmodhi4)
1433 DEFUN __divmodhi4
1434 .global _div
1435 _div:
1436 bst r_arg1H,7 ; store sign of dividend
1437 mov __tmp_reg__,r_arg2H
1438 brtc 0f
1439 com __tmp_reg__ ; r0.7 is sign of result
1440 rcall __divmodhi4_neg1 ; dividend negative: negate
1441 0:
1442 sbrc r_arg2H,7
1443 rcall __divmodhi4_neg2 ; divisor negative: negate
1444 XCALL __udivmodhi4 ; do the unsigned div/mod
1445 sbrc __tmp_reg__,7
1446 rcall __divmodhi4_neg2 ; correct remainder sign
1447 brtc __divmodhi4_exit
1448 __divmodhi4_neg1:
1449 ;; correct dividend/remainder sign
1450 com r_arg1H
1451 neg r_arg1L
1452 sbci r_arg1H,0xff
1453 ret
1454 __divmodhi4_neg2:
1455 ;; correct divisor/result sign
1456 com r_arg2H
1457 neg r_arg2L
1458 sbci r_arg2H,0xff
1459 __divmodhi4_exit:
1460 ret
1461 ENDF __divmodhi4
1462 #endif /* defined (L_divmodhi4) */
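;; Sign handling used by __divmodqi4/__divmodhi4 (and the wider signed
;; variants below), matching C's truncating division:
;;
;;     quotient  = (dividend < 0) ^ (divisor < 0)  ?  -q  :  q
;;     remainder = (dividend < 0)                  ?  -r  :  r
;;
;; where q and r come from the unsigned routine applied to |dividend| and
;; |divisor|.  The quotient's sign lives in bit 7 of __tmp_reg__, the
;; remainder's sign (= the dividend's sign) in the T flag.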
1463
1464 #undef r_remH
1465 #undef r_remL
1466
1467 #undef r_arg1H
1468 #undef r_arg1L
1469
1470 #undef r_arg2H
1471 #undef r_arg2L
1472
1473 #undef r_cnt
1474
1475 /*******************************************************
1476 Division 24 / 24 => (result + remainder)
1477 *******************************************************/
1478
1479 ;; A[0..2]: In: Dividend; Out: Quotient
1480 #define A0 22
1481 #define A1 A0+1
1482 #define A2 A0+2
1483
1484 ;; B[0..2]: In: Divisor; Out: Remainder
1485 #define B0 18
1486 #define B1 B0+1
1487 #define B2 B0+2
1488
1489 ;; C[0..2]: Expand remainder
1490 #define C0 __zero_reg__
1491 #define C1 26
1492 #define C2 25
1493
1494 ;; Loop counter
1495 #define r_cnt 21
1496
1497 #if defined (L_udivmodpsi4)
1498 ;; R24:R22 = R24:R22 udiv R20:R18
1499 ;; R20:R18 = R24:R22 umod R20:R18
1500 ;; Clobbers: R21, R25, R26
1501
1502 DEFUN __udivmodpsi4
1503 ; init loop counter
1504 ldi r_cnt, 24+1
1505 ; Clear remainder and carry. C0 is already 0
1506 clr C1
1507 sub C2, C2
1508 ; jump to entry point
1509 rjmp __udivmodpsi4_start
1510 __udivmodpsi4_loop:
1511 ; shift dividend into remainder
1512 rol C0
1513 rol C1
1514 rol C2
1515 ; compare remainder & divisor
1516 cp C0, B0
1517 cpc C1, B1
1518 cpc C2, B2
1519 brcs __udivmodpsi4_start ; remainder < divisor
1520 sub C0, B0 ; subtract divisor
1521 sbc C1, B1
1522 sbc C2, B2
1523 __udivmodpsi4_start:
1524 ; shift dividend (with CARRY)
1525 rol A0
1526 rol A1
1527 rol A2
1528 ; decrement loop counter
1529 dec r_cnt
1530 brne __udivmodpsi4_loop
1531 com A0
1532 com A1
1533 com A2
1534 ; div/mod results to return registers
1535 ; remainder
1536 mov B0, C0
1537 mov B1, C1
1538 mov B2, C2
1539 clr __zero_reg__ ; C0
1540 ret
1541 ENDF __udivmodpsi4
1542 #endif /* defined (L_udivmodpsi4) */
1543
1544 #if defined (L_divmodpsi4)
1545 ;; R24:R22 = R24:R22 div R20:R18
1546 ;; R20:R18 = R24:R22 mod R20:R18
1547 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1548
1549 DEFUN __divmodpsi4
1550 ; R0.7 will contain the sign of the result:
1551 ; R0.7 = A.sign ^ B.sign
1552 mov __tmp_reg__, B2
1553 ; T-flag = sign of dividend
1554 bst A2, 7
1555 brtc 0f
1556 com __tmp_reg__
1557 ; Adjust dividend's sign
1558 rcall __divmodpsi4_negA
1559 0:
1560 ; Adjust divisor's sign
1561 sbrc B2, 7
1562 rcall __divmodpsi4_negB
1563
1564 ; Do the unsigned div/mod
1565 XCALL __udivmodpsi4
1566
1567 ; Adjust quotient's sign
1568 sbrc __tmp_reg__, 7
1569 rcall __divmodpsi4_negA
1570
1571 ; Adjust remainder's sign
1572 brtc __divmodpsi4_end
1573
1574 __divmodpsi4_negB:
1575 ; Correct divisor/remainder sign
1576 com B2
1577 com B1
1578 neg B0
1579 sbci B1, -1
1580 sbci B2, -1
1581 ret
1582
1583 ; Correct dividend/quotient sign
1584 __divmodpsi4_negA:
1585 com A2
1586 com A1
1587 neg A0
1588 sbci A1, -1
1589 sbci A2, -1
1590 __divmodpsi4_end:
1591 ret
1592
1593 ENDF __divmodpsi4
1594 #endif /* defined (L_divmodpsi4) */
1595
1596 #undef A0
1597 #undef A1
1598 #undef A2
1599
1600 #undef B0
1601 #undef B1
1602 #undef B2
1603
1604 #undef C0
1605 #undef C1
1606 #undef C2
1607
1608 #undef r_cnt
1609
1610 /*******************************************************
1611 Division 32 / 32 => (result + remainder)
1612 *******************************************************/
1613 #define r_remHH r31 /* remainder High */
1614 #define r_remHL r30
1615 #define r_remH r27
1616 #define r_remL r26 /* remainder Low */
1617
1618 /* return: remainder */
1619 #define r_arg1HH r25 /* dividend High */
1620 #define r_arg1HL r24
1621 #define r_arg1H r23
1622 #define r_arg1L r22 /* dividend Low */
1623
1624 /* return: quotient */
1625 #define r_arg2HH r21 /* divisor High */
1626 #define r_arg2HL r20
1627 #define r_arg2H r19
1628 #define r_arg2L r18 /* divisor Low */
1629
1630 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1631
1632 #if defined (L_udivmodsi4)
1633 DEFUN __udivmodsi4
1634 ldi r_remL, 33 ; init loop counter
1635 mov r_cnt, r_remL
1636 sub r_remL,r_remL
1637 sub r_remH,r_remH ; clear remainder and carry
1638 mov_l r_remHL, r_remL
1639 mov_h r_remHH, r_remH
1640 rjmp __udivmodsi4_ep ; jump to entry point
1641 __udivmodsi4_loop:
1642 rol r_remL ; shift dividend into remainder
1643 rol r_remH
1644 rol r_remHL
1645 rol r_remHH
1646 cp r_remL,r_arg2L ; compare remainder & divisor
1647 cpc r_remH,r_arg2H
1648 cpc r_remHL,r_arg2HL
1649 cpc r_remHH,r_arg2HH
1650 brcs __udivmodsi4_ep ; remainder < divisor
1651 sub r_remL,r_arg2L ; subtract divisor
1652 sbc r_remH,r_arg2H
1653 sbc r_remHL,r_arg2HL
1654 sbc r_remHH,r_arg2HH
1655 __udivmodsi4_ep:
1656 rol r_arg1L ; shift dividend (with CARRY)
1657 rol r_arg1H
1658 rol r_arg1HL
1659 rol r_arg1HH
1660 dec r_cnt ; decrement loop counter
1661 brne __udivmodsi4_loop
1662 ; __zero_reg__ now restored (r_cnt == 0)
1663 com r_arg1L
1664 com r_arg1H
1665 com r_arg1HL
1666 com r_arg1HH
1667 ; div/mod results to return registers, as for the ldiv() function
1668 mov_l r_arg2L, r_arg1L ; quotient
1669 mov_h r_arg2H, r_arg1H
1670 mov_l r_arg2HL, r_arg1HL
1671 mov_h r_arg2HH, r_arg1HH
1672 mov_l r_arg1L, r_remL ; remainder
1673 mov_h r_arg1H, r_remH
1674 mov_l r_arg1HL, r_remHL
1675 mov_h r_arg1HH, r_remHH
1676 ret
1677 ENDF __udivmodsi4
1678 #endif /* defined (L_udivmodsi4) */
1679
1680 #if defined (L_divmodsi4)
1681 DEFUN __divmodsi4
1682 mov __tmp_reg__,r_arg2HH
1683 bst r_arg1HH,7 ; store sign of dividend
1684 brtc 0f
1685 com __tmp_reg__ ; r0.7 is sign of result
1686 XCALL __negsi2 ; dividend negative: negate
1687 0:
1688 sbrc r_arg2HH,7
1689 rcall __divmodsi4_neg2 ; divisor negative: negate
1690 XCALL __udivmodsi4 ; do the unsigned div/mod
1691 sbrc __tmp_reg__, 7 ; correct quotient sign
1692 rcall __divmodsi4_neg2
1693 brtc __divmodsi4_exit ; correct remainder sign
1694 XJMP __negsi2
1695 __divmodsi4_neg2:
1696 ;; correct divisor/quotient sign
1697 com r_arg2HH
1698 com r_arg2HL
1699 com r_arg2H
1700 neg r_arg2L
1701 sbci r_arg2H,0xff
1702 sbci r_arg2HL,0xff
1703 sbci r_arg2HH,0xff
1704 __divmodsi4_exit:
1705 ret
1706 ENDF __divmodsi4
1707 #endif /* defined (L_divmodsi4) */
1708
1709 #if defined (L_negsi2)
1710 ;; (set (reg:SI 22)
1711 ;; (neg:SI (reg:SI 22)))
1712 ;; Sets the V flag for signed overflow tests
1713 DEFUN __negsi2
1714 NEG4 22
1715 ret
1716 ENDF __negsi2
1717 #endif /* L_negsi2 */
1718
1719 #undef r_remHH
1720 #undef r_remHL
1721 #undef r_remH
1722 #undef r_remL
1723 #undef r_arg1HH
1724 #undef r_arg1HL
1725 #undef r_arg1H
1726 #undef r_arg1L
1727 #undef r_arg2HH
1728 #undef r_arg2HL
1729 #undef r_arg2H
1730 #undef r_arg2L
1731 #undef r_cnt
1732
1733 /* *di routines use registers below R19 and won't work with tiny arch
1734 right now. */
1735
1736 #if !defined (__AVR_TINY__)
1737 /*******************************************************
1738 Division 64 / 64
1739 Modulo 64 % 64
1740 *******************************************************/
1741
1742 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1743 ;; at least 16k of Program Memory. For smaller Devices, the Choice
1744 ;; depends on MOVW and SP Size. There is a Connection between SP Size
1745 ;; and Flash Size, so SP Size can be used to test for Flash Size.
1746
1747 #if defined (__AVR_HAVE_JMP_CALL__)
1748 # define SPEED_DIV 8
1749 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1750 # define SPEED_DIV 16
1751 #else
1752 # define SPEED_DIV 0
1753 #endif
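;; SPEED_DIV only selects how much of the 64-bit division below may be
;; short-cut before the generic 1-bit-per-iteration loop:  8 pre-shifts
;; the dividend a whole byte at a time while its high part is still
;; smaller than the divisor, 16 pre-shifts one 32-bit chunk, and 0 always
;; runs the full 64 iterations (smallest code).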
1754
1755 ;; A[0..7]: In: Dividend;
1756 ;; Out: Quotient (T = 0)
1757 ;; Out: Remainder (T = 1)
1758 #define A0 18
1759 #define A1 A0+1
1760 #define A2 A0+2
1761 #define A3 A0+3
1762 #define A4 A0+4
1763 #define A5 A0+5
1764 #define A6 A0+6
1765 #define A7 A0+7
1766
1767 ;; B[0..7]: In: Divisor; Out: Clobber
1768 #define B0 10
1769 #define B1 B0+1
1770 #define B2 B0+2
1771 #define B3 B0+3
1772 #define B4 B0+4
1773 #define B5 B0+5
1774 #define B6 B0+6
1775 #define B7 B0+7
1776
1777 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1778 #define C0 8
1779 #define C1 C0+1
1780 #define C2 30
1781 #define C3 C2+1
1782 #define C4 28
1783 #define C5 C4+1
1784 #define C6 26
1785 #define C7 C6+1
1786
1787 ;; Holds Signs during Division Routine
1788 #define SS __tmp_reg__
1789
1790 ;; Bit-Counter in Division Routine
1791 #define R_cnt __zero_reg__
1792
1793 ;; Scratch Register for Negation
1794 #define NN r31
1795
1796 #if defined (L_udivdi3)
1797
1798 ;; R25:R18 = R24:R18 umod R17:R10
1799 ;; Ordinary ABI-Function
1800
1801 DEFUN __umoddi3
1802 set
1803 rjmp __udivdi3_umoddi3
1804 ENDF __umoddi3
1805
1806 ;; R25:R18 = R24:R18 udiv R17:R10
1807 ;; Ordinary ABI-Function
1808
1809 DEFUN __udivdi3
1810 clt
1811 ENDF __udivdi3
1812
1813 DEFUN __udivdi3_umoddi3
1814 push C0
1815 push C1
1816 push C4
1817 push C5
1818 XCALL __udivmod64
1819 pop C5
1820 pop C4
1821 pop C1
1822 pop C0
1823 ret
1824 ENDF __udivdi3_umoddi3
1825 #endif /* L_udivdi3 */
1826
1827 #if defined (L_udivmod64)
1828
1829 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1830 ;; No Registers saved/restored; the Callers will take Care.
1831 ;; Preserves B[] and T-flag
1832 ;; T = 0: Compute Quotient in A[]
1833 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1834
1835 DEFUN __udivmod64
1836
1837 ;; Clear Remainder (C6, C7 will follow)
1838 clr C0
1839 clr C1
1840 wmov C2, C0
1841 wmov C4, C0
1842 ldi C7, 64
1843
1844 #if SPEED_DIV == 0 || SPEED_DIV == 16
1845 ;; Initialize Loop-Counter
1846 mov R_cnt, C7
1847 wmov C6, C0
1848 #endif /* SPEED_DIV */
1849
1850 #if SPEED_DIV == 8
1851
1852 push A7
1853 clr C6
1854
1855 1: ;; Compare shifted Dividend against Divisor
1856 ;; If -- even after Shifting -- it is smaller...
1857 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1858 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1859 brcc 2f
1860
1861 ;; ...then no Quotient Bit can arise in these 8 Steps; thus, it is legal to shift left by a whole Byte
1862 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1863 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1864 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1865 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1866
1867 ;; 8 Bits are done
1868 subi C7, 8
1869 brne 1b
1870
1871 ;; Shifted 64 Bits: A7 has traveled to C7
1872 pop C7
1873 ;; Divisor is greater than Dividend. We have:
1874 ;; A[] % B[] = A[]
1875 ;; A[] / B[] = 0
1876 ;; Thus, we can return immediately
1877 rjmp 5f
1878
1879 2: ;; Initialize Bit-Counter with the Number of Bits still to be processed
1880 mov R_cnt, C7
1881
1882 ;; Push of A7 is not needed because C7 is still 0
1883 pop C7
1884 clr C7
1885
1886 #elif SPEED_DIV == 16
1887
1888 ;; Compare shifted Dividend against Divisor
1889 cp A7, B3
1890 cpc C0, B4
1891 cpc C1, B5
1892 cpc C2, B6
1893 cpc C3, B7
1894 brcc 2f
1895
1896 ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1897 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1898 wmov C2,A6 $ wmov C0,A4
1899 wmov A6,A2 $ wmov A4,A0
1900 wmov A2,C6 $ wmov A0,C4
1901
1902 ;; Set Bit Counter to 32
1903 lsr R_cnt
1904 2:
1905 #elif SPEED_DIV
1906 #error SPEED_DIV = ?
1907 #endif /* SPEED_DIV */
1908
1909 ;; The actual Division + Remainder Routine
1910
1911 3: ;; Left-shift Dividend...
1912 lsl A0 $ rol A1 $ rol A2 $ rol A3
1913 rol A4 $ rol A5 $ rol A6 $ rol A7
1914
1915 ;; ...into Remainder
1916 rol C0 $ rol C1 $ rol C2 $ rol C3
1917 rol C4 $ rol C5 $ rol C6 $ rol C7
1918
1919 ;; Compare Remainder and Divisor
1920 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1921 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1922
1923 brcs 4f
1924
1925 ;; Divisor fits into Remainder: Subtract it from Remainder...
1926 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1927 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1928
1929 ;; ...and set according Bit in the upcoming Quotient
1930 ;; The Bit will travel to its final Position
1931 ori A0, 1
1932
1933 4: ;; This Bit is done
1934 dec R_cnt
1935 brne 3b
1936 ;; __zero_reg__ is 0 again
1937
1938 ;; T = 0: We are fine with the Quotient in A[]
1939 ;; T = 1: Copy Remainder to A[]
1940 5: brtc 6f
1941 wmov A0, C0
1942 wmov A2, C2
1943 wmov A4, C4
1944 wmov A6, C6
1945 ;; Move the Sign of the Result to SS.7
1946 lsl SS
1947
1948 6: ret
1949
1950 ENDF __udivmod64
1951 #endif /* L_udivmod64 */
1952
1953
1954 #if defined (L_divdi3)
1955
1956 ;; R25:R18 = R24:R18 mod R17:R10
1957 ;; Ordinary ABI-Function
1958
1959 DEFUN __moddi3
1960 set
1961 rjmp __divdi3_moddi3
1962 ENDF __moddi3
1963
1964 ;; R25:R18 = R24:R18 div R17:R10
1965 ;; Ordinary ABI-Function
1966
1967 DEFUN __divdi3
1968 clt
1969 ENDF __divdi3
1970
1971 DEFUN __divdi3_moddi3
1972 #if SPEED_DIV
1973 mov r31, A7
1974 or r31, B7
1975 brmi 0f
1976 ;; Both Sign Bits are 0: the following Complexity is not needed
1977 XJMP __udivdi3_umoddi3
1978 #endif /* SPEED_DIV */
1979
1980 0: ;; The Prologue
1981 ;; Save 12 Registers: Y, 17...8
1982 ;; No Frame needed
1983 do_prologue_saves 12
1984
1985 ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1986 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1987 mov SS, A7
1988 asr SS
1989 ;; Adjust Dividend's Sign as needed
1990 #if SPEED_DIV
1991 ;; Compiling for Speed, we know that at least one Operand must be < 0
1992 ;; Thus, if A[] >= 0 then we know B[] < 0
1993 brpl 22f
1994 #else
1995 brpl 21f
1996 #endif /* SPEED_DIV */
1997
1998 XCALL __negdi2
1999
2000 ;; Adjust Divisor's Sign and SS.7 as needed
2001 21: tst B7
2002 brpl 3f
2003 22: ldi NN, 1 << 7
2004 eor SS, NN
2005
2006 ldi NN, -1
2007 com B4 $ com B5 $ com B6 $ com B7
2008 $ com B1 $ com B2 $ com B3
2009 NEG B0
2010 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
2011 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
2012
2013 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
2014 XCALL __udivmod64
2015
2016 ;; Adjust Result's Sign
2017 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2018 tst SS
2019 brpl 4f
2020 #else
2021 sbrc SS, 7
2022 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2023 XCALL __negdi2
2024
2025 4: ;; Epilogue: Restore 12 Registers and return
2026 do_epilogue_restores 12
2027
2028 ENDF __divdi3_moddi3
2029
2030 #endif /* L_divdi3 */
2031
2032 #undef R_cnt
2033 #undef SS
2034 #undef NN
2035
2036 .section .text.libgcc, "ax", @progbits
2037
2038 #define TT __tmp_reg__
2039
2040 #if defined (L_adddi3)
2041 ;; (set (reg:DI 18)
2042 ;; (plus:DI (reg:DI 18)
2043 ;; (reg:DI 10)))
2044 ;; Sets the V flag for signed overflow tests
2045 ;; Sets the C flag for unsigned overflow tests
2046 DEFUN __adddi3
2047 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
2048 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
2049 ret
2050 ENDF __adddi3
2051 #endif /* L_adddi3 */
2052
2053 #if defined (L_adddi3_s8)
2054 ;; (set (reg:DI 18)
2055 ;; (plus:DI (reg:DI 18)
2056 ;; (sign_extend:SI (reg:QI 26))))
2057 ;; Sets the V flag for signed overflow tests
2058 ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
2059 DEFUN __adddi3_s8
2060 clr TT
2061 sbrc r26, 7
2062 com TT
2063 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
2064 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
2065 ret
2066 ENDF __adddi3_s8
2067 #endif /* L_adddi3_s8 */
2068
2069 #if defined (L_subdi3)
2070 ;; (set (reg:DI 18)
2071 ;; (minus:DI (reg:DI 18)
2072 ;; (reg:DI 10)))
2073 ;; Sets the V flag for signed overflow tests
2074 ;; Sets the C flag for unsigned overflow tests
2075 DEFUN __subdi3
2076 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
2077 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
2078 ret
2079 ENDF __subdi3
2080 #endif /* L_subdi3 */
2081
2082 #if defined (L_cmpdi2)
2083 ;; (set (cc0)
2084 ;; (compare (reg:DI 18)
2085 ;; (reg:DI 10)))
2086 DEFUN __cmpdi2
2087 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
2088 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
2089 ret
2090 ENDF __cmpdi2
2091 #endif /* L_cmpdi2 */
2092
2093 #if defined (L_cmpdi2_s8)
2094 ;; (set (cc0)
2095 ;; (compare (reg:DI 18)
2096 ;; (sign_extend:SI (reg:QI 26))))
2097 DEFUN __cmpdi2_s8
2098 clr TT
2099 sbrc r26, 7
2100 com TT
2101 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
2102 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
2103 ret
2104 ENDF __cmpdi2_s8
2105 #endif /* L_cmpdi2_s8 */
2106
2107 #if defined (L_negdi2)
2108 ;; (set (reg:DI 18)
2109 ;; (neg:DI (reg:DI 18)))
2110 ;; Sets the V flag for signed overflow tests
2111 DEFUN __negdi2
2112
2113 com A4 $ com A5 $ com A6 $ com A7
2114 $ com A1 $ com A2 $ com A3
2115 NEG A0
2116 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
2117 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
2118 ret
2119
2120 ENDF __negdi2
2121 #endif /* L_negdi2 */
2122
2123 #undef TT
2124
2125 #undef C7
2126 #undef C6
2127 #undef C5
2128 #undef C4
2129 #undef C3
2130 #undef C2
2131 #undef C1
2132 #undef C0
2133
2134 #undef B7
2135 #undef B6
2136 #undef B5
2137 #undef B4
2138 #undef B3
2139 #undef B2
2140 #undef B1
2141 #undef B0
2142
2143 #undef A7
2144 #undef A6
2145 #undef A5
2146 #undef A4
2147 #undef A3
2148 #undef A2
2149 #undef A1
2150 #undef A0
2151
2152 #endif /* !defined (__AVR_TINY__) */
2153
2154 \f
2155 .section .text.libgcc.prologue, "ax", @progbits
2156
2157 /**********************************
2158 * This is a prologue subroutine
2159 **********************************/
2160 #if !defined (__AVR_TINY__)
2161 #if defined (L_prologue)
2162
2163 ;; This function does not clobber T-flag; 64-bit division relies on it
2164 DEFUN __prologue_saves__
2165 push r2
2166 push r3
2167 push r4
2168 push r5
2169 push r6
2170 push r7
2171 push r8
2172 push r9
2173 push r10
2174 push r11
2175 push r12
2176 push r13
2177 push r14
2178 push r15
2179 push r16
2180 push r17
2181 push r28
2182 push r29
2183 #if !defined (__AVR_HAVE_SPH__)
2184 in r28,__SP_L__
2185 sub r28,r26
2186 out __SP_L__,r28
2187 clr r29
2188 #elif defined (__AVR_XMEGA__)
2189 in r28,__SP_L__
2190 in r29,__SP_H__
2191 sub r28,r26
2192 sbc r29,r27
2193 out __SP_L__,r28
2194 out __SP_H__,r29
2195 #else
2196 in r28,__SP_L__
2197 in r29,__SP_H__
2198 sub r28,r26
2199 sbc r29,r27
2200 in __tmp_reg__,__SREG__
2201 cli
2202 out __SP_H__,r29
2203 out __SREG__,__tmp_reg__
2204 out __SP_L__,r28
2205 #endif /* #SP = 8/16 */
2206
2207 XIJMP
2208
2209 ENDF __prologue_saves__
2210 #endif /* defined (L_prologue) */
2211
2212 /*
2213 * This is an epilogue subroutine
2214 */
2215 #if defined (L_epilogue)
2216
2217 DEFUN __epilogue_restores__
2218 ldd r2,Y+18
2219 ldd r3,Y+17
2220 ldd r4,Y+16
2221 ldd r5,Y+15
2222 ldd r6,Y+14
2223 ldd r7,Y+13
2224 ldd r8,Y+12
2225 ldd r9,Y+11
2226 ldd r10,Y+10
2227 ldd r11,Y+9
2228 ldd r12,Y+8
2229 ldd r13,Y+7
2230 ldd r14,Y+6
2231 ldd r15,Y+5
2232 ldd r16,Y+4
2233 ldd r17,Y+3
2234 ldd r26,Y+2
2235 #if !defined (__AVR_HAVE_SPH__)
2236 ldd r29,Y+1
2237 add r28,r30
2238 out __SP_L__,r28
2239 mov r28, r26
2240 #elif defined (__AVR_XMEGA__)
2241 ldd r27,Y+1
2242 add r28,r30
2243 adc r29,__zero_reg__
2244 out __SP_L__,r28
2245 out __SP_H__,r29
2246 wmov 28, 26
2247 #else
2248 ldd r27,Y+1
2249 add r28,r30
2250 adc r29,__zero_reg__
2251 in __tmp_reg__,__SREG__
2252 cli
2253 out __SP_H__,r29
2254 out __SREG__,__tmp_reg__
2255 out __SP_L__,r28
2256 mov_l r28, r26
2257 mov_h r29, r27
2258 #endif /* #SP = 8/16 */
2259 ret
2260 ENDF __epilogue_restores__
2261 #endif /* defined (L_epilogue) */
2262 #endif /* !defined (__AVR_TINY__) */
2263
2264 #ifdef L_exit
2265 .section .fini9,"ax",@progbits
2266 DEFUN _exit
2267 .weak exit
2268 exit:
2269 ENDF _exit
2270
2271 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
2272
2273 .section .fini0,"ax",@progbits
2274 cli
2275 __stop_program:
2276 rjmp __stop_program
2277 #endif /* defined (L_exit) */
2278
2279 #ifdef L_cleanup
2280 .weak _cleanup
2281 .func _cleanup
2282 _cleanup:
2283 ret
2284 .endfunc
2285 #endif /* defined (L_cleanup) */
2286
2287 \f
2288 .section .text.libgcc, "ax", @progbits
2289
2290 #ifdef L_tablejump2
2291 DEFUN __tablejump2__
2292 lsl r30
2293 rol r31
2294 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2295 ;; Word address of gs() jumptable entry in R24:Z
2296 rol r24
2297 out __RAMPZ__, r24
2298 #elif defined (__AVR_HAVE_ELPM__)
2299 ;; Word address of jumptable entry in Z
2300 clr __tmp_reg__
2301 rol __tmp_reg__
2302 out __RAMPZ__, __tmp_reg__
2303 #endif
2304
2305 ;; Read word address from jumptable and jump
2306
2307 #if defined (__AVR_HAVE_ELPMX__)
2308 elpm __tmp_reg__, Z+
2309 elpm r31, Z
2310 mov r30, __tmp_reg__
2311 #ifdef __AVR_HAVE_RAMPD__
2312 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2313 out __RAMPZ__, __zero_reg__
2314 #endif /* RAMPD */
2315 XIJMP
2316 #elif defined (__AVR_HAVE_ELPM__)
2317 elpm
2318 push r0
2319 adiw r30, 1
2320 elpm
2321 push r0
2322 ret
2323 #elif defined (__AVR_HAVE_LPMX__)
2324 lpm __tmp_reg__, Z+
2325 lpm r31, Z
2326 mov r30, __tmp_reg__
2327 ijmp
2328 #elif defined (__AVR_TINY__)
2329 wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
2330 ld __tmp_reg__, Z+
2331 ld r31, Z ; Use ld instead of lpm to load Z
2332 mov r30, __tmp_reg__
2333 ijmp
2334 #else
2335 lpm
2336 push r0
2337 adiw r30, 1
2338 lpm
2339 push r0
2340 ret
2341 #endif
2342 ENDF __tablejump2__
2343 #endif /* L_tablejump2 */
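;; Rough picture of __tablejump2__ (informal summary):  on entry, Z holds
;; the gs() word address of a jump-table slot (devices with EIJMP/EICALL
;; pass the high bits in R24).  LSL/ROL turns it into the byte address
;; needed by (E)LPM -- on __AVR_TINY__ a plain LD with the flash offset
;; added -- then the slot's contents, again a word address, are loaded
;; into Z and control is transferred with IJMP/EIJMP, or by pushing the
;; target and executing RET on cores without LPMX/ELPMX.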
2344
2345 #if defined(__AVR_TINY__)
2346 #ifdef L_copy_data
2347 .section .init4,"ax",@progbits
2348 .global __do_copy_data
2349 __do_copy_data:
2350 ldi r18, hi8(__data_end)
2351 ldi r26, lo8(__data_start)
2352 ldi r27, hi8(__data_start)
2353 ldi r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2354 ldi r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2355 rjmp .L__do_copy_data_start
2356 .L__do_copy_data_loop:
2357 ld r19, z+
2358 st X+, r19
2359 .L__do_copy_data_start:
2360 cpi r26, lo8(__data_end)
2361 cpc r27, r18
2362 brne .L__do_copy_data_loop
2363 #endif
2364 #else
2365 #ifdef L_copy_data
2366 .section .init4,"ax",@progbits
2367 DEFUN __do_copy_data
2368 #if defined(__AVR_HAVE_ELPMX__)
2369 ldi r17, hi8(__data_end)
2370 ldi r26, lo8(__data_start)
2371 ldi r27, hi8(__data_start)
2372 ldi r30, lo8(__data_load_start)
2373 ldi r31, hi8(__data_load_start)
2374 ldi r16, hh8(__data_load_start)
2375 out __RAMPZ__, r16
2376 rjmp .L__do_copy_data_start
2377 .L__do_copy_data_loop:
2378 elpm r0, Z+
2379 st X+, r0
2380 .L__do_copy_data_start:
2381 cpi r26, lo8(__data_end)
2382 cpc r27, r17
2383 brne .L__do_copy_data_loop
2384 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
2385 ldi r17, hi8(__data_end)
2386 ldi r26, lo8(__data_start)
2387 ldi r27, hi8(__data_start)
2388 ldi r30, lo8(__data_load_start)
2389 ldi r31, hi8(__data_load_start)
2390 ldi r16, hh8(__data_load_start - 0x10000)
2391 .L__do_copy_data_carry:
2392 inc r16
2393 out __RAMPZ__, r16
2394 rjmp .L__do_copy_data_start
2395 .L__do_copy_data_loop:
2396 elpm
2397 st X+, r0
2398 adiw r30, 1
2399 brcs .L__do_copy_data_carry
2400 .L__do_copy_data_start:
2401 cpi r26, lo8(__data_end)
2402 cpc r27, r17
2403 brne .L__do_copy_data_loop
2404 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2405 ldi r17, hi8(__data_end)
2406 ldi r26, lo8(__data_start)
2407 ldi r27, hi8(__data_start)
2408 ldi r30, lo8(__data_load_start)
2409 ldi r31, hi8(__data_load_start)
2410 rjmp .L__do_copy_data_start
2411 .L__do_copy_data_loop:
2412 #if defined (__AVR_HAVE_LPMX__)
2413 lpm r0, Z+
2414 #else
2415 lpm
2416 adiw r30, 1
2417 #endif
2418 st X+, r0
2419 .L__do_copy_data_start:
2420 cpi r26, lo8(__data_end)
2421 cpc r27, r17
2422 brne .L__do_copy_data_loop
2423 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2424 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2425 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2426 out __RAMPZ__, __zero_reg__
2427 #endif /* ELPM && RAMPD */
2428 ENDF __do_copy_data
2429 #endif /* L_copy_data */
2430 #endif /* !defined (__AVR_TINY__) */
2431
2432 /* __do_clear_bss is only necessary if there is anything in the .bss section. */
2433
2434 #ifdef L_clear_bss
2435 .section .init4,"ax",@progbits
2436 DEFUN __do_clear_bss
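;; Roughly equivalent C (illustrative sketch only):
;;
;;     for (uint8_t *p = &__bss_start; p != &__bss_end; p++)
;;         *p = 0;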
2437 ldi r18, hi8(__bss_end)
2438 ldi r26, lo8(__bss_start)
2439 ldi r27, hi8(__bss_start)
2440 rjmp .do_clear_bss_start
2441 .do_clear_bss_loop:
2442 st X+, __zero_reg__
2443 .do_clear_bss_start:
2444 cpi r26, lo8(__bss_end)
2445 cpc r27, r18
2446 brne .do_clear_bss_loop
2447 ENDF __do_clear_bss
2448 #endif /* L_clear_bss */
2449
2450 /* __do_global_ctors and __do_global_dtors are only necessary
2451 if there are any constructors/destructors. */
2452
2453 #if defined(__AVR_TINY__)
2454 #define cdtors_tst_reg r18
2455 #else
2456 #define cdtors_tst_reg r17
2457 #endif
2458
2459 #ifdef L_ctors
2460 .section .init6,"ax",@progbits
2461 DEFUN __do_global_ctors
2462 ldi cdtors_tst_reg, pm_hi8(__ctors_start)
2463 ldi r28, pm_lo8(__ctors_end)
2464 ldi r29, pm_hi8(__ctors_end)
2465 #ifdef __AVR_HAVE_EIJMP_EICALL__
2466 ldi r16, pm_hh8(__ctors_end)
2467 #endif /* HAVE_EIJMP */
2468 rjmp .L__do_global_ctors_start
2469 .L__do_global_ctors_loop:
2470 wsubi 28, 1
2471 #ifdef __AVR_HAVE_EIJMP_EICALL__
2472 sbc r16, __zero_reg__
2473 mov r24, r16
2474 #endif /* HAVE_EIJMP */
2475 mov_h r31, r29
2476 mov_l r30, r28
2477 XCALL __tablejump2__
2478 .L__do_global_ctors_start:
2479 cpi r28, pm_lo8(__ctors_start)
2480 cpc r29, cdtors_tst_reg
2481 #ifdef __AVR_HAVE_EIJMP_EICALL__
2482 ldi r24, pm_hh8(__ctors_start)
2483 cpc r16, r24
2484 #endif /* HAVE_EIJMP */
2485 brne .L__do_global_ctors_loop
2486 ENDF __do_global_ctors
2487 #endif /* L_ctors */
2488
2489 #ifdef L_dtors
2490 .section .fini6,"ax",@progbits
2491 DEFUN __do_global_dtors
2492 ldi cdtors_tst_reg, pm_hi8(__dtors_end)
2493 ldi r28, pm_lo8(__dtors_start)
2494 ldi r29, pm_hi8(__dtors_start)
2495 #ifdef __AVR_HAVE_EIJMP_EICALL__
2496 ldi r16, pm_hh8(__dtors_start)
2497 #endif /* HAVE_EIJMP */
2498 rjmp .L__do_global_dtors_start
2499 .L__do_global_dtors_loop:
2500 #ifdef __AVR_HAVE_EIJMP_EICALL__
2501 mov r24, r16
2502 #endif /* HAVE_EIJMP */
2503 mov_h r31, r29
2504 mov_l r30, r28
2505 XCALL __tablejump2__
2506 waddi 28, 1
2507 #ifdef __AVR_HAVE_EIJMP_EICALL__
2508 adc r16, __zero_reg__
2509 #endif /* HAVE_EIJMP */
2510 .L__do_global_dtors_start:
2511 cpi r28, pm_lo8(__dtors_end)
2512 cpc r29, cdtors_tst_reg
2513 #ifdef __AVR_HAVE_EIJMP_EICALL__
2514 ldi r24, pm_hh8(__dtors_end)
2515 cpc r16, r24
2516 #endif /* HAVE_EIJMP */
2517 brne .L__do_global_dtors_loop
2518 ENDF __do_global_dtors
2519 #endif /* L_dtors */
2520
2521 #undef cdtors_tst_reg
2522
2523 .section .text.libgcc, "ax", @progbits
2524
2525 #if !defined (__AVR_TINY__)
2526 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2527 ;; Loading n bytes from Flash; n = 3,4
2528 ;; R22... = Flash[Z]
2529 ;; Clobbers: __tmp_reg__
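;; Z is preserved: the last .load step rewinds Z by n-1 and thereby undoes
;; the ADIWs of the preceding steps.  __load_3 simply wraps __load_4,
;; saving and restoring R25 so that only three result bytes are exposed.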
2530
2531 #if (defined (L_load_3) \
2532 || defined (L_load_4)) \
2533 && !defined (__AVR_HAVE_LPMX__)
2534
2535 ;; Destination
2536 #define D0 22
2537 #define D1 D0+1
2538 #define D2 D0+2
2539 #define D3 D0+3
2540
2541 .macro .load dest, n
2542 lpm
2543 mov \dest, r0
2544 .if \dest != D0+\n-1
2545 adiw r30, 1
2546 .else
2547 sbiw r30, \n-1
2548 .endif
2549 .endm
2550
2551 #if defined (L_load_3)
2552 DEFUN __load_3
2553 push D3
2554 XCALL __load_4
2555 pop D3
2556 ret
2557 ENDF __load_3
2558 #endif /* L_load_3 */
2559
2560 #if defined (L_load_4)
2561 DEFUN __load_4
2562 .load D0, 4
2563 .load D1, 4
2564 .load D2, 4
2565 .load D3, 4
2566 ret
2567 ENDF __load_4
2568 #endif /* L_load_4 */
2569
2570 #endif /* L_load_3 || L_load_4 */
2571 #endif /* !defined (__AVR_TINY__) */
2572
2573 #if !defined (__AVR_TINY__)
2574 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2575 ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2576 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2577 ;; Clobbers: __tmp_reg__, R21, R30, R31
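;; Bit 7 of R21 (bit 23 of the 24-bit address) selects the address space:
;; set means RAM, clear means flash.  A rough C model of __xload_1
;; (illustrative only; assumes avr-libc's <avr/pgmspace.h> types/macros):
;;
;;     uint8_t xload_1 (uint_farptr_t addr)       /* addr in R21:Z */
;;     {
;;         if (addr & 0x800000UL)                 /* RAM */
;;             return *(const uint8_t *) (uint16_t) addr;
;;         return pgm_read_byte_far (addr);       /* flash */
;;     }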
2578
2579 #if (defined (L_xload_1) \
2580 || defined (L_xload_2) \
2581 || defined (L_xload_3) \
2582 || defined (L_xload_4))
2583
2584 ;; Destination
2585 #define D0 22
2586 #define D1 D0+1
2587 #define D2 D0+2
2588 #define D3 D0+3
2589
2590 ;; Register containing bits 16+ of the address
2591
2592 #define HHI8 21
2593
2594 .macro .xload dest, n
2595 #if defined (__AVR_HAVE_ELPMX__)
2596 elpm \dest, Z+
2597 #elif defined (__AVR_HAVE_ELPM__)
2598 elpm
2599 mov \dest, r0
2600 .if \dest != D0+\n-1
2601 adiw r30, 1
2602 adc HHI8, __zero_reg__
2603 out __RAMPZ__, HHI8
2604 .endif
2605 #elif defined (__AVR_HAVE_LPMX__)
2606 lpm \dest, Z+
2607 #else
2608 lpm
2609 mov \dest, r0
2610 .if \dest != D0+\n-1
2611 adiw r30, 1
2612 .endif
2613 #endif
2614 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2615 .if \dest == D0+\n-1
2616 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2617 out __RAMPZ__, __zero_reg__
2618 .endif
2619 #endif
2620 .endm ; .xload
2621
2622 #if defined (L_xload_1)
2623 DEFUN __xload_1
2624 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2625 sbrc HHI8, 7
2626 ld D0, Z
2627 sbrs HHI8, 7
2628 lpm D0, Z
2629 ret
2630 #else
2631 sbrc HHI8, 7
2632 rjmp 1f
2633 #if defined (__AVR_HAVE_ELPM__)
2634 out __RAMPZ__, HHI8
2635 #endif /* __AVR_HAVE_ELPM__ */
2636 .xload D0, 1
2637 ret
2638 1: ld D0, Z
2639 ret
2640 #endif /* LPMx && ! ELPM */
2641 ENDF __xload_1
2642 #endif /* L_xload_1 */
2643
2644 #if defined (L_xload_2)
2645 DEFUN __xload_2
2646 sbrc HHI8, 7
2647 rjmp 1f
2648 #if defined (__AVR_HAVE_ELPM__)
2649 out __RAMPZ__, HHI8
2650 #endif /* __AVR_HAVE_ELPM__ */
2651 .xload D0, 2
2652 .xload D1, 2
2653 ret
2654 1: ld D0, Z+
2655 ld D1, Z+
2656 ret
2657 ENDF __xload_2
2658 #endif /* L_xload_2 */
2659
2660 #if defined (L_xload_3)
2661 DEFUN __xload_3
2662 sbrc HHI8, 7
2663 rjmp 1f
2664 #if defined (__AVR_HAVE_ELPM__)
2665 out __RAMPZ__, HHI8
2666 #endif /* __AVR_HAVE_ELPM__ */
2667 .xload D0, 3
2668 .xload D1, 3
2669 .xload D2, 3
2670 ret
2671 1: ld D0, Z+
2672 ld D1, Z+
2673 ld D2, Z+
2674 ret
2675 ENDF __xload_3
2676 #endif /* L_xload_3 */
2677
2678 #if defined (L_xload_4)
2679 DEFUN __xload_4
2680 sbrc HHI8, 7
2681 rjmp 1f
2682 #if defined (__AVR_HAVE_ELPM__)
2683 out __RAMPZ__, HHI8
2684 #endif /* __AVR_HAVE_ELPM__ */
2685 .xload D0, 4
2686 .xload D1, 4
2687 .xload D2, 4
2688 .xload D3, 4
2689 ret
2690 1: ld D0, Z+
2691 ld D1, Z+
2692 ld D2, Z+
2693 ld D3, Z+
2694 ret
2695 ENDF __xload_4
2696 #endif /* L_xload_4 */
2697
2698 #endif /* L_xload_{1|2|3|4} */
2699 #endif /* if !defined (__AVR_TINY__) */
2700
2701 #if !defined (__AVR_TINY__)
2702 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2703 ;; memcopy from Address Space __pgmx to RAM
2704 ;; R23:Z = Source Address
2705 ;; X = Destination Address
2706 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
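;; Roughly, in C (illustrative only; the byte count is in R25:R24, or in
;; R24 zero-extended for __movmemx_qi):
;;
;;     while (len--)
;;         *dest++ = *src++;   /* src read via LD or (E)LPM depending on R23.7 */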
2707
2708 #if defined (L_movmemx)
2709
2710 #define HHI8 23
2711 #define LOOP 24
2712
2713 DEFUN __movmemx_qi
2714 ;; #Bytes to copy fit in 8 Bits (1..255)
2715 ;; Zero-extend Loop Counter
2716 clr LOOP+1
2717 ;; FALLTHRU
2718 ENDF __movmemx_qi
2719
2720 DEFUN __movmemx_hi
2721
2722 ;; Read from where?
2723 sbrc HHI8, 7
2724 rjmp 1f
2725
2726 ;; Read from Flash
2727
2728 #if defined (__AVR_HAVE_ELPM__)
2729 out __RAMPZ__, HHI8
2730 #endif
2731
2732 0: ;; Load 1 Byte from Flash...
2733
2734 #if defined (__AVR_HAVE_ELPMX__)
2735 elpm r0, Z+
2736 #elif defined (__AVR_HAVE_ELPM__)
2737 elpm
2738 adiw r30, 1
2739 adc HHI8, __zero_reg__
2740 out __RAMPZ__, HHI8
2741 #elif defined (__AVR_HAVE_LPMX__)
2742 lpm r0, Z+
2743 #else
2744 lpm
2745 adiw r30, 1
2746 #endif
2747
2748 ;; ...and store that Byte to RAM Destination
2749 st X+, r0
2750 sbiw LOOP, 1
2751 brne 0b
2752 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2753 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2754 out __RAMPZ__, __zero_reg__
2755 #endif /* ELPM && RAMPD */
2756 ret
2757
2758 ;; Read from RAM
2759
2760 1: ;; Read 1 Byte from RAM...
2761 ld r0, Z+
2762 ;; and store that Byte to RAM Destination
2763 st X+, r0
2764 sbiw LOOP, 1
2765 brne 1b
2766 ret
2767 ENDF __movmemx_hi
2768
2769 #undef HHI8
2770 #undef LOOP
2771
2772 #endif /* L_movmemx */
2773 #endif /* !defined (__AVR_TINY__) */
2774
2775 \f
2776 .section .text.libgcc.builtins, "ax", @progbits
2777
2778 /**********************************
2779 * Find first set Bit (ffs)
2780 **********************************/
2781
2782 #if defined (L_ffssi2)
2783 ;; find first set bit
2784 ;; r25:r24 = ffs32 (r25:r22)
2785 ;; clobbers: r22, r26
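;; ffs returns one plus the index of the least significant set bit, and 0
;; for a zero argument, e.g. ffs32 (0x8) = 4, ffs32 (0) = 0.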
2786 DEFUN __ffssi2
2787 clr r26
2788 tst r22
2789 brne 1f
2790 subi r26, -8
2791 or r22, r23
2792 brne 1f
2793 subi r26, -8
2794 or r22, r24
2795 brne 1f
2796 subi r26, -8
2797 or r22, r25
2798 brne 1f
2799 ret
2800 1: mov r24, r22
2801 XJMP __loop_ffsqi2
2802 ENDF __ffssi2
2803 #endif /* defined (L_ffssi2) */
2804
2805 #if defined (L_ffshi2)
2806 ;; find first set bit
2807 ;; r25:r24 = ffs16 (r25:r24)
2808 ;; clobbers: r26
2809 DEFUN __ffshi2
2810 clr r26
2811 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2812 ;; Some cores have problems skipping a 2-word instruction
2813 tst r24
2814 breq 2f
2815 #else
2816 cpse r24, __zero_reg__
2817 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2818 1: XJMP __loop_ffsqi2
2819 2: ldi r26, 8
2820 or r24, r25
2821 brne 1b
2822 ret
2823 ENDF __ffshi2
2824 #endif /* defined (L_ffshi2) */
2825
2826 #if defined (L_loop_ffsqi2)
2827 ;; Helper for ffshi2, ffssi2
2828 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2829 ;; r24 must be != 0
2830 ;; clobbers: r26
2831 DEFUN __loop_ffsqi2
2832 inc r26
2833 lsr r24
2834 brcc __loop_ffsqi2
2835 mov r24, r26
2836 clr r25
2837 ret
2838 ENDF __loop_ffsqi2
2839 #endif /* defined (L_loop_ffsqi2) */
2840
2841 \f
2842 /**********************************
2843 * Count trailing Zeros (ctz)
2844 **********************************/
2845
2846 #if defined (L_ctzsi2)
2847 ;; count trailing zeros
2848 ;; r25:r24 = ctz32 (r25:r22)
2849 ;; clobbers: r26, r22
2850 ;; ctz(0) = 255
2851 ;; Note that ctz(0) is undefined for GCC
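;; ctz(x) = ffs(x) - 1 for x != 0, hence the DEC after the call; for
;; x == 0, ffs returns 0 and the DEC wraps r24 around to 255.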
2852 DEFUN __ctzsi2
2853 XCALL __ffssi2
2854 dec r24
2855 ret
2856 ENDF __ctzsi2
2857 #endif /* defined (L_ctzsi2) */
2858
2859 #if defined (L_ctzhi2)
2860 ;; count trailing zeros
2861 ;; r25:r24 = ctz16 (r25:r24)
2862 ;; clobbers: r26
2863 ;; ctz(0) = 255
2864 ;; Note that ctz(0) is undefined for GCC
2865 DEFUN __ctzhi2
2866 XCALL __ffshi2
2867 dec r24
2868 ret
2869 ENDF __ctzhi2
2870 #endif /* defined (L_ctzhi2) */
2871
2872 \f
2873 /**********************************
2874 * Count leading Zeros (clz)
2875 **********************************/
2876
2877 #if defined (L_clzdi2)
2878 ;; count leading zeros
2879 ;; r25:r24 = clz64 (r25:r18)
2880 ;; clobbers: r22, r23, r26
2881 DEFUN __clzdi2
2882 XCALL __clzsi2
2883 sbrs r24, 5
2884 ret
2885 mov_l r22, r18
2886 mov_h r23, r19
2887 mov_l r24, r20
2888 mov_h r25, r21
2889 XCALL __clzsi2
2890 subi r24, -32
2891 ret
2892 ENDF __clzdi2
2893 #endif /* defined (L_clzdi2) */
2894
2895 #if defined (L_clzsi2)
2896 ;; count leading zeros
2897 ;; r25:r24 = clz32 (r25:r22)
2898 ;; clobbers: r26
2899 DEFUN __clzsi2
2900 XCALL __clzhi2
2901 sbrs r24, 4
2902 ret
2903 mov_l r24, r22
2904 mov_h r25, r23
2905 XCALL __clzhi2
2906 subi r24, -16
2907 ret
2908 ENDF __clzsi2
2909 #endif /* defined (L_clzsi2) */
2910
2911 #if defined (L_clzhi2)
2912 ;; count leading zeros
2913 ;; r25:r24 = clz16 (r25:r24)
2914 ;; clobbers: r26
2915 DEFUN __clzhi2
2916 clr r26
2917 tst r25
2918 brne 1f
2919 subi r26, -8
2920 or r25, r24
2921 brne 1f
2922 ldi r24, 16
2923 ret
2924 1: cpi r25, 16
2925 brsh 3f
2926 subi r26, -3
2927 swap r25
2928 2: inc r26
2929 3: lsl r25
2930 brcc 2b
2931 mov r24, r26
2932 clr r25
2933 ret
2934 ENDF __clzhi2
2935 #endif /* defined (L_clzhi2) */
2936
2937 \f
2938 /**********************************
2939 * Parity
2940 **********************************/
2941
2942 #if defined (L_paritydi2)
2943 ;; r25:r24 = parity64 (r25:r18)
2944 ;; clobbers: __tmp_reg__
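;; The parity of a value equals the parity of the XOR of all of its bytes;
;; r18..r21 are folded into r24 here, and the 32-bit routine folds the rest.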
2945 DEFUN __paritydi2
2946 eor r24, r18
2947 eor r24, r19
2948 eor r24, r20
2949 eor r24, r21
2950 XJMP __paritysi2
2951 ENDF __paritydi2
2952 #endif /* defined (L_paritydi2) */
2953
2954 #if defined (L_paritysi2)
2955 ;; r25:r24 = parity32 (r25:r22)
2956 ;; clobbers: __tmp_reg__
2957 DEFUN __paritysi2
2958 eor r24, r22
2959 eor r24, r23
2960 XJMP __parityhi2
2961 ENDF __paritysi2
2962 #endif /* defined (L_paritysi2) */
2963
2964 #if defined (L_parityhi2)
2965 ;; r25:r24 = parity16 (r25:r24)
2966 ;; clobbers: __tmp_reg__
2967 DEFUN __parityhi2
2968 eor r24, r25
2969 ;; FALLTHRU
2970 ENDF __parityhi2
2971
2972 ;; r25:r24 = parity8 (r24)
2973 ;; clobbers: __tmp_reg__
2974 DEFUN __parityqi2
2975 ;; parity is in r24[0..7]
2976 mov __tmp_reg__, r24
2977 swap __tmp_reg__
2978 eor r24, __tmp_reg__
2979 ;; parity is in r24[0..3]
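;; The following add/mask/add sequence collects the parity of bits 1..3
;; into bit 3 while leaving bit 0 untouched; the SBRC/INC below then folds
;; bit 3 into bit 0.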
2980 subi r24, -4
2981 andi r24, -5
2982 subi r24, -6
2983 ;; parity is in r24[0,3]
2984 sbrc r24, 3
2985 inc r24
2986 ;; parity is in r24[0]
2987 andi r24, 1
2988 clr r25
2989 ret
2990 ENDF __parityqi2
2991 #endif /* defined (L_parityhi2) */
2992
2993 \f
2994 /**********************************
2995 * Population Count
2996 **********************************/
2997
2998 #if defined (L_popcounthi2)
2999 ;; population count
3000 ;; r25:r24 = popcount16 (r25:r24)
3001 ;; clobbers: __tmp_reg__
3002 DEFUN __popcounthi2
3003 XCALL __popcountqi2
3004 push r24
3005 mov r24, r25
3006 XCALL __popcountqi2
3007 clr r25
3008 ;; FALLTHRU
3009 ENDF __popcounthi2
3010
3011 DEFUN __popcounthi2_tail
3012 pop __tmp_reg__
3013 add r24, __tmp_reg__
3014 ret
3015 ENDF __popcounthi2_tail
3016 #endif /* defined (L_popcounthi2) */
3017
3018 #if defined (L_popcountsi2)
3019 ;; population count
3020 ;; r25:r24 = popcount32 (r25:r22)
3021 ;; clobbers: __tmp_reg__
3022 DEFUN __popcountsi2
3023 XCALL __popcounthi2
3024 push r24
3025 mov_l r24, r22
3026 mov_h r25, r23
3027 XCALL __popcounthi2
3028 XJMP __popcounthi2_tail
3029 ENDF __popcountsi2
3030 #endif /* defined (L_popcountsi2) */
3031
3032 #if defined (L_popcountdi2)
3033 ;; population count
3034 ;; r25:r24 = popcount64 (r25:r18)
3035 ;; clobbers: r22, r23, __tmp_reg__
3036 DEFUN __popcountdi2
3037 XCALL __popcountsi2
3038 push r24
3039 mov_l r22, r18
3040 mov_h r23, r19
3041 mov_l r24, r20
3042 mov_h r25, r21
3043 XCALL __popcountsi2
3044 XJMP __popcounthi2_tail
3045 ENDF __popcountdi2
3046 #endif /* defined (L_popcountdi2) */
3047
3048 #if defined (L_popcountqi2)
3049 ;; population count
3050 ;; r24 = popcount8 (r24)
3051 ;; clobbers: __tmp_reg__
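;; Bit-serial approach: bit 0 is isolated with ANDI, each further bit is
;; shifted into carry and accumulated with ADC; the final ADC adds the
;; carry from bit 7 together with __tmp_reg__, which is 0 by then.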
3052 DEFUN __popcountqi2
3053 mov __tmp_reg__, r24
3054 andi r24, 1
3055 lsr __tmp_reg__
3056 lsr __tmp_reg__
3057 adc r24, __zero_reg__
3058 lsr __tmp_reg__
3059 adc r24, __zero_reg__
3060 lsr __tmp_reg__
3061 adc r24, __zero_reg__
3062 lsr __tmp_reg__
3063 adc r24, __zero_reg__
3064 lsr __tmp_reg__
3065 adc r24, __zero_reg__
3066 lsr __tmp_reg__
3067 adc r24, __tmp_reg__
3068 ret
3069 ENDF __popcountqi2
3070 #endif /* defined (L_popcountqi2) */
3071
3072 \f
3073 /**********************************
3074 * Swap bytes
3075 **********************************/
3076
3077 ;; swap two registers with different register numbers
3078 .macro bswap a, b
3079 eor \a, \b
3080 eor \b, \a
3081 eor \a, \b
3082 .endm
3083
3084 #if defined (L_bswapsi2)
3085 ;; swap bytes
3086 ;; r25:r22 = bswap32 (r25:r22)
3087 DEFUN __bswapsi2
3088 bswap r22, r25
3089 bswap r23, r24
3090 ret
3091 ENDF __bswapsi2
3092 #endif /* defined (L_bswapsi2) */
3093
3094 #if defined (L_bswapdi2)
3095 ;; swap bytes
3096 ;; r25:r18 = bswap64 (r25:r18)
3097 DEFUN __bswapdi2
3098 bswap r18, r25
3099 bswap r19, r24
3100 bswap r20, r23
3101 bswap r21, r22
3102 ret
3103 ENDF __bswapdi2
3104 #endif /* defined (L_bswapdi2) */
3105
3106 \f
3107 /**********************************
3108 * 64-bit shifts
3109 **********************************/
3110
3111 #if defined (L_ashrdi3)
3112
3113 #define SS __zero_reg__
3114
3115 ;; Arithmetic shift right
3116 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
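;; SS (__zero_reg__) supplies the bits shifted in from the top: it stays
;; 0x00 for a logical shift and is set to 0xFF below for an arithmetic
;; shift of a negative value; __lshrdi3 restores it to 0 before returning.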
3117 DEFUN __ashrdi3
3118 sbrc r25, 7
3119 com SS
3120 ;; FALLTHRU
3121 ENDF __ashrdi3
3122
3123 ;; Logical shift right
3124 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
3125 DEFUN __lshrdi3
3126 ;; Sign-extension bits are in SS (__zero_reg__)
3127 mov __tmp_reg__, r16
3128 0: cpi r16, 8
3129 brlo 2f
3130 subi r16, 8
3131 mov r18, r19
3132 mov r19, r20
3133 mov r20, r21
3134 mov r21, r22
3135 mov r22, r23
3136 mov r23, r24
3137 mov r24, r25
3138 mov r25, SS
3139 rjmp 0b
3140 1: asr SS
3141 ror r25
3142 ror r24
3143 ror r23
3144 ror r22
3145 ror r21
3146 ror r20
3147 ror r19
3148 ror r18
3149 2: dec r16
3150 brpl 1b
3151 clr __zero_reg__
3152 mov r16, __tmp_reg__
3153 ret
3154 ENDF __lshrdi3
3155
3156 #undef SS
3157
3158 #endif /* defined (L_ashrdi3) */
3159
3160 #if defined (L_ashldi3)
3161 ;; Shift left
3162 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
3163 ;; This function does not clobber T.
3164 DEFUN __ashldi3
3165 mov __tmp_reg__, r16
3166 0: cpi r16, 8
3167 brlo 2f
3168 mov r25, r24
3169 mov r24, r23
3170 mov r23, r22
3171 mov r22, r21
3172 mov r21, r20
3173 mov r20, r19
3174 mov r19, r18
3175 clr r18
3176 subi r16, 8
3177 rjmp 0b
3178 1: lsl r18
3179 rol r19
3180 rol r20
3181 rol r21
3182 rol r22
3183 rol r23
3184 rol r24
3185 rol r25
3186 2: dec r16
3187 brpl 1b
3188 mov r16, __tmp_reg__
3189 ret
3190 ENDF __ashldi3
3191 #endif /* defined (L_ashldi3) */
3192
3193 #if defined (L_rotldi3)
3194 ;; Rotate left
3195 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
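;; Whole-byte rotations are handled first, 8 bits at a time; the bit loop
;; then shifts the 64-bit value left and the ADC wraps the bit shifted out
;; of r25 back into bit 0 of r18, turning the shift into a rotation.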
3196 DEFUN __rotldi3
3197 push r16
3198 0: cpi r16, 8
3199 brlo 2f
3200 subi r16, 8
3201 mov __tmp_reg__, r25
3202 mov r25, r24
3203 mov r24, r23
3204 mov r23, r22
3205 mov r22, r21
3206 mov r21, r20
3207 mov r20, r19
3208 mov r19, r18
3209 mov r18, __tmp_reg__
3210 rjmp 0b
3211 1: lsl r18
3212 rol r19
3213 rol r20
3214 rol r21
3215 rol r22
3216 rol r23
3217 rol r24
3218 rol r25
3219 adc r18, __zero_reg__
3220 2: dec r16
3221 brpl 1b
3222 pop r16
3223 ret
3224 ENDF __rotldi3
3225 #endif /* defined (L_rotldi3) */
3226
3227 \f
3228 .section .text.libgcc.fmul, "ax", @progbits
3229
3230 /***********************************************************/
3231 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3232 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3233 /***********************************************************/
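;; The hardware FMUL multiplies two unsigned 1.7 fixed-point operands and
;; returns the 16-bit product shifted left by one, i.e. a 1.15 result.
;; Roughly, in C (illustrative sketch only, not part of the build):
;;
;;     uint16_t fmul (uint8_t a, uint8_t b)
;;     {
;;         return (uint16_t) (((uint16_t) a * b) << 1);
;;     }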
3234
3235 #define A1 24
3236 #define B1 25
3237 #define C0 22
3238 #define C1 23
3239 #define A0 __tmp_reg__
3240
3241 #ifdef L_fmuls
3242 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
3243 ;;; Clobbers: r24, r25, __tmp_reg__
3244 DEFUN __fmuls
3245 ;; A0.7 = negate result?
3246 mov A0, A1
3247 eor A0, B1
3248 ;; B1 = |B1|
3249 sbrc B1, 7
3250 neg B1
3251 XJMP __fmulsu_exit
3252 ENDF __fmuls
3253 #endif /* L_fmuls */
3254
3255 #ifdef L_fmulsu
3256 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
3257 ;;; Clobbers: r24, r25, __tmp_reg__
3258 DEFUN __fmulsu
3259 ;; A0.7 = negate result?
3260 mov A0, A1
3261 ;; FALLTHRU
3262 ENDF __fmulsu
3263
3264 ;; Helper for __fmuls and __fmulsu
3265 DEFUN __fmulsu_exit
3266 ;; A1 = |A1|
3267 sbrc A1, 7
3268 neg A1
3269 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
3270 ;; Some cores have problems skipping a 2-word instruction
3271 tst A0
3272 brmi 1f
3273 #else
3274 sbrs A0, 7
3275 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
3276 XJMP __fmul
3277 1: XCALL __fmul
3278 ;; C = -C iff A0.7 = 1
3279 NEG2 C0
3280 ret
3281 ENDF __fmulsu_exit
3282 #endif /* L_fmulsu */
3283
3284
3285 #ifdef L_fmul
3286 ;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
3287 ;;; Clobbers: r24, r25, __tmp_reg__
3288 DEFUN __fmul
3289 ; clear result
3290 clr C0
3291 clr C1
3292 clr A0
3293 1: tst B1
3294 ;; 1.0 = 0x80, so test bit 7 of B to see whether A must be added to C.
3295 2: brpl 3f
3296 ;; C += A
3297 add C0, A0
3298 adc C1, A1
3299 3: ;; A >>= 1
3300 lsr A1
3301 ror A0
3302 ;; B <<= 1
3303 lsl B1
3304 brne 2b
3305 ret
3306 ENDF __fmul
3307 #endif /* L_fmul */
3308
3309 #undef A0
3310 #undef A1
3311 #undef B1
3312 #undef C0
3313 #undef C1
3314
3315 #include "lib1funcs-fixed.S"