;; (gitweb page header from the scrape, kept for provenance, commented out)
;; git.ipfire.org Git - thirdparty/gcc.git/blob - libgcc/config/avr/lib1funcs-fixed.S
;; Commit: "Update copyright years."  File: libgcc/config/avr/lib1funcs-fixed.S
/* -*- Mode: Asm -*- */
;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
;; Contributed by Sean D'Epagnier (sean@depagnier.com)
;;                Georg-Johann Lay (avr@gjlay.de)

;; This file is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the
;; Free Software Foundation; either version 3, or (at your option) any
;; later version.

;; In addition to the permissions in the GNU General Public License, the
;; Free Software Foundation gives you unlimited permission to link the
;; compiled version of this file into combinations with other programs,
;; and to distribute those combinations without any restriction coming
;; from the use of this file.  (The General Public License restrictions
;; do apply in other respects; for example, they cover modification of
;; the file, and distribution when not linked into a combine
;; executable.)

;; This file is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program; see the file COPYING.  If not, write to
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Fixed point library routines for AVR
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#if defined __AVR_TINY__
;; NOTE(review): on reduced (TINY) cores the scratch / zero registers
;; live in r16/r17 instead of r0/r1 — presumably because r0..r15 are
;; not available there; confirm against avr-gcc register conventions.
#define __zero_reg__ r17
#define __tmp_reg__ r16
#else
#define __zero_reg__ r1
#define __tmp_reg__ r0
#endif

.section .text.libgcc.fixed, "ax", @progbits

#ifndef __AVR_TINY__

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions to float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#if defined (L_fractqqsf)
;; float __fractqqsf (QQ): widen s.7 operand (R24) and tail-call the
;; SA -> SF conversion below.
DEFUN __fractqqsf
    ;; Move in place for SA -> SF conversion
    clr     r22
    mov     r23, r24
    ;; Sign-extend (R24 still holds the original value here)
    lsl     r24
    sbc     r24, r24
    mov     r25, r24
    XJMP    __fractsasf
ENDF __fractqqsf
#endif  /* L_fractqqsf */

#if defined (L_fractuqqsf)
;; float __fractuqqsf (UQQ): widen unsigned .8 operand (R24), tail-call
;; the USA -> SF conversion.
DEFUN __fractuqqsf
    ;; Move in place for USA -> SF conversion
    clr     r22
    mov     r23, r24
    ;; Zero-extend
    clr     r24
    clr     r25
    XJMP    __fractusasf
ENDF __fractuqqsf
#endif  /* L_fractuqqsf */

#if defined (L_fracthqsf)
;; float __fracthqsf (HQ): widen s.15 operand (R25:R24).
DEFUN __fracthqsf
    ;; Move in place for SA -> SF conversion
    wmov    22, 24
    ;; Sign-extend (R25 still holds the original high byte)
    lsl     r25
    sbc     r24, r24
    mov     r25, r24
    XJMP    __fractsasf
ENDF __fracthqsf
#endif  /* L_fracthqsf */

#if defined (L_fractuhqsf)
;; float __fractuhqsf (UHQ): widen unsigned .16 operand (R25:R24).
DEFUN __fractuhqsf
    ;; Move in place for USA -> SF conversion
    wmov    22, 24
    ;; Zero-extend
    clr     r24
    clr     r25
    XJMP    __fractusasf
ENDF __fractuhqsf
#endif  /* L_fractuhqsf */

#if defined (L_fracthasf)
;; float __fracthasf (HA): widen s8.7 operand (R25:R24).
DEFUN __fracthasf
    ;; Move in place for SA -> SF conversion
    clr     r22
    mov     r23, r24
    mov     r24, r25
    ;; Sign-extend (R25 still holds the original high byte)
    lsl     r25
    sbc     r25, r25
    XJMP    __fractsasf
ENDF __fracthasf
#endif  /* L_fracthasf */

#if defined (L_fractuhasf)
;; float __fractuhasf (UHA): widen unsigned 8.8 operand (R25:R24).
DEFUN __fractuhasf
    ;; Move in place for USA -> SF conversion
    clr     r22
    mov     r23, r24
    mov     r24, r25
    ;; Zero-extend
    clr     r25
    XJMP    __fractusasf
ENDF __fractuhasf
#endif  /* L_fractuhasf */

#if defined (L_fractsqsf)
;; float __fractsqsf (SQ): convert via __floatsisf, then rescale.
DEFUN __fractsqsf
    XCALL   __floatsisf
    ;; Divide non-zero results by 2^31 to move the
    ;; decimal point into place
    tst     r25
    breq    0f
    subi    r24, exp_lo (31)
    sbci    r25, exp_hi (31)
0:  ret
ENDF __fractsqsf
#endif  /* L_fractsqsf */

#if defined (L_fractusqsf)
;; float __fractusqsf (USQ): convert via __floatunsisf, then rescale.
DEFUN __fractusqsf
    XCALL   __floatunsisf
    ;; Divide non-zero results by 2^32 to move the
    ;; decimal point into place
    cpse    r25, __zero_reg__
    subi    r25, exp_hi (32)
    ret
ENDF __fractusqsf
#endif  /* L_fractusqsf */

#if defined (L_fractsasf)
;; float __fractsasf (SA): convert via __floatsisf, then rescale.
DEFUN __fractsasf
    XCALL   __floatsisf
    ;; Divide non-zero results by 2^15 to move the
    ;; decimal point into place
    tst     r25
    breq    0f
    subi    r24, exp_lo (15)
    sbci    r25, exp_hi (15)
0:  ret
ENDF __fractsasf
#endif  /* L_fractsasf */

#if defined (L_fractusasf)
;; float __fractusasf (USA): convert via __floatunsisf, then rescale.
DEFUN __fractusasf
    XCALL   __floatunsisf
    ;; Divide non-zero results by 2^16 to move the
    ;; decimal point into place
    cpse    r25, __zero_reg__
    subi    r25, exp_hi (16)
    ret
ENDF __fractusasf
#endif  /* L_fractusasf */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions from float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#if defined (L_fractsfqq)
;; QQ __fractsfqq (float): scale the float by adjusting its exponent,
;; then truncate via __fixsfsi.
DEFUN __fractsfqq
    ;; Multiply with 2^{24+7} to get a QQ result in r25
    subi    r24, exp_lo (-31)
    sbci    r25, exp_hi (-31)
    XCALL   __fixsfsi
    mov     r24, r25
    ret
ENDF __fractsfqq
#endif  /* L_fractsfqq */

#if defined (L_fractsfuqq)
;; UQQ __fractsfuqq (float)
DEFUN __fractsfuqq
    ;; Multiply with 2^{24+8} to get a UQQ result in r25
    subi    r25, exp_hi (-32)
    XCALL   __fixunssfsi
    mov     r24, r25
    ret
ENDF __fractsfuqq
#endif  /* L_fractsfuqq */

#if defined (L_fractsfha)
;; HA __fractsfha (float)
DEFUN __fractsfha
    ;; Multiply with 2^{16+7} to get a HA result in r25:r24
    subi    r24, exp_lo (-23)
    sbci    r25, exp_hi (-23)
    XJMP    __fixsfsi
ENDF __fractsfha
#endif  /* L_fractsfha */

#if defined (L_fractsfuha)
;; UHA __fractsfuha (float)
DEFUN __fractsfuha
    ;; Multiply with 2^24 to get a UHA result in r25:r24
    subi    r25, exp_hi (-24)
    XJMP    __fixunssfsi
ENDF __fractsfuha
#endif  /* L_fractsfuha */

#if defined (L_fractsfhq)
FALIAS __fractsfsq

;; HQ __fractsfhq (float); also serves as SQ __fractsfsq (float).
DEFUN __fractsfhq
    ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
    ;; resp. with 2^31 to get a SQ result in r25:r22
    subi    r24, exp_lo (-31)
    sbci    r25, exp_hi (-31)
    XJMP    __fixsfsi
ENDF __fractsfhq
#endif  /* L_fractsfhq */

#if defined (L_fractsfuhq)
FALIAS __fractsfusq

;; UHQ __fractsfuhq (float); also serves as USQ __fractsfusq (float).
DEFUN __fractsfuhq
    ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
    ;; resp. with 2^32 to get a USQ result in r25:r22
    subi    r25, exp_hi (-32)
    XJMP    __fixunssfsi
ENDF __fractsfuhq
#endif  /* L_fractsfuhq */

#if defined (L_fractsfsa)
;; SA __fractsfsa (float)
DEFUN __fractsfsa
    ;; Multiply with 2^15 to get a SA result in r25:r22
    subi    r24, exp_lo (-15)
    sbci    r25, exp_hi (-15)
    XJMP    __fixsfsi
ENDF __fractsfsa
#endif  /* L_fractsfsa */

#if defined (L_fractsfusa)
;; USA __fractsfusa (float)
DEFUN __fractsfusa
    ;; Multiply with 2^16 to get a USA result in r25:r22
    subi    r25, exp_hi (-16)
    XJMP    __fixunssfsi
ENDF __fractsfusa
#endif  /* L_fractsfusa */


;; For multiplication the functions here are called directly from
;; avr-fixed.md instead of using the standard libcall mechanisms.
;; This can make better code because GCC knows exactly which
;; of the call-used registers (not all of them) are clobbered.  */

/*******************************************************
    Fractional Multiplication  8 x 8  without MUL
*******************************************************/

#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
;;; R23 = R24 * R25
;;; Clobbers: __tmp_reg__, R22, R24, R25
;;; Rounding:  ???
DEFUN __mulqq3
    XCALL   __fmuls
    ;; TR 18037 requires that  (-1) * (-1)  does not overflow
    ;; The only input that can produce -1 is (-1)^2.
    dec     r23
    brvs    0f
    inc     r23
0:  ret
ENDF __mulqq3
#endif  /* L_mulqq3 && ! HAVE_MUL */

/*******************************************************
    Fractional Multiply  .16 x .16  with and without MUL
*******************************************************/

#if defined (L_mulhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
DEFUN __mulhq3
    XCALL   __mulhisi3
    ;; Shift result into place
    lsl     r23
    rol     r24
    rol     r25
    brvs    1f
    ;; Round
    sbrc    r23, 7
    adiw    r24, 1
    ret
1:  ;; Overflow.  TR 18037 requires  (-1)^2  not to overflow
    ldi     r24, lo8 (0x7fff)
    ldi     r25, hi8 (0x7fff)
    ret
ENDF __mulhq3
#endif /* defined (L_mulhq3) */

#if defined (L_muluhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
DEFUN __muluhq3
    XCALL   __umulhisi3
    ;; Round
    sbrc    r23, 7
    adiw    r24, 1
    ret
ENDF __muluhq3
#endif  /* L_muluhq3 */


/*******************************************************
    Fixed Multiply  8.8 x 8.8  with and without MUL
*******************************************************/

#if defined (L_mulha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
DEFUN __mulha3
    XCALL   __mulhisi3
    ;; Adjust decimal point (one fewer fractional bit than unsigned),
    ;; then share the move/round tail with the unsigned version.
    lsl     r22
    rol     r23
    rol     r24
    XJMP    __muluha3_round
ENDF __mulha3
#endif  /* L_mulha3 */

#if defined (L_muluha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
DEFUN __muluha3
    XCALL   __umulhisi3
    XJMP    __muluha3_round
ENDF __muluha3
#endif  /* L_muluha3 */

#if defined (L_muluha3_round)
;;; Common tail: move the 16-bit result into R25:R24 and round to
;;; nearest using the top discarded bit (R22.7).
DEFUN __muluha3_round
    ;; Shift result into place
    mov     r25, r24
    mov     r24, r23
    ;; Round
    sbrc    r22, 7
    adiw    r24, 1
    ret
ENDF __muluha3_round
#endif  /* L_muluha3_round */


/*******************************************************
    Fixed Multiplication  16.16 x 16.16
*******************************************************/

;; Bits outside the result (below LSB), used in the signed version
#define GUARD __tmp_reg__

#if defined (__AVR_HAVE_MUL__)

;; Multiplier
#define A0  16
#define A1  A0+1
#define A2  A1+1
#define A3  A2+1

;; Multiplicand
#define B0  20
#define B1  B0+1
#define B2  B1+1
#define B3  B2+1

;; Result
#define C0  24
#define C1  C0+1
#define C2  C1+1
#define C3  C2+1

#if defined (L_mulusa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF  __mulusa3

;;; Round for last digit iff T = 1
;;; Return guard bits in GUARD (__tmp_reg__).
;;; Rounding, T = 0:  -1.0 LSB < error <= 0   LSB
;;; Rounding, T = 1:  -0.5 LSB < error <= 0.5 LSB
DEFUN __mulusa3_round
    ;; Some of the MUL instructions have LSBs outside the result.
    ;; Don't ignore these LSBs in order to tame rounding error.
    ;; Use C2/C3 for these LSBs.

    clr C0
    clr C1
    mul A0, B0  $  movw C2, r0

    mul A1, B0  $  add C3, r0  $  adc C0, r1
    mul A0, B1  $  add C3, r0  $  adc C0, r1  $  rol C1

    ;; Round if T = 1.  Store guarding bits outside the result for rounding
    ;; and left-shift by the signed version (function below).
    brtc 0f
    sbrc C3, 7
    adiw C0, 1
0:  push C3

    ;; The following MULs don't have LSBs outside the result.
    ;; C2/C3 is the high part.

    mul A0, B2  $  add C0, r0  $  adc C1, r1  $  sbc  C2, C2
    mul A1, B1  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
    mul A2, B0  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
    neg C2

    mul A0, B3  $  add C1, r0  $  adc C2, r1  $  sbc  C3, C3
    mul A1, B2  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    mul A2, B1  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    mul A3, B0  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    neg C3

    mul A1, B3  $  add C2, r0  $  adc C3, r1
    mul A2, B2  $  add C2, r0  $  adc C3, r1
    mul A3, B1  $  add C2, r0  $  adc C3, r1

    mul A2, B3  $  add C3, r0
    mul A3, B2  $  add C3, r0

    ;; Guard bits used in the signed version below.
    pop GUARD
    clr __zero_reg__
    ret
ENDF __mulusa3_round
#endif /* L_mulusa3 */

#if defined (L_mulsa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Clobbers: __tmp_reg__, T
;;; Rounding:  -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulsa3
    clt
    XCALL   __mulusa3_round
    ;; A posteriori sign extension of the operands
    tst     B3
    brpl    1f
    sub     C2, A0
    sbc     C3, A1
1:  sbrs    A3, 7
    rjmp    2f
    sub     C2, B0
    sbc     C3, B1
2:
    ;; Shift 1 bit left to adjust for 15 fractional bits
    lsl     GUARD
    rol     C0
    rol     C1
    rol     C2
    rol     C3
    ;; Round last digit
    lsl     GUARD
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __mulsa3
#endif /* L_mulsa3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3

#else /* __AVR_HAVE_MUL__ */

#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3

#define B0 22
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define C0 22
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

;; __tmp_reg__
#define CC0 0
;; __zero_reg__
#define CC1 1
#define CC2 16
#define CC3 17

#define AA0 26
#define AA1 AA0+1
#define AA2 30
#define AA3 AA2+1

#if defined (L_mulsa3)
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding:  -1 LSB <= error <= 1 LSB
DEFUN __mulsa3
    push    B0
    push    B1
    push    B3
    clt
    XCALL   __mulusa3_round
    pop     r30
    ;; sign-extend B
    bst     r30, 7
    brtc    1f
    ;; A1, A0 survived in R27:R26
    sub     C2, AA0
    sbc     C3, AA1
1:
    pop     AA1  ;; B1
    pop     AA0  ;; B0

    ;; sign-extend A.  A3 survived in R31
    bst     AA3, 7
    brtc    2f
    sub     C2, AA0
    sbc     C3, AA1
2:
    ;; Shift 1 bit left to adjust for 15 fractional bits
    lsl     GUARD
    rol     C0
    rol     C1
    rol     C2
    rol     C3
    ;; Round last digit
    lsl     GUARD
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __mulsa3
#endif  /* L_mulsa3 */

#if defined (L_mulusa3)
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding:  -1 LSB <= error <= 1 LSB
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF  __mulusa3

;;; A[] survives in 26, 27, 30, 31
;;; Also used by __mulsa3 with T = 0
;;; Round if T = 1
;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
DEFUN __mulusa3_round
    push    CC2
    push    CC3
    ; clear result
    clr     __tmp_reg__
    wmov    CC2, CC0
    ; save multiplicand
    wmov    AA0, A0
    wmov    AA2, A2
    rjmp    3f

    ;; Loop the integral part

1:  ;; CC += A * 2^n;  n >= 0
    add  CC0,A0  $  adc CC1,A1  $  adc  CC2,A2  $  adc  CC3,A3

2:  ;; A <<= 1
    lsl  A0      $  rol A1      $  rol  A2      $  rol  A3

3:  ;; IBIT(B) >>= 1
    ;; Carry = n-th bit of B;  n >= 0
    lsr     B3
    ror     B2
    brcs    1b
    ;; Only continue the loop while bits of B remain (sbci leaves
    ;; Z set exactly when B3 is still zero after the shift).
    sbci    B3, 0
    brne    2b

    ;; Loop the fractional part
    ;; B2/B3 is 0 now, use as guard bits for rounding
    ;; Restore multiplicand
    wmov    A0, AA0
    wmov    A2, AA2
    rjmp    5f

4:  ;; CC += A:Guard * 2^n;  n < 0
    add  B3,B2  $  adc CC0,A0  $  adc CC1,A1  $  adc CC2,A2  $  adc CC3,A3
5:
    ;; A:Guard >>= 1
    lsr  A3  $  ror A2  $  ror A1  $  ror A0  $  ror B2

    ;; FBIT(B) <<= 1
    ;; Carry = n-th bit of B;  n < 0
    lsl     B0
    rol     B1
    brcs    4b
    sbci    B0, 0
    brne    5b

    ;; Save guard bits and set carry for rounding
    push    B3
    lsl     B3
    ;; Move result into place
    wmov    C2, CC2
    wmov    C0, CC0
    clr     __zero_reg__
    brtc    6f
    ;; Round iff T = 1
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
6:
    pop     GUARD
    ;; Epilogue
    pop     CC3
    pop     CC2
    ret
ENDF __mulusa3_round
#endif  /* L_mulusa3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef AA0
#undef AA1
#undef AA2
#undef AA3
#undef CC0
#undef CC1
#undef CC2
#undef CC3

#endif /* __AVR_HAVE_MUL__ */

#undef GUARD

/***********************************************************
    Fixed unsigned saturated Multiplication  8.8 x 8.8
***********************************************************/

#define C0  22
#define C1  C0+1
#define C2  C0+2
#define C3  C0+3
#define SS __tmp_reg__

#if defined (L_usmuluha3)
;; (R25:R24) = (R25:R24) *{usat} (R23:R22), saturating to 0xffff.
DEFUN __usmuluha3
    ;; Widening multiply
#ifdef __AVR_HAVE_MUL__
    ;; Adjust interface
    movw    R26, R22
    movw    R18, R24
#endif /* HAVE MUL */
    XCALL   __umulhisi3
    tst     C3
    brne    .Lmax
    ;; Round, target is in C1..C2
    lsl     C0
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    brcs    .Lmax
    ;; Move result into place
    mov     C3, C2
    mov     C2, C1
    ret
.Lmax:
    ;; Saturate
    ldi     C2, 0xff
    ldi     C3, 0xff
    ret
ENDF  __usmuluha3
#endif /* L_usmuluha3 */

/***********************************************************
    Fixed signed saturated Multiplication  s8.7 x s8.7
***********************************************************/

#if defined (L_ssmulha3)
;; (R25:R24) = (R25:R24) *{ssat} (R23:R22), saturating to [0x8000, 0x7fff].
DEFUN __ssmulha3
    ;; Widening multiply
#ifdef __AVR_HAVE_MUL__
    ;; Adjust interface
    movw    R26, R22
    movw    R18, R24
#endif /* HAVE MUL */
    XCALL   __mulhisi3
    ;; Adjust decimal point
    lsl     C0
    rol     C1
    rol     C2
    brvs    .LsatC3.3
    ;; The 9 MSBs must be the same
    rol     C3
    sbc     SS, SS
    cp      C3, SS
    brne    .LsatSS
    ;; Round
    lsl     C0
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    brvs    .Lmax
    ;; Move result into place
    mov     C3, C2
    mov     C2, C1
    ret
.Lmax:
    ;; Load 0x7fff
    clr     C3
.LsatC3.3:
    ;; C3 <  0 -->  0x8000
    ;; C3 >= 0 -->  0x7fff
    mov     SS, C3
.LsatSS:
    ;; Load min / max value:
    ;; SS = -1  -->  0x8000
    ;; SS =  0  -->  0x7fff
    ldi     C3, 0x7f
    ldi     C2, 0xff
    sbrc    SS, 7
    adiw    C2, 1
    ret
ENDF  __ssmulha3
#endif /* L_ssmulha3 */

#undef C0
#undef C1
#undef C2
#undef C3
#undef SS

/***********************************************************
    Fixed unsigned saturated Multiplication  16.16 x 16.16
***********************************************************/

#define C0  18
#define C1  C0+1
#define C2  C0+2
#define C3  C0+3
#define C4  C0+4
#define C5  C0+5
#define C6  C0+6
#define C7  C0+7
#define SS __tmp_reg__

#if defined (L_usmulusa3)
;; R22[4] = R22[4] *{usat} R18[4]
;; Ordinary ABI function
DEFUN __usmulusa3
    ;; Widening multiply
    XCALL   __umulsidi3
    ;; Overflow if any of the 16 MSBs is set
    or      C7, C6
    brne    .Lmax
    ;; Round, target is in C2..C5
    lsl     C1
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    adc     C4, __zero_reg__
    adc     C5, __zero_reg__
    brcs    .Lmax
    ;; Move result into place
    wmov    C6, C4
    wmov    C4, C2
    ret
.Lmax:
    ;; Saturate
    ldi     C7, 0xff
    ldi     C6, 0xff
    wmov    C4, C6
    ret
ENDF  __usmulusa3
#endif /* L_usmulusa3 */

/***********************************************************
    Fixed signed saturated Multiplication  s16.15 x s16.15
***********************************************************/

#if defined (L_ssmulsa3)
;; R22[4] = R22[4] *{ssat} R18[4]
;; Ordinary ABI function
DEFUN __ssmulsa3
    ;; Widening multiply
    XCALL   __mulsidi3
    ;; Adjust decimal point
    lsl     C1
    rol     C2
    rol     C3
    rol     C4
    rol     C5
    brvs    .LsatC7.7
    ;; The 17 MSBs must be the same
    rol     C6
    rol     C7
    sbc     SS, SS
    cp      C6, SS
    cpc     C7, SS
    brne    .LsatSS
    ;; Round
    lsl     C1
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    adc     C4, __zero_reg__
    adc     C5, __zero_reg__
    brvs    .Lmax
    ;; Move result into place
    wmov    C6, C4
    wmov    C4, C2
    ret

.Lmax:
    ;; Load 0x7fffffff
    clr     C7
.LsatC7.7:
    ;; C7 <  0 -->  0x80000000
    ;; C7 >= 0 -->  0x7fffffff
    lsl     C7
    sbc     SS, SS
.LsatSS:
    ;; Load min / max value:
    ;; SS = -1  -->  0x80000000
    ;; SS =  0  -->  0x7fffffff
    com     SS
    mov     C4, SS
    mov     C5, C4
    wmov    C6, C4
    subi    C7, 0x80
    ret
ENDF  __ssmulsa3
#endif /* L_ssmulsa3 */

#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#undef SS

/*******************************************************
    Fractional Division 8 / 8
*******************************************************/

#define r_divd  r25     /* dividend */
#define r_quo   r24     /* quotient */
#define r_div   r22     /* divisor */
#define r_sign  __tmp_reg__

#if defined (L_divqq3)
;; QQ __divqq3 (QQ, QQ): signed s.7 division via unsigned helper.
DEFUN __divqq3
    ;; Remember sign of the result, divide magnitudes.
    mov     r_sign, r_divd
    eor     r_sign, r_div
    sbrc    r_div, 7
    neg     r_div
    sbrc    r_divd, 7
    neg     r_divd
    XCALL   __divqq_helper
    lsr     r_quo
    sbrc    r_sign, 7   ; negate result if needed
    neg     r_quo
    ret
ENDF __divqq3
#endif  /* L_divqq3 */

#if defined (L_udivuqq3)
;; UQQ __udivuqq3 (UQQ, UQQ)
DEFUN __udivuqq3
    cp      r_divd, r_div
    brsh    0f
    XJMP    __divqq_helper
    ;; Result is out of [0, 1)  ==>  Return 1 - eps.
0:  ldi     r_quo, 0xff
    ret
ENDF __udivuqq3
#endif  /* L_udivuqq3 */


#if defined (L_divqq_helper)
;; Unsigned restoring-style 8-bit fractional division core.
;; Requires r_divd < r_div on entry; __zero_reg__ doubles as the
;; loop counter (one set bit shifted left 8 times) and is 0 again
;; on exit.
DEFUN __divqq_helper
    clr     r_quo           ; clear quotient
    inc     __zero_reg__    ; init loop counter, used per shift
__udivuqq3_loop:
    lsl     r_divd          ; shift dividend
    brcs    0f              ; dividend overflow
    cp      r_divd,r_div    ; compare dividend & divisor
    brcc    0f              ; dividend >= divisor
    rol     r_quo           ; shift quotient (with CARRY)
    rjmp    __udivuqq3_cont
0:
    sub     r_divd,r_div    ; restore dividend
    lsl     r_quo           ; shift quotient (without CARRY)
__udivuqq3_cont:
    lsl     __zero_reg__    ; shift loop-counter bit
    brne    __udivuqq3_loop
    com     r_quo           ; complement result
                            ; because C flag was complemented in loop
    ret
ENDF __divqq_helper
#endif  /* L_divqq_helper */

#undef  r_divd
#undef  r_quo
#undef  r_div
#undef  r_sign


/*******************************************************
    Fractional Division 16 / 16
*******************************************************/

#define r_divdL 26     /* dividend Low */
#define r_divdH 27     /* dividend Hig */
#define r_quoL  24     /* quotient Low */
#define r_quoH  25     /* quotient High */
#define r_divL  22     /* divisor */
#define r_divH  23     /* divisor */
#define r_cnt   21

#if defined (L_divhq3)
;; HQ __divhq3 (HQ, HQ): signed s.15 division.
DEFUN __divhq3
    ;; r0 = sign of the result
    mov     r0, r_divdH
    eor     r0, r_divH
    sbrs    r_divH, 7
    rjmp    1f
    NEG2    r_divL
1:
    sbrs    r_divdH, 7
    rjmp    2f
    NEG2    r_divdL
2:
    cp      r_divdL, r_divL
    cpc     r_divdH, r_divH
    breq    __divhq3_minus1  ; if equal return -1
    XCALL   __udivuhq3
    lsr     r_quoH
    ror     r_quoL
    brpl    9f
    ;; negate result if needed
    NEG2    r_quoL
9:
    ret
__divhq3_minus1:
    ldi     r_quoH, 0x80
    clr     r_quoL
    ret
ENDF __divhq3
#endif  /* defined (L_divhq3) */

#if defined (L_udivuhq3)
;; UHQ __udivuhq3 (UHQ, UHQ)
DEFUN __udivuhq3
    sub     r_quoH,r_quoH   ; clear quotient and carry
    ;; FALLTHRU
ENDF __udivuhq3

;; Shared restoring division loop; entered with carry and r_quoH
;; preset by __udivuhq3 or __udivuha3.
DEFUN __udivuha3_common
    clr     r_quoL          ; clear quotient
    ldi     r_cnt,16        ; init loop counter
__udivuhq3_loop:
    rol     r_divdL         ; shift dividend (with CARRY)
    rol     r_divdH
    brcs    __udivuhq3_ep   ; dividend overflow
    cp      r_divdL,r_divL  ; compare dividend & divisor
    cpc     r_divdH,r_divH
    brcc    __udivuhq3_ep   ; dividend >= divisor
    rol     r_quoL          ; shift quotient (with CARRY)
    rjmp    __udivuhq3_cont
__udivuhq3_ep:
    sub     r_divdL,r_divL  ; restore dividend
    sbc     r_divdH,r_divH
    lsl     r_quoL          ; shift quotient (without CARRY)
__udivuhq3_cont:
    rol     r_quoH          ; shift quotient
    dec     r_cnt           ; decrement loop counter
    brne    __udivuhq3_loop
    com     r_quoL          ; complement result
    com     r_quoH          ; because C flag was complemented in loop
    ret
ENDF __udivuha3_common
#endif  /* defined (L_udivuhq3) */

/*******************************************************
    Fixed Division 8.8 / 8.8
*******************************************************/
#if defined (L_divha3)
;; HA __divha3 (HA, HA): signed s8.7 division.
DEFUN __divha3
    ;; r0 = sign of the result
    mov     r0, r_divdH
    eor     r0, r_divH
    sbrs    r_divH, 7
    rjmp    1f
    NEG2    r_divL
1:
    sbrs    r_divdH, 7
    rjmp    2f
    NEG2    r_divdL
2:
    XCALL   __udivuha3
    lsr     r_quoH      ; adjust to 7 fractional bits
    ror     r_quoL
    sbrs    r0, 7       ; negate result if needed
    ret
    NEG2    r_quoL
    ret
ENDF __divha3
#endif  /* defined (L_divha3) */

#if defined (L_udivuha3)
;; UHA __udivuha3 (UHA, UHA): rearrange into fractional form, then
;; reuse the 16-bit fractional division loop.
DEFUN __udivuha3
    mov     r_quoH, r_divdL
    mov     r_divdL, r_divdH
    clr     r_divdH
    lsl     r_quoH          ; shift quotient into carry
    XJMP    __udivuha3_common ; same as fractional after rearrange
ENDF __udivuha3
#endif  /* defined (L_udivuha3) */

#undef  r_divdL
#undef  r_divdH
#undef  r_quoL
#undef  r_quoH
#undef  r_divL
#undef  r_divH
#undef  r_cnt

/*******************************************************
    Fixed Division 16.16 / 16.16
*******************************************************/

#define r_arg1L  24    /* arg1 gets passed already in place */
#define r_arg1H  25
#define r_arg1HL 26
#define r_arg1HH 27
#define r_divdL  26    /* dividend Low */
#define r_divdH  27
#define r_divdHL 30
#define r_divdHH 31    /* dividend High */
#define r_quoL   22    /* quotient Low */
#define r_quoH   23
#define r_quoHL  24
#define r_quoHH  25    /* quotient High */
#define r_divL   18    /* divisor Low */
#define r_divH   19
#define r_divHL  20
#define r_divHH  21    /* divisor High */
#define r_cnt  __zero_reg__  /* loop count (0 after the loop!) */

#if defined (L_divsa3)
;; SA __divsa3 (SA, SA): signed s16.15 division.
DEFUN __divsa3
    ;; r0 = sign of the result
    mov     r0, r_arg1HH
    eor     r0, r_divHH
    sbrs    r_divHH, 7
    rjmp    1f
    NEG4    r_divL
1:
    sbrs    r_arg1HH, 7
    rjmp    2f
    NEG4    r_arg1L
2:
    XCALL   __udivusa3
    lsr     r_quoHH     ; adjust to 15 fractional bits
    ror     r_quoHL
    ror     r_quoH
    ror     r_quoL
    sbrs    r0, 7       ; negate result if needed
    ret
    ;; negate r_quoL
    XJMP    __negsi2
ENDF __divsa3
#endif  /* defined (L_divsa3) */

#if defined (L_udivusa3)
;; USA __udivusa3 (USA, USA): 32-bit restoring fractional division.
DEFUN __udivusa3
    ldi     r_divdHL, 32    ; init loop counter
    mov     r_cnt, r_divdHL
    clr     r_divdHL
    clr     r_divdHH
    wmov    r_quoL, r_divdHL
    lsl     r_quoHL         ; shift quotient into carry
    rol     r_quoHH
__udivusa3_loop:
    rol     r_divdL         ; shift dividend (with CARRY)
    rol     r_divdH
    rol     r_divdHL
    rol     r_divdHH
    brcs    __udivusa3_ep   ; dividend overflow
    cp      r_divdL,r_divL  ; compare dividend & divisor
    cpc     r_divdH,r_divH
    cpc     r_divdHL,r_divHL
    cpc     r_divdHH,r_divHH
    brcc    __udivusa3_ep   ; dividend >= divisor
    rol     r_quoL          ; shift quotient (with CARRY)
    rjmp    __udivusa3_cont
__udivusa3_ep:
    sub     r_divdL,r_divL  ; restore dividend
    sbc     r_divdH,r_divH
    sbc     r_divdHL,r_divHL
    sbc     r_divdHH,r_divHH
    lsl     r_quoL          ; shift quotient (without CARRY)
__udivusa3_cont:
    rol     r_quoH          ; shift quotient
    rol     r_quoHL
    rol     r_quoHH
    dec     r_cnt           ; decrement loop counter
    brne    __udivusa3_loop
    com     r_quoL          ; complement result
    com     r_quoH          ; because C flag was complemented in loop
    com     r_quoHL
    com     r_quoHH
    ret
ENDF __udivusa3
#endif  /* defined (L_udivusa3) */

#undef  r_arg1L
#undef  r_arg1H
#undef  r_arg1HL
#undef  r_arg1HH
#undef  r_divdL
#undef  r_divdH
#undef  r_divdHL
#undef  r_divdHH
#undef  r_quoL
#undef  r_quoH
#undef  r_quoHL
#undef  r_quoHH
#undef  r_divL
#undef  r_divH
#undef  r_divHL
#undef  r_divHH
#undef  r_cnt


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 1 Byte
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  24

#if defined (L_ssabs_1)
;; Signed saturated 1-byte absolute value:
;; clamps the asymmetric case neg(0x80) back to 0x7f.
DEFUN __ssabs_1
    sbrs    A0, 7
    ret
    neg     A0
    sbrc    A0,7
    dec     A0
    ret
ENDF __ssabs_1
#endif /* L_ssabs_1 */

#undef A0


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 2 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  24
#define A1  A0+1

#if defined (L_ssneg_2)
;; Signed saturated 2-byte negation: V is set only for neg(0x8000),
;; which is then pulled back to 0x7fff.
DEFUN __ssneg_2
    NEG2    A0
    brvc    0f
    sbiw    A0, 1
0:  ret
ENDF __ssneg_2
#endif /* L_ssneg_2 */

#if defined (L_ssabs_2)
;; Signed saturated 2-byte absolute value.
DEFUN __ssabs_2
    sbrs    A1, 7
    ret
    XJMP    __ssneg_2
ENDF __ssabs_2
#endif /* L_ssabs_2 */

#undef A0
#undef A1


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 4 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  22
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3

#if defined (L_ssneg_4)
;; Signed saturated 4-byte negation: neg(0x80000000) saturates
;; to 0x7fffffff.
DEFUN __ssneg_4
    XCALL   __negsi2
    brvc    0f
    ldi     A3, 0x7f
    ldi     A2, 0xff
    ldi     A1, 0xff
    ldi     A0, 0xff
0:  ret
ENDF __ssneg_4
#endif /* L_ssneg_4 */

#if defined (L_ssabs_4)
;; Signed saturated 4-byte absolute value.
DEFUN __ssabs_4
    sbrs    A3, 7
    ret
    XJMP    __ssneg_4
ENDF __ssabs_4
#endif /* L_ssabs_4 */

#undef A0
#undef A1
#undef A2
#undef A3


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 8 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  18
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3
#define A4  A0+4
#define A5  A0+5
#define A6  A0+6
#define A7  A0+7

#if defined (L_clr_8)
FALIAS __usneguta2
FALIAS __usneguda2
FALIAS __usnegudq2

;; Clear Carry and all Bytes
DEFUN __clr_8
    ;; Clear Carry and set Z
    sub     A7, A7
    ;; FALLTHRU
ENDF  __clr_8
;; Propagate Carry to all Bytes, Carry unaltered
DEFUN __sbc_8
    sbc     A7, A7
    sbc     A6, A6
    wmov    A4, A6
    wmov    A2, A6
    wmov    A0, A6
    ret
ENDF __sbc_8
#endif /* L_clr_8 */

#if defined (L_ssneg_8)
FALIAS __ssnegta2
FALIAS __ssnegda2
FALIAS __ssnegdq2

;; Signed saturated 8-byte negation: neg(INT64_MIN) saturates
;; to INT64_MAX.
DEFUN __ssneg_8
    XCALL   __negdi2
    brvc    0f
    ;; A[] = 0x7fffffff
    sec
    XCALL   __sbc_8
    ldi     A7, 0x7f
0:  ret
ENDF __ssneg_8
#endif /* L_ssneg_8 */

#if defined (L_ssabs_8)
FALIAS __ssabsta2
FALIAS __ssabsda2
FALIAS __ssabsdq2

;; Signed saturated 8-byte absolute value.
DEFUN __ssabs_8
    sbrs    A7, 7
    ret
    XJMP    __ssneg_8
ENDF __ssabs_8
#endif /* L_ssabs_8 */

;; Second Argument
#define B0  10
#define B1  B0+1
#define B2  B0+2
#define B3  B0+3
#define B4  B0+4
#define B5  B0+5
#define B6  B0+6
#define B7  B0+7

#if defined (L_usadd_8)
FALIAS __usadduta3
FALIAS __usadduda3
FALIAS __usaddudq3

;; Unsigned saturating 64-bit add:  A[] = A[] + B[], clamped to UINT64_MAX.
DEFUN __usadd_8
    XCALL __adddi3
    brcs 0f             ; carry out = unsigned overflow
    ret
0:  ;; A[] = 0xffffffffffffffff
    XJMP __sbc_8        ; carry is still set, so all bytes become 0xff
ENDF __usadd_8
#endif /* L_usadd_8 */
1351
#if defined (L_ussub_8)
FALIAS __ussubuta3
FALIAS __ussubuda3
FALIAS __ussubudq3

;; Unsigned saturating 64-bit subtract:  A[] = A[] - B[], clamped to 0.
DEFUN __ussub_8
    XCALL __subdi3
    brcs 0f             ; borrow = unsigned underflow
    ret
0:  ;; A[] = 0
    XJMP __clr_8
ENDF __ussub_8
#endif /* L_ussub_8 */
1365
#if defined (L_ssadd_8)
FALIAS __ssaddta3
FALIAS __ssaddda3
FALIAS __ssadddq3

;; Signed saturating 64-bit add:  A[] = A[] + B[].
;; On signed overflow (V set) the result takes the sign of B:
;; the overflowing sum of same-signed operands saturates toward B's sign.
DEFUN __ssadd_8
    XCALL __adddi3
    brvc 0f
    ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
    cpi B7, 0x80        ; C = (B7 < 0x80) = (B >= 0)
    XCALL __sbc_8       ; A[] = C ? 0xff..ff : 0x00..00
    subi A7, 0x80       ; 0xff -> 0x7f (MAX) resp. 0x00 -> 0x80 (MIN)
0:  ret
ENDF __ssadd_8
#endif /* L_ssadd_8 */
1381
#if defined (L_sssub_8)
FALIAS __sssubta3
FALIAS __sssubda3
FALIAS __sssubdq3

;; Signed saturating 64-bit subtract:  A[] = A[] - B[].
;; On signed overflow (V set) the result takes the sign opposite to B.
DEFUN __sssub_8
    XCALL __subdi3
    brvc 0f
    ;; A = (B < 0) ? INT64_MAX : INT64_MIN
    ldi A7, 0x7f        ; A7 is rewritten below anyway; use it as scratch
    cp A7, B7           ; C = (0x7f < B7) = (B < 0)
    XCALL __sbc_8       ; A[] = C ? 0xff..ff : 0x00..00
    subi A7, 0x80       ; 0xff -> 0x7f (MAX) resp. 0x00 -> 0x80 (MIN)
0:  ret
ENDF __sssub_8
#endif /* L_sssub_8 */
1398
1399 #undef A0
1400 #undef A1
1401 #undef A2
1402 #undef A3
1403 #undef A4
1404 #undef A5
1405 #undef A6
1406 #undef A7
1407 #undef B0
1408 #undef B1
1409 #undef B2
1410 #undef B3
1411 #undef B4
1412 #undef B5
1413 #undef B6
1414 #undef B7
1415
1416 \f
1417 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1418 ;; Rounding Helpers
1419 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1420
#ifdef L_mask1

#define AA 24
#define CC 25

;; R25 = 1 << (R24 & 7)
;; CC = 1 << (AA & 7)
;; Clobbers: None
;; Builds the power of two bit-by-bit from AA's low three bits
;; instead of looping over a shift count.
DEFUN __mask1
    ;; CC = 2 ^ AA.1  (start from 4 if bit 1 set, else from 1)
    ldi CC, 1 << 2
    sbrs AA, 1
    ldi CC, 1 << 0
    ;; CC *= 2 ^ AA.0  (one extra doubling if bit 0 set)
    sbrc AA, 0
    lsl CC
    ;; CC *= 2 ^ AA.2  (SWAP = shift by 4 for the values 1,2,4,8)
    sbrc AA, 2
    swap CC
    ret
ENDF __mask1

#undef AA
#undef CC
#endif /* L_mask1 */
1446
1447 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1448
1449 ;; The rounding point. Any bits smaller than
1450 ;; 2^{-RP} will be cleared.
1451 #define RP R24
1452
1453 #define A0 22
1454 #define A1 A0 + 1
1455
1456 #define C0 24
1457 #define C1 C0 + 1
1458
1459 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1460 ;; Rounding, 1 Byte
1461 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1462
#ifdef L_roundqq3

;; Round a signed QQ value to RP fractional bits (round half away
;; from zero by adding 2^{-RP-1}, saturating, then masking).
;; R24 = round (R22, R24)
;; Clobbers: R22, __tmp_reg__
DEFUN __roundqq3
    mov __tmp_reg__, C1         ; R25 is not in the clobber list: save it
    subi RP, __QQ_FBIT__ - 1
    neg RP                      ; RP = QQ_FBIT-1 - RP
    ;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
    XCALL __mask1
    mov C0, C1                  ; C0 = rounding offset 2^{-RP-1}
    ;; Add-Saturate 2^{-RP-1}
    add A0, C0
    brvc 0f
    ldi C0, 0x7f                ; signed overflow: saturate to QQ max
    rjmp 9f
0:  ;; Mask out bits beyond RP
    lsl C0                      ; C0 = 2^{-RP}
    neg C0                      ; mask: ones at 2^{-RP} and above
    and C0, A0
9:  mov C1, __tmp_reg__         ; restore R25
    ret
ENDF __roundqq3
#endif /* L_roundqq3 */
1487
#ifdef L_rounduqq3

;; Round an unsigned UQQ value to RP fractional bits (add 2^{-RP-1},
;; saturate on carry, then mask).  Mirrors __roundqq3 but tests C, not V.
;; R24 = round (R22, R24)
;; Clobbers: R22, __tmp_reg__
DEFUN __rounduqq3
    mov __tmp_reg__, C1         ; R25 is not in the clobber list: save it
    subi RP, __UQQ_FBIT__ - 1
    neg RP                      ; RP = UQQ_FBIT-1 - RP
    ;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
    XCALL __mask1
    mov C0, C1                  ; C0 = rounding offset 2^{-RP-1}
    ;; Add-Saturate 2^{-RP-1}
    add A0, C0
    brcc 0f
    ldi C0, 0xff                ; unsigned overflow: saturate to UQQ max
    rjmp 9f
0:  ;; Mask out bits beyond RP
    lsl C0                      ; C0 = 2^{-RP}
    neg C0                      ; mask: ones at 2^{-RP} and above
    and C0, A0
9:  mov C1, __tmp_reg__         ; restore R25
    ret
ENDF __rounduqq3
#endif /* L_rounduqq3 */
1512
1513 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1514 ;; Rounding, 2 Bytes
1515 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1516
#ifdef L_addmask_2

;; [ R25:R24 = 1 << (R24 & 15)
;;   R23:R22 += 1 << (R24 & 15) ]
;; SREG is set according to the addition
;; Note: C0 aliases RP (both are R24); RP is consumed here.
DEFUN __addmask_2
    ;; R25 = 1 << (R24 & 7)
    XCALL __mask1
    cpi RP, 1 << 3      ; C = (RP < 8)
    sbc C0, C0          ; C0 = (RP < 8) ? 0xff : 0x00
    ;; Move the bit from C1 down into C0 if RP.3 is not set
    ;; (the power of two belongs in the low byte then)
    and C0, C1
    eor C1, C0
    ;; Finally, add the power-of-two: A[] += C[]
    add A0, C0
    adc A1, C1
    ret
ENDF __addmask_2
#endif /* L_addmask_2 */
1536
#ifdef L_round_s2

;; Signed 2-byte rounding (HQ / HA).
;; R25:R24 = round (R23:R22, R24)
;; Clobbers: R23, R22
;; __roundhq3 rebases RP from HQ's fbit to HA's, then falls through.
DEFUN __roundhq3
    subi RP, __HQ_FBIT__ - __HA_FBIT__
ENDF __roundhq3
DEFUN __roundha3
    subi RP, __HA_FBIT__ - 1
    neg RP              ; RP = HA_FBIT-1 - RP
    ;; [ R25:R24 = 1 << (FBIT-1 - RP)
    ;;   R23:R22 += 1 << (FBIT-1 - RP) ]
    XCALL __addmask_2
    ;; Saturate / mask according to the flags of the addition above
    XJMP __round_s2_const
ENDF __roundha3

#endif /* L_round_s2 */
1554
#ifdef L_round_u2

;; Unsigned 2-byte rounding (UHQ / UHA).
;; R25:R24 = round (R23:R22, R24)
;; Clobbers: R23, R22
;; __rounduhq3 rebases RP from UHQ's fbit to UHA's, then falls through.
DEFUN __rounduhq3
    subi RP, __UHQ_FBIT__ - __UHA_FBIT__
ENDF __rounduhq3
DEFUN __rounduha3
    subi RP, __UHA_FBIT__ - 1
    neg RP              ; RP = UHA_FBIT-1 - RP
    ;; [ R25:R24 = 1 << (FBIT-1 - RP)
    ;;   R23:R22 += 1 << (FBIT-1 - RP) ]
    XCALL __addmask_2
    ;; Saturate / mask according to the flags of the addition above
    XJMP __round_u2_const
ENDF __rounduha3

#endif /* L_round_u2 */
1572
1573
#ifdef L_round_2_const

;; Helpers for 2 byte wide rounding.  Entered right after __addmask_2
;; with C[] = 2^{-RP-1} and SREG still holding the addition's flags.

;; Signed flavor: saturate to 0x7fff on signed overflow (V set).
DEFUN __round_s2_const
    brvc 2f
    ldi C1, 0x7f
    rjmp 1f
    ;; FALLTHRU (Barrier)
ENDF __round_s2_const

;; Unsigned flavor: saturate to 0xffff on carry out.
DEFUN __round_u2_const
    brcc 2f
    ldi C1, 0xff
1:
    ldi C0, 0xff
    rjmp 9f
2:
    ;; No overflow: no saturation needed, just mask off the bits
    ;; below the rounding point.
    ;; Currently, we have C[] = 2^{-RP-1}
    ;; C[] = 2^{-RP}
    lsl C0
    rol C1
    ;; C[] = -C[]: 16-bit mask with ones at 2^{-RP} and above
    NEG2 C0
    ;; Clear the bits beyond the rounding point.
    and C0, A0
    and C1, A1
9:  ret
ENDF __round_u2_const

#endif /* L_round_2_const */
1606
1607 #undef A0
1608 #undef A1
1609 #undef C0
1610 #undef C1
1611
1612 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1613 ;; Rounding, 4 Bytes
1614 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1615
1616 #define A0 18
1617 #define A1 A0 + 1
1618 #define A2 A0 + 2
1619 #define A3 A0 + 3
1620
1621 #define C0 22
1622 #define C1 C0 + 1
1623 #define C2 C0 + 2
1624 #define C3 C0 + 3
1625
#ifdef L_addmask_4

;; [ R25:R22 = 1 << (R24 & 31)
;;   R21:R18 += 1 << (R24 & 31) ]
;; SREG is set according to the addition
;; Note: RP (R24) is consumed by the CPI/SBC conditioning below.
DEFUN __addmask_4
    ;; R25 = 1 << (R24 & 7)
    XCALL __mask1
    cpi RP, 1 << 4      ; C = (RP < 16)
    sbc C0, C0          ; C1:C0 = (RP < 16) ? 0xffff : 0
    sbc C1, C1
    ;; Swap C2 with C3 if RP.3 is not set
    cpi RP, 1 << 3      ; C = (RP < 8)
    sbc C2, C2          ; C2 = (RP < 8) ? 0xff : 0
    and C2, C3
    eor C3, C2
    ;; Swap C3:C2 with C1:C0 if RP.4 is not set
    and C0, C2 $ eor C2, C0
    and C1, C3 $ eor C3, C1
    ;; Finally, add the power-of-two: A[] += C[]
    add A0, C0
    adc A1, C1
    adc A2, C2
    adc A3, C3
    ret
ENDF __addmask_4
#endif /* L_addmask_4 */
1653
#ifdef L_round_s4

;; Signed 4-byte rounding (SQ / SA).
;; R25:R22 = round (R21:R18, R24)
;; Clobbers: R18...R21
;; __roundsq3 rebases RP from SQ's fbit to SA's, then falls through.
DEFUN __roundsq3
    subi RP, __SQ_FBIT__ - __SA_FBIT__
ENDF __roundsq3
DEFUN __roundsa3
    subi RP, __SA_FBIT__ - 1
    neg RP              ; RP = SA_FBIT-1 - RP
    ;; [ R25:R22 = 1 << (FBIT-1 - RP)
    ;;   R21:R18 += 1 << (FBIT-1 - RP) ]
    XCALL __addmask_4
    ;; Saturate / mask according to the flags of the addition above
    XJMP __round_s4_const
ENDF __roundsa3

#endif /* L_round_s4 */
1671
#ifdef L_round_u4

;; Unsigned 4-byte rounding (USQ / USA).
;; R25:R22 = round (R21:R18, R24)
;; Clobbers: R18...R21
;; __roundusq3 rebases RP from USQ's fbit to USA's, then falls through.
DEFUN __roundusq3
    subi RP, __USQ_FBIT__ - __USA_FBIT__
ENDF __roundusq3
DEFUN __roundusa3
    subi RP, __USA_FBIT__ - 1
    neg RP              ; RP = USA_FBIT-1 - RP
    ;; [ R25:R22 = 1 << (FBIT-1 - RP)
    ;;   R21:R18 += 1 << (FBIT-1 - RP) ]
    XCALL __addmask_4
    ;; Saturate / mask according to the flags of the addition above
    XJMP __round_u4_const
ENDF __roundusa3

#endif /* L_round_u4 */
1689
1690
#ifdef L_round_4_const

;; Helpers for 4 byte wide rounding.  Entered right after __addmask_4
;; with C[] = 2^{-RP-1} and SREG still holding the addition's flags.

;; Signed flavor: saturate to 0x7fffffff on signed overflow (V set).
DEFUN __round_s4_const
    brvc 2f
    ldi C3, 0x7f
    rjmp 1f
    ;; FALLTHRU (Barrier)
ENDF __round_s4_const

;; Unsigned flavor: saturate to 0xffffffff on carry out.
DEFUN __round_u4_const
    brcc 2f
    ldi C3, 0xff
1:
    ldi C2, 0xff
    ldi C1, 0xff
    ldi C0, 0xff
    rjmp 9f
2:
    ;; No overflow: no saturation needed, just mask off the bits
    ;; below the rounding point.
    ;; Currently, we have C[] = 2^{-RP-1}
    ;; C[] = 2^{-RP}
    lsl C0
    rol C1
    rol C2
    rol C3
    ;; C[] = -C[]: 32-bit mask with ones at 2^{-RP} and above
    XCALL __negsi2
    ;; Clear the bits beyond the rounding point.
    and C0, A0
    and C1, A1
    and C2, A2
    and C3, A3
9:  ret
ENDF __round_u4_const

#endif /* L_round_4_const */
1728
1729 #undef A0
1730 #undef A1
1731 #undef A2
1732 #undef A3
1733 #undef C0
1734 #undef C1
1735 #undef C2
1736 #undef C3
1737
1738 #undef RP
1739
1740 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1741 ;; Rounding, 8 Bytes
1742 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1743
1744 #define RP 16
1745 #define FBITm1 31
1746
1747 #define C0 18
1748 #define C1 C0 + 1
1749 #define C2 C0 + 2
1750 #define C3 C0 + 3
1751 #define C4 C0 + 4
1752 #define C5 C0 + 5
1753 #define C6 C0 + 6
1754 #define C7 C0 + 7
1755
1756 #define A0 16
1757 #define A1 17
1758 #define A2 26
1759 #define A3 27
1760 #define A4 28
1761 #define A5 29
1762 #define A6 30
1763 #define A7 31
1764
1765
#ifdef L_rounddq3
;; Round a signed DQ value to R16 fractional bits.
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; T cleared selects the signed saturation path in __round_x8.
DEFUN __rounddq3
    ldi FBITm1, __DQ_FBIT__ - 1
    clt
    XJMP __round_x8
ENDF __rounddq3
#endif /* L_rounddq3 */
1775
#ifdef L_roundudq3
;; Round an unsigned UDQ value to R16 fractional bits.
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; T set selects the unsigned saturation path in __round_x8.
DEFUN __roundudq3
    ldi FBITm1, __UDQ_FBIT__ - 1
    set
    XJMP __round_x8
ENDF __roundudq3
#endif /* L_roundudq3 */
1785
#ifdef L_roundda3
;; Round a signed DA value to R16 fractional bits.
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; T cleared selects the signed saturation path in __round_x8.
DEFUN __roundda3
    ldi FBITm1, __DA_FBIT__ - 1
    clt
    XJMP __round_x8
ENDF __roundda3
#endif /* L_roundda3 */
1795
#ifdef L_rounduda3
;; Round an unsigned UDA value to R16 fractional bits.
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; T set selects the unsigned saturation path in __round_x8.
DEFUN __rounduda3
    ldi FBITm1, __UDA_FBIT__ - 1
    set
    XJMP __round_x8
ENDF __rounduda3
#endif /* L_rounduda3 */
1805
#ifdef L_roundta3
;; Round a signed TA value to R16 fractional bits.
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; T cleared selects the signed saturation path in __round_x8.
DEFUN __roundta3
    ldi FBITm1, __TA_FBIT__ - 1
    clt
    XJMP __round_x8
ENDF __roundta3
#endif /* L_roundta3 */
1815
#ifdef L_rounduta3
;; Round an unsigned UTA value to R16 fractional bits.
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; T set selects the unsigned saturation path in __round_x8.
DEFUN __rounduta3
    ldi FBITm1, __UTA_FBIT__ - 1
    set
    XJMP __round_x8
ENDF __rounduta3
#endif /* L_rounduta3 */
1825
1826
#ifdef L_round_x8
;; Common worker for all 8-byte rounding routines.
;; In:  C[] = R25:R18 = 64-bit fixed-point value
;;      RP (R16) = rounding point,  FBITm1 (R31) = FBIT - 1
;;      T flag: 0 = signed saturation, 1 = unsigned saturation
;; Out: C[] = R25:R18 = rounded value
;; Register aliasing: A0/A1 = R16/R17, A4/A5 = R28/R29 (callee-saved,
;; hence the pushes); A2/A3 = X, A6/A7 = Z.  A0 aliases RP.
DEFUN __round_x8
    push r16
    push r17
    push r28
    push r29
    ;; Compute log2 of addend from rounding point
    sub RP, FBITm1
    neg RP              ; RP = FBITm1 - RP = shift count for __ashldi3
    ;; Move input to work register A[].  A0 is R16 = RP, which must
    ;; keep the shift count for __ashldi3, so park C0 on the stack.
    push C0
    mov A1, C1
    wmov A2, C2
    wmov A4, C4
    wmov A6, C6
    ;; C[] = 1 << (FBIT-1 - RP)
    XCALL __clr_8       ; C[] = 0
    inc C0              ; C[] = 1
    XCALL __ashldi3     ; C[] <<= R16
    pop A0              ; shift count consumed; restore input's low byte
    ;; A[] += C[]  (add the rounding offset 2^{FBIT-1 - RP})
    add A0, C0
    adc A1, C1
    adc A2, C2
    adc A3, C3
    adc A4, C4
    adc A5, C5
    adc A6, C6
    adc A7, C7
    brts 1f
    ;; Signed
    brvc 3f             ; no signed overflow -> mask step
    ;; Signed overflow: C[] = 0x7f...
    brvs 2f             ; V known set: always taken (skips unsigned test)
1:  ;; Unsigned
    brcc 3f             ; no carry out -> mask step
    ;; Unsigned overflow: C[] = 0xff...
2:  ldi C7, 0xff
    ldi C6, 0xff
    wmov C0, C6
    wmov C2, C6
    wmov C4, C6
    bld C7, 7           ; T=0 (signed): clear sign bit -> 0x7f...
    rjmp 9f
3:
    ;; C[] = -C[] - C[] = -2^{FBIT-RP}: mask of the bits to keep
    push A0             ; R16 is needed as shift count once more
    ldi r16, 1
    XCALL __ashldi3     ; C[] <<= 1
    pop A0
    XCALL __negdi2      ; C[] = -C[]
    ;; Clear the bits beyond the rounding point.
    and C0, A0
    and C1, A1
    and C2, A2
    and C3, A3
    and C4, A4
    and C5, A5
    and C6, A6
    and C7, A7
9:  ;; Epilogue
    pop r29
    pop r28
    pop r17
    pop r16
    ret
ENDF __round_x8

#endif /* L_round_x8 */
1896
1897 #undef A0
1898 #undef A1
1899 #undef A2
1900 #undef A3
1901 #undef A4
1902 #undef A5
1903 #undef A6
1904 #undef A7
1905
1906 #undef C0
1907 #undef C1
1908 #undef C2
1909 #undef C3
1910 #undef C4
1911 #undef C5
1912 #undef C6
1913 #undef C7
1914
1915 #undef RP
1916 #undef FBITm1
1917
1918
1919 ;; Supply implementations / symbols for the bit-banging functions
1920 ;; __builtin_avr_bitsfx and __builtin_avr_fxbits
#ifdef L_ret
;; Trivial no-op entry point: a plain RET that the bit-banging
;; builtins can reference as a symbol.
DEFUN __ret
    ret
ENDF __ret
#endif /* L_ret */
1926
1927 #endif /* if not __AVR_TINY__ */