/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001-2021 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include "xtensa-config.h"

/* Define macros for the ABS and ADDX* instructions to handle cases
   where they are not included in the Xtensa processor configuration.  */

	.macro	do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
	abs	\dst, \src
#else
	neg	\tmp, \src
	movgez	\tmp, \src, \src
	mov	\dst, \tmp
#endif
	.endm
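/* In C terms, the fallback above computes the absolute value without a
   branch (a sketch; MOVGEZ copies its second operand to the first when
   the third is >= 0):

     tmp = -src;
     if (src >= 0)
       tmp = src;
     dst = tmp;  */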

	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm

/* Define macros for leaf function entry and return, supporting either the
   standard register windowed ABI or the non-windowed call0 ABI.  These
   macros do not allocate any extra stack space, so they only work for
   leaf functions that do not need to spill anything to the stack.  */

	.macro	leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	entry	\reg, \size
#else
	/* do nothing */
#endif
	.endm

	.macro	leaf_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	retw
#else
	ret
#endif
	.endm


#ifdef L_mulsi3
	.align	4
	.global	__mulsi3
	.type	__mulsi3, @function
__mulsi3:
	leaf_entry sp, 16

#if XCHAL_HAVE_MUL32
	mull	a2, a2, a3

#elif XCHAL_HAVE_MUL16
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	leaf_return
.LMUL16:
	srai	a4, a2, 16
	srai	a5, a3, 16
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4
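	/* The sequence above uses the identity (a sketch, writing
	   x = (xh << 16) + xl and y = (yh << 16) + yl):

	     (x * y) mod 2^32 == (((xh*yl + xl*yh) << 16) + xl*yl) mod 2^32

	   MUL16U reads only the low 16 bits of each operand, so forming
	   the high halves with SRAI is safe here.  */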

#elif XCHAL_HAVE_MAC16
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO
	umul.aa.ll a2, a3
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5
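	/* Same decomposition as the MUL16 case: the accumulator collects
	   the cross terms xh*yl + xl*yh, which are shifted left by 16 and
	   added to the unsigned low product xl*yl.  */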

#else /* !MUL32 && !MUL16 && !MAC16 */

	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */
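	/* Conceptually (a C sketch; the unrolled code below retires four
	   bits of "b" per iteration, using addx2/addx4/addx8 in place of
	   the explicit shifts):

	     unsigned result = 0;
	     while (b != 0)
	       {
	         if (b & 1)
	           result += a;
	         a <<= 1;
	         b >>= 1;
	       }
	 */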

	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	/* Swap so the second argument is smaller.  */
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */

	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop
	neg	a3, a2
	movltz	a2, a3, a5
	leaf_return

	.align	4
.Lmult_main_loop:
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	neg	a3, a2
	movltz	a2, a3, a5

#endif /* !MUL32 && !MUL16 && !MAC16 */

	leaf_return
	.size	__mulsi3, . - __mulsi3

#endif /* L_mulsi3 */


#ifdef L_umulsidi3

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

	.align	4
	.global	__umulsidi3
	.type	__umulsidi3, @function
__umulsidi3:
#if __XTENSA_CALL0_ABI__
	leaf_entry sp, 32
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	leaf_entry sp, 48
#else
	leaf_entry sp, 16
#endif

#ifdef __XTENSA_EB__
#define wh a2
#define wl a3
#else
#define wh a3
#define wl a2
#endif /* __XTENSA_EB__ */

/* This code is taken from the mulsf3 routine in ieee754-sf.S.
   See more comments there.  */
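/* For reference, the partial products follow the schoolbook decomposition
   (a sketch, writing x = (xh << 16) + xl and y = (yh << 16) + yl):

     x * y = (xh*yh << 32) + ((xh*yl + xl*yh) << 16) + xl*yl
           = (pp3 << 32)   + ((pp1 + pp2) << 16)     + pp0  */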

#if XCHAL_HAVE_MUL32_HIGH
	mull	a6, a2, a3
	muluh	wh, a2, a3
	mov	wl, a6

#else /* ! MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* a0 and a8 will be clobbered by calling the multiply function
	   but a8 is not used here and need not be saved.  */
	s32i	a0, sp, 0
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
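	/* (Each partial product is at most (2^16 - 1)^2, so pp1 + pp2 fits
	   in 33 bits and the value shifted into a9 is below 2^17, leaving
	   ample headroom for the increment below.)  */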
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into wh.  */
	do_mul(wh, a2, h, a3, h)	/* pp 3 */
	add	wh, wh, a9
	mov	wl, a6

#endif /* !MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore the original return address.  */
	l32i	a0, sp, 0
#endif
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

#if XCHAL_NO_MUL

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying the 16-bit chunks of
	   the operands (the same helper appears in ieee754-sf.S for the
	   floating-point mantissas).  When using CALL0, this function uses
	   a custom ABI: the inputs are passed in a13 and a14, the result
	   is returned in a12, and a8 and a15 are clobbered.  */
	.align	4
.Lmul_mulsi3:
	leaf_entry sp, 16
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	leaf_return
#endif /* XCHAL_NO_MUL */

	.size	__umulsidi3, . - __umulsidi3

#endif /* L_umulsidi3 */


/* Define a macro for the NSAU (unsigned normalize shift amount)
   instruction, which computes the number of leading zero bits,
   to handle cases where it is not included in the Xtensa processor
   configuration.  */

	.macro	do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
	nsau	\cnt, \val
#else
	mov	\a, \val
	movi	\cnt, 0
	extui	\tmp, \a, 16, 16
	bnez	\tmp, 0f
	movi	\cnt, 16
	slli	\a, \a, 16
0:
	extui	\tmp, \a, 24, 8
	bnez	\tmp, 1f
	addi	\cnt, \cnt, 8
	slli	\a, \a, 8
1:
	movi	\tmp, __nsau_data
	extui	\a, \a, 24, 8
	add	\tmp, \tmp, \a
	l8ui	\tmp, \tmp, 0
	add	\cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
	.endm
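/* A C sketch of the fallback path (assuming, as the table below defines,
   that __nsau_data[b] is the number of leading zero bits in the byte b):

     cnt = 0;
     if ((val >> 16) == 0) { cnt = 16; val <<= 16; }
     if ((val >> 24) == 0) { cnt += 8; val <<= 8; }
     cnt += __nsau_data[val >> 24];  */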

#ifdef L_clz
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data, @object
__nsau_data:
#if !XCHAL_HAVE_NSA
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data, . - __nsau_data
	.hidden	__nsau_data
#endif /* L_clz */


#ifdef L_clzsi2
	.align	4
	.global	__clzsi2
	.type	__clzsi2, @function
__clzsi2:
	leaf_entry sp, 16
	do_nsau a2, a2, a3, a4
	leaf_return
	.size	__clzsi2, . - __clzsi2

#endif /* L_clzsi2 */


#ifdef L_ctzsi2
	.align	4
	.global	__ctzsi2
	.type	__ctzsi2, @function
__ctzsi2:
	leaf_entry sp, 16
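	/* Compute ctz(x) as 31 - nsau(x & -x): ANDing x with its negation
	   isolates the least significant set bit.  (ctz(0) is undefined;
	   nsau(0) == 32 here would yield -1.)  */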
	neg	a3, a2
	and	a3, a3, a2
	do_nsau a2, a3, a4, a5
	neg	a2, a2
	addi	a2, a2, 31
	leaf_return
	.size	__ctzsi2, . - __ctzsi2

#endif /* L_ctzsi2 */


#ifdef L_ffssi2
	.align	4
	.global	__ffssi2
	.type	__ffssi2, @function
__ffssi2:
	leaf_entry sp, 16
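	/* Same trick as __ctzsi2, biased by one: ffs(x) == 32 - nsau(x & -x).
	   Because nsau(0) == 32, this yields the required ffs(0) == 0 with
	   no special case.  */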
	neg	a3, a2
	and	a3, a3, a2
	do_nsau a2, a3, a4, a5
	neg	a2, a2
	addi	a2, a2, 32
	leaf_return
	.size	__ffssi2, . - __ffssi2

#endif /* L_ffssi2 */


#ifdef L_udivsi3
	.align	4
	.global	__udivsi3
	.type	__udivsi3, @function
__udivsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	quou	a2, a2, a3
#else
	bltui	a3, 2, .Lle_one	/* check if the divisor <= 1 */

	mov	a6, a2		/* keep dividend in a6 */
	do_nsau a5, a6, a2, a7	/* dividend_shift = nsau (dividend) */
	do_nsau a4, a3, a2, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3		/* divisor <<= count */
	movi	a2, 0		/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */
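	/* A C sketch of the restoring division below, where "count" is the
	   difference of the NSAU results computed above; the final quotient
	   bit is handled separately after the loop:

	     while (count-- > 0)
	       {
	         if (dividend >= divisor)
	           {
	             dividend -= divisor;
	             quotient += 1;
	           }
	         quotient <<= 1;
	         divisor >>= 1;
	       }
	     if (dividend >= divisor)
	       quotient += 1;
	 */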
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment quotient if dividend >= divisor */
.Lreturn:
	leaf_return

.Lle_one:
	beqz	a3, .Lerror	/* if divisor == 0, force an exception */
	leaf_return		/* if divisor == 1, return the dividend */

.Lspecial:
	/* return dividend >= divisor */
	bltu	a6, a3, .Lreturn0
	movi	a2, 1
	leaf_return

.Lerror:
	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__udivsi3, . - __udivsi3

#endif /* L_udivsi3 */


#ifdef L_divsi3
	.align	4
	.global	__divsi3
	.type	__divsi3, @function
__divsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	quos	a2, a2, a3
#else
	xor	a7, a2, a3	/* sign = dividend ^ divisor */
	do_abs	a6, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau a5, a6, a2, a8	/* udividend_shift = nsau (udividend) */
	do_nsau a4, a3, a2, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3		/* udivisor <<= count */
	movi	a2, 0		/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment if udividend >= udivisor */
.Lreturn:
	neg	a5, a2
	movltz	a2, a5, a7	/* return (sign < 0) ? -quotient : quotient */
	leaf_return

.Lle_one:
	beqz	a3, .Lerror
	neg	a2, a6		/* if udivisor == 1, then return... */
	movgez	a2, a6, a7	/* (sign < 0) ? -udividend : udividend */
	leaf_return

.Lspecial:
	bltu	a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
	movi	a2, 1
	movi	a4, -1
	movltz	a2, a4, a7	/* else return (sign < 0) ? -1 : 1 */
	leaf_return

.Lerror:
	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__divsi3, . - __divsi3

#endif /* L_divsi3 */


#ifdef L_umodsi3
	.align	4
	.global	__umodsi3
	.type	__umodsi3, @function
__umodsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	remu	a2, a2, a3
#else
	bltui	a3, 2, .Lle_one	/* check if the divisor is <= 1 */

	do_nsau a5, a2, a6, a7	/* dividend_shift = nsau (dividend) */
	do_nsau a4, a3, a6, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3		/* divisor <<= count */

	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract once more if dividend >= divisor */
.Lreturn:
	leaf_return

.Lle_one:
	bnez	a3, .Lreturn0

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__umodsi3, . - __umodsi3

#endif /* L_umodsi3 */


#ifdef L_modsi3
	.align	4
	.global	__modsi3
	.type	__modsi3, @function
__modsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	rems	a2, a2, a3
#else
	mov	a7, a2		/* save original (signed) dividend */
	do_abs	a2, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau a5, a2, a6, a8	/* udividend_shift = nsau (udividend) */
	do_nsau a4, a3, a6, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3		/* udivisor <<= count */

	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract again if udividend >= udivisor */
.Lreturn:
	bgez	a7, .Lpositive
	neg	a2, a2		/* if (dividend < 0), return -udividend */
.Lpositive:
	leaf_return

.Lle_one:
	bnez	a3, .Lreturn0

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__modsi3, . - __modsi3

#endif /* L_modsi3 */


#ifdef __XTENSA_EB__
#define uh a2
#define ul a3
#else
#define uh a3
#define ul a2
#endif /* __XTENSA_EB__ */


#ifdef L_ashldi3
	.align	4
	.global	__ashldi3
	.type	__ashldi3, @function
__ashldi3:
	leaf_entry sp, 16
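	/* This and the other double-word shifts below use the funnel-shift
	   pattern: SSL sets SAR to 32 - shift (SSR sets it to shift), and
	   SRC shifts the concatenated register pair right by SAR.  In C
	   terms (a sketch for the n < 32 case of __ashldi3):

	     uh = (uint32_t) ((((uint64_t) uh << 32) | ul) >> (32 - n));
	     ul <<= n;

	   Shifts of 32 or more take the .Llow_only / .Lhigh_only paths.  */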
	ssl	a4
	bgei	a4, 32, .Llow_only
	src	uh, uh, ul
	sll	ul, ul
	leaf_return

.Llow_only:
	sll	uh, ul
	movi	ul, 0
	leaf_return
	.size	__ashldi3, . - __ashldi3

#endif /* L_ashldi3 */


#ifdef L_ashrdi3
	.align	4
	.global	__ashrdi3
	.type	__ashrdi3, @function
__ashrdi3:
	leaf_entry sp, 16
	ssr	a4
	bgei	a4, 32, .Lhigh_only
	src	ul, uh, ul
	sra	uh, uh
	leaf_return

.Lhigh_only:
	sra	ul, uh
	srai	uh, uh, 31
	leaf_return
	.size	__ashrdi3, . - __ashrdi3

#endif /* L_ashrdi3 */


#ifdef L_lshrdi3
	.align	4
	.global	__lshrdi3
	.type	__lshrdi3, @function
__lshrdi3:
	leaf_entry sp, 16
	ssr	a4
	bgei	a4, 32, .Lhigh_only1
	src	ul, uh, ul
	srl	uh, uh
	leaf_return

.Lhigh_only1:
	srl	ul, uh
	movi	uh, 0
	leaf_return
	.size	__lshrdi3, . - __lshrdi3

#endif /* L_lshrdi3 */


#ifdef L_bswapsi2
	.align	4
	.global	__bswapsi2
	.type	__bswapsi2, @function
__bswapsi2:
	leaf_entry sp, 16
	ssai	8
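	/* With SAR == 8, each SRC extracts 32 bits of the register pair
	   shifted right by one byte.  For a2 == 0xAABBCCDD the steps below
	   proceed as follows (a worked example):

	     srli a3, a2, 16	-> a3 = 0x0000AABB
	     src  a3, a3, a2	-> a3 = 0xBBAABBCC
	     src  a3, a3, a3	-> a3 = 0xCCBBAABB
	     src  a2, a2, a3	-> a2 = 0xDDCCBBAA  (byte-swapped)  */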
	srli	a3, a2, 16
	src	a3, a3, a2
	src	a3, a3, a3
	src	a2, a2, a3
	leaf_return
	.size	__bswapsi2, . - __bswapsi2

#endif /* L_bswapsi2 */


#ifdef L_bswapdi2
	.align	4
	.global	__bswapdi2
	.type	__bswapdi2, @function
__bswapdi2:
	leaf_entry sp, 16
	ssai	8
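	/* Same SRC-based byte-swap sequence as __bswapsi2, applied once per
	   input word; the final "mov a3, a4" exchanges the two words to
	   complete the 64-bit swap.  */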
	srli	a4, a2, 16
	src	a4, a4, a2
	src	a4, a4, a4
	src	a4, a2, a4
	srli	a2, a3, 16
	src	a2, a2, a3
	src	a2, a2, a2
	src	a2, a3, a2
	mov	a3, a4
	leaf_return
	.size	__bswapdi2, . - __bswapdi2

#endif /* L_bswapdi2 */


#include "ieee754-df.S"
#include "ieee754-sf.S"