/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001-2020 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include "xtensa-config.h"

/* Define macros for the ABS and ADDX* instructions to handle cases
   where they are not included in the Xtensa processor configuration.  */

	.macro	do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
	abs	\dst, \src
#else
	neg	\tmp, \src
	movgez	\tmp, \src, \src
	mov	\dst, \tmp
#endif
	.endm

	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm
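
/* For reference, what these macros compute (illustration only;
   \tmp is just a scratch register):
     do_abs dst, src, tmp:      dst = (src < 0) ? -src : src
     do_addx2 dst, as, at, tmp: dst = (as << 1) + at
     do_addx4 dst, as, at, tmp: dst = (as << 2) + at
     do_addx8 dst, as, at, tmp: dst = (as << 3) + at  */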

/* Define macros for leaf function entry and return, supporting either the
   standard register windowed ABI or the non-windowed call0 ABI.  These
   macros do not allocate any extra stack space, so they only work for
   leaf functions that do not need to spill anything to the stack.  */

	.macro	leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	entry	\reg, \size
#else
	/* do nothing */
#endif
	.endm

	.macro	leaf_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	retw
#else
	ret
#endif
	.endm


#ifdef L_mulsi3
	.align	4
	.global	__mulsi3
	.type	__mulsi3, @function
__mulsi3:
	leaf_entry sp, 16

#if XCHAL_HAVE_MUL32
	mull	a2, a2, a3

#elif XCHAL_HAVE_MUL16
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16	/* does either input have high bits set? */
	mul16u	a2, a2, a3	/* both fit in 16 bits: one mul16u suffices */
	leaf_return
.LMUL16:
	/* Compute ((a2h * a3l + a3h * a2l) << 16) + a2l * a3l; the
	   a2h * a3h term would only affect bits above 32.  */
	srai	a4, a2, 16
	srai	a5, a3, 16
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO
	umul.aa.ll a2, a3
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !MUL32 && !MUL16 && !MAC16 */

	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */
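
	/* Rough C model of the code below (illustration only; a is the
	   larger operand after the swap, b the smaller, r starts at
	   zero, and sign is the XOR of the original inputs):

	     if (b & 1) r += a;
	     if (b & 2) r += a << 1;
	     if (b & 4) r += a << 2;
	     if (b & 8) r += a << 3;
	     while (b >= 16)
	       {
		 b >>= 4;
		 a <<= 4;
		 if (b & 1) r += a;
		 if (b & 2) r += a << 1;
		 if (b & 4) r += a << 2;
		 if (b & 8) r += a << 3;
	       }
	     return sign < 0 ? -r : r;  */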

	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	/* Swap so the second argument is smaller.  */
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */

	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop
	neg	a3, a2
	movltz	a2, a3, a5
	leaf_return

	.align	4
.Lmult_main_loop:
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	neg	a3, a2
	movltz	a2, a3, a5

#endif /* !MUL32 && !MUL16 && !MAC16 */

	leaf_return
	.size	__mulsi3, . - __mulsi3

#endif /* L_mulsi3 */


#ifdef L_umulsidi3

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

	.align	4
	.global	__umulsidi3
	.type	__umulsidi3, @function
__umulsidi3:
#if __XTENSA_CALL0_ABI__
	leaf_entry sp, 32
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	leaf_entry sp, 48
#else
	leaf_entry sp, 16
#endif

#ifdef __XTENSA_EB__
#define wh a2
#define wl a3
#else
#define wh a3
#define wl a2
#endif /* __XTENSA_EB__ */

	/* This code is taken from the mulsf3 routine in ieee754-sf.S.
	   See more comments there.  */
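
	/* Each 32-bit input is split into 16-bit halves, x = xh:xl and
	   y = yh:yl, giving the usual schoolbook decomposition
	   (illustration only):

	     x * y = ((xh * yh) << 32)		      [pp 3]
		   + ((xl * yh + xh * yl) << 16)      [pp 1 + pp 2]
		   + (xl * yl)			      [pp 0]  */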

#if XCHAL_HAVE_MUL32_HIGH
	mull	a6, a2, a3
	muluh	wh, a2, a3
	mov	wl, a6

#else /* ! MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* a0 and a8 will be clobbered by calling the multiply function
	   but a8 is not used here and need not be saved.  */
	s32i	a0, sp, 0
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

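	/* The carry-out tests below use the usual unsigned-overflow
	   idiom: after "add sum, x, y", a carry occurred iff
	   sum < y (illustration only).  */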
	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into wh.  */
	do_mul(wh, a2, h, a3, h)	/* pp 3 */
	add	wh, wh, a9
	mov	wl, a6

#endif /* !MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore the original return address.  */
	l32i	a0, sp, 0
#endif
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

#if XCHAL_NO_MUL

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.  */
	.align	4
.Lmul_mulsi3:
	leaf_entry sp, 16
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	leaf_return
#endif /* XCHAL_NO_MUL */

	.size	__umulsidi3, . - __umulsidi3

#endif /* L_umulsidi3 */


/* Define a macro for the NSAU (unsigned normalize shift amount)
   instruction, which computes the number of leading zero bits,
   to handle cases where it is not included in the Xtensa processor
   configuration.  */

	.macro	do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
	nsau	\cnt, \val
#else
	mov	\a, \val
	movi	\cnt, 0
	extui	\tmp, \a, 16, 16
	bnez	\tmp, 0f
	movi	\cnt, 16
	slli	\a, \a, 16
0:
	extui	\tmp, \a, 24, 8
	bnez	\tmp, 1f
	addi	\cnt, \cnt, 8
	slli	\a, \a, 8
1:
	movi	\tmp, __nsau_data
	extui	\a, \a, 24, 8
	add	\tmp, \tmp, \a
	l8ui	\tmp, \tmp, 0
	add	\cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
	.endm
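
/* Rough C model of the table-driven fallback above (illustration only;
   __nsau_data[i] is the number of leading zero bits in the 8-bit
   value i, so a zero input yields 16 + 8 + 8 = 32):

     int nsau (unsigned v)
     {
       int n = 0;
       if ((v >> 16) == 0) { n = 16; v <<= 16; }
       if ((v >> 24) == 0) { n += 8; v <<= 8; }
       return n + __nsau_data[v >> 24];
     }  */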

#ifdef L_clz
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data, @object
__nsau_data:
#if !XCHAL_HAVE_NSA
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data, . - __nsau_data
	.hidden	__nsau_data
#endif /* L_clz */


#ifdef L_clzsi2
	.align	4
	.global	__clzsi2
	.type	__clzsi2, @function
__clzsi2:
	leaf_entry sp, 16
	do_nsau	a2, a2, a3, a4
	leaf_return
	.size	__clzsi2, . - __clzsi2

#endif /* L_clzsi2 */


#ifdef L_ctzsi2
	.align	4
	.global	__ctzsi2
	.type	__ctzsi2, @function
__ctzsi2:
	leaf_entry sp, 16
	neg	a3, a2
	and	a3, a3, a2	/* a3 = a2 & -a2: isolate the lowest set bit */
	do_nsau	a2, a3, a4, a5	/* a2 = 31 - k for bit index k */
	neg	a2, a2
	addi	a2, a2, 31	/* ctz = k = 31 - nsau (a2 & -a2) */
	leaf_return
	.size	__ctzsi2, . - __ctzsi2

#endif /* L_ctzsi2 */


#ifdef L_ffssi2
	.align	4
	.global	__ffssi2
	.type	__ffssi2, @function
__ffssi2:
	leaf_entry sp, 16
	neg	a3, a2
	and	a3, a3, a2	/* a3 = a2 & -a2: isolate the lowest set bit */
	do_nsau	a2, a3, a4, a5
	neg	a2, a2
	addi	a2, a2, 32	/* ffs = 32 - nsau (a2 & -a2); 0 when a2 == 0 */
	leaf_return
	.size	__ffssi2, . - __ffssi2

#endif /* L_ffssi2 */


#ifdef L_udivsi3
	.align	4
	.global	__udivsi3
	.type	__udivsi3, @function
__udivsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	quou	a2, a2, a3
#else
	bltui	a3, 2, .Lle_one	/* check if the divisor <= 1 */

	mov	a6, a2		/* keep dividend in a6 */
	do_nsau	a5, a6, a2, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a2, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3		/* divisor <<= count */
	movi	a2, 0		/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */
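	/* Rough C model of the loop (illustration only):

	     while (count-- > 0)
	       {
		 if (dividend >= divisor)
		   {
		     dividend -= divisor;
		     quotient += 1;
		   }
		 quotient <<= 1;
		 divisor >>= 1;
	       }
	     if (dividend >= divisor)
	       quotient += 1;  */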
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment quotient if dividend >= divisor */
.Lreturn:
	leaf_return

.Lle_one:
	beqz	a3, .Lerror	/* divisor == 0: raise an exception */
	leaf_return		/* divisor == 1: return the dividend */

.Lspecial:
	/* return dividend >= divisor */
	bltu	a6, a3, .Lreturn0
	movi	a2, 1
	leaf_return

.Lerror:
	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__udivsi3, . - __udivsi3

#endif /* L_udivsi3 */


#ifdef L_divsi3
	.align	4
	.global	__divsi3
	.type	__divsi3, @function
__divsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	quos	a2, a2, a3
#else
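	/* Overall approach for this path (illustration only): divide
	   the absolute values with the same shift-subtract loop as
	   __udivsi3, then negate the quotient when exactly one input
	   was negative:

	     int sign = dividend ^ divisor;
	     unsigned q = abs (dividend) / abs (divisor);
	     return sign < 0 ? -(int) q : (int) q;  */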
	xor	a7, a2, a3	/* sign = dividend ^ divisor */
	do_abs	a6, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a6, a2, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a2, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3		/* udivisor <<= count */
	movi	a2, 0		/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment if udividend >= udivisor */
.Lreturn:
	neg	a5, a2
	movltz	a2, a5, a7	/* return (sign < 0) ? -quotient : quotient */
	leaf_return

.Lle_one:
	beqz	a3, .Lerror
	neg	a2, a6		/* if udivisor == 1, then return... */
	movgez	a2, a6, a7	/* (sign < 0) ? -udividend : udividend */
	leaf_return

.Lspecial:
	bltu	a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
	movi	a2, 1
	movi	a4, -1
	movltz	a2, a4, a7	/* else return (sign < 0) ? -1 : 1 */
	leaf_return

.Lerror:
	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__divsi3, . - __divsi3

#endif /* L_divsi3 */


#ifdef L_umodsi3
	.align	4
	.global	__umodsi3
	.type	__umodsi3, @function
__umodsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	remu	a2, a2, a3
#else
	bltui	a3, 2, .Lle_one	/* check if the divisor is <= 1 */

	do_nsau	a5, a2, a6, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a6, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3		/* divisor <<= count */

	/* test-subtract-and-shift loop */
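	/* Rough C model of the loop (illustration only); no quotient
	   is formed, only the remainder is kept:

	     while (count-- > 0)
	       {
		 if (dividend >= divisor)
		   dividend -= divisor;
		 divisor >>= 1;
	       }
	     if (dividend >= divisor)
	       dividend -= divisor;
	     return dividend;  */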
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract once more if dividend >= divisor */
.Lreturn:
	leaf_return

.Lle_one:
	bnez	a3, .Lreturn0	/* divisor == 1: remainder is 0 */

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__umodsi3, . - __umodsi3

#endif /* L_umodsi3 */


#ifdef L_modsi3
	.align	4
	.global	__modsi3
	.type	__modsi3, @function
__modsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	rems	a2, a2, a3
#else
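	/* For this path (illustration only): compute the remainder of
	   the absolute values, then give the result the sign of the
	   dividend, matching the C definition of %:

	     unsigned r = abs (dividend) % abs (divisor);
	     return dividend < 0 ? -(int) r : (int) r;  */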
	mov	a7, a2		/* save original (signed) dividend */
	do_abs	a2, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a2, a6, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a6, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3		/* udivisor <<= count */

	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract again if udividend >= udivisor */
.Lreturn:
	bgez	a7, .Lpositive
	neg	a2, a2		/* if (dividend < 0), negate the remainder */
.Lpositive:
	leaf_return

.Lle_one:
	bnez	a3, .Lreturn0	/* udivisor == 1: remainder is 0 */

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__modsi3, . - __modsi3

#endif /* L_modsi3 */


/* uh and ul name the registers that hold the high and low words of a
   DImode value; the mapping depends on the endianness.  */
#ifdef __XTENSA_EB__
#define uh a2
#define ul a3
#else
#define uh a3
#define ul a2
#endif /* __XTENSA_EB__ */

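/* The DImode shifts below rely on the funnel-shift instruction:
   "src dst, hi, lo" extracts 32 bits starting at bit SAR from the
   64-bit concatenation hi:lo.  Rough C model (illustration only):

     dst = (unsigned) ((((unsigned long long) hi << 32) | lo) >> sar);

   ssl sets SAR to 32 - n (for left shifts); ssr sets SAR to n.  */
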
#ifdef L_ashldi3
	.align	4
	.global	__ashldi3
	.type	__ashldi3, @function
__ashldi3:
	leaf_entry sp, 16
	ssl	a4
	bgei	a4, 32, .Llow_only
	src	uh, uh, ul
	sll	ul, ul
	leaf_return

.Llow_only:
	sll	uh, ul
	movi	ul, 0
	leaf_return
	.size	__ashldi3, . - __ashldi3

#endif /* L_ashldi3 */


#ifdef L_ashrdi3
	.align	4
	.global	__ashrdi3
	.type	__ashrdi3, @function
__ashrdi3:
	leaf_entry sp, 16
	ssr	a4
	bgei	a4, 32, .Lhigh_only
	src	ul, uh, ul
	sra	uh, uh
	leaf_return

.Lhigh_only:
	sra	ul, uh
	srai	uh, uh, 31
	leaf_return
	.size	__ashrdi3, . - __ashrdi3

#endif /* L_ashrdi3 */


#ifdef L_lshrdi3
	.align	4
	.global	__lshrdi3
	.type	__lshrdi3, @function
__lshrdi3:
	leaf_entry sp, 16
	ssr	a4
	bgei	a4, 32, .Lhigh_only1
	src	ul, uh, ul
	srl	uh, uh
	leaf_return

.Lhigh_only1:
	srl	ul, uh
	movi	uh, 0
	leaf_return
	.size	__lshrdi3, . - __lshrdi3

#endif /* L_lshrdi3 */


#ifdef L_bswapsi2
	.align	4
	.global	__bswapsi2
	.type	__bswapsi2, @function
__bswapsi2:
	leaf_entry sp, 16
	ssai	8		/* SAR = 8: each src below shifts in one byte */
	srli	a3, a2, 16	/* a2 = ABCD, a3 = 00AB */
	src	a3, a3, a2	/* a3 = BABC */
	src	a3, a3, a3	/* a3 = CBAB */
	src	a2, a2, a3	/* a2 = DCBA */
	leaf_return
	.size	__bswapsi2, . - __bswapsi2

#endif /* L_bswapsi2 */
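
/* For reference, a C model of __bswapsi2 (illustration only):

     unsigned __bswapsi2 (unsigned x)
     {
       return ((x & 0x000000ff) << 24)
	      | ((x & 0x0000ff00) << 8)
	      | ((x & 0x00ff0000) >> 8)
	      | ((x & 0xff000000) >> 24);
     }

   __bswapdi2 below swaps the bytes within each word the same way and
   then exchanges the two words.  */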


#ifdef L_bswapdi2
	.align	4
	.global	__bswapdi2
	.type	__bswapdi2, @function
__bswapdi2:
	leaf_entry sp, 16
	ssai	8		/* SAR = 8: each src below shifts in one byte */
	srli	a4, a2, 16	/* byte-swap a2 into a4 (as in __bswapsi2) */
	src	a4, a4, a2
	src	a4, a4, a4
	src	a4, a2, a4
	srli	a2, a3, 16	/* byte-swap a3 into a2 */
	src	a2, a2, a3
	src	a2, a2, a2
	src	a2, a3, a2
	mov	a3, a4		/* exchange the two words */
	leaf_return
	.size	__bswapdi2, . - __bswapdi2

#endif /* L_bswapdi2 */


#include "ieee754-df.S"
#include "ieee754-sf.S"