/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001-2021 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include "xtensa-config.h"

/* Define macros for the ABS and ADDX* instructions to handle cases
   where they are not included in the Xtensa processor configuration.  */

	.macro	do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
	abs	\dst, \src
#else
	neg	\tmp, \src
	movgez	\tmp, \src, \src
	mov	\dst, \tmp
#endif
	.endm
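/* In C terms, the fallback above computes the absolute value without a
   branch (a sketch; MOVGEZ copies its second operand to the first when
   the third is >= 0):

     tmp = -src;
     if (src >= 0)
       tmp = src;
     dst = tmp;  */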

	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm

/* Define macros for leaf function entry and return, supporting either the
   standard register windowed ABI or the non-windowed call0 ABI.  These
   macros do not allocate any extra stack space, so they only work for
   leaf functions that do not need to spill anything to the stack.  */

	.macro	leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	entry	\reg, \size
#else
	/* do nothing */
#endif
	.endm

	.macro	leaf_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	retw
#else
	ret
#endif
	.endm


#ifdef L_mulsi3
	.align	4
	.global	__mulsi3
	.type	__mulsi3, @function
__mulsi3:
	leaf_entry sp, 16

#if XCHAL_HAVE_MUL32
	mull	a2, a2, a3

#elif XCHAL_HAVE_MUL16
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	leaf_return
.LMUL16:
	srai	a4, a2, 16
	srai	a5, a3, 16
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4
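	/* The sequence above uses the identity (a sketch, writing
	   x = (xh << 16) + xl and y = (yh << 16) + yl):

	     (x * y) mod 2^32 == (((xh*yl + xl*yh) << 16) + xl*yl) mod 2^32

	   MUL16U reads only the low 16 bits of each operand, so forming
	   the high halves with SRAI is safe here.  */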

#elif XCHAL_HAVE_MAC16
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO
	umul.aa.ll a2, a3
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5
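	/* Same decomposition as the MUL16 case: the accumulator collects
	   the cross terms xh*yl + xl*yh, which are shifted left by 16 and
	   added to the unsigned low product xl*yl.  */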

#else /* !MUL32 && !MUL16 && !MAC16 */

	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */
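	/* Conceptually (a C sketch; the unrolled code below retires four
	   bits of "b" per iteration, using addx2/addx4/addx8 in place of
	   the explicit shifts):

	     unsigned result = 0;
	     while (b != 0)
	       {
	         if (b & 1)
	           result += a;
	         a <<= 1;
	         b >>= 1;
	       }
	 */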

	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	/* Swap so the second argument is smaller.  */
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */

	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop
	neg	a3, a2
	movltz	a2, a3, a5
	leaf_return

	.align	4
.Lmult_main_loop:
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	neg	a3, a2
	movltz	a2, a3, a5

#endif /* !MUL32 && !MUL16 && !MAC16 */

	leaf_return
	.size	__mulsi3, . - __mulsi3

#endif /* L_mulsi3 */


#ifdef L_umulsidi3

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

	.align	4
	.global	__umulsidi3
	.type	__umulsidi3, @function
__umulsidi3:
#if __XTENSA_CALL0_ABI__
	leaf_entry sp, 32
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	leaf_entry sp, 48
#else
	leaf_entry sp, 16
#endif

#ifdef __XTENSA_EB__
#define wh a2
#define wl a3
#else
#define wh a3
#define wl a2
#endif /* __XTENSA_EB__ */

/* This code is taken from the mulsf3 routine in ieee754-sf.S.
   See more comments there.  */
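/* For reference, the partial products follow the schoolbook decomposition
   (a sketch, writing x = (xh << 16) + xl and y = (yh << 16) + yl):

     x * y = (xh*yh << 32) + ((xh*yl + xl*yh) << 16) + xl*yl
           = (pp3 << 32)   + ((pp1 + pp2) << 16)     + pp0  */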

#if XCHAL_HAVE_MUL32_HIGH
	mull	a6, a2, a3
	muluh	wh, a2, a3
	mov	wl, a6

#else /* ! MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* a0 and a8 will be clobbered by calling the multiply function
	   but a8 is not used here and need not be saved.  */
	s32i	a0, sp, 0
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
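	/* (Each partial product is at most (2^16 - 1)^2, so pp1 + pp2 fits
	   in 33 bits and the value shifted into a9 is below 2^17, leaving
	   ample headroom for the increment below.)  */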
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into wh.  */
	do_mul(wh, a2, h, a3, h)	/* pp 3 */
	add	wh, wh, a9
	mov	wl, a6

#endif /* !MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore the original return address.  */
	l32i	a0, sp, 0
#endif
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

#if XCHAL_NO_MUL

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying the 16-bit chunks of
	   the operands (the same helper appears in ieee754-sf.S for the
	   floating-point mantissas).  When using CALL0, this function uses
	   a custom ABI: the inputs are passed in a13 and a14, the result
	   is returned in a12, and a8 and a15 are clobbered.  */
	.align	4
.Lmul_mulsi3:
	leaf_entry sp, 16
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	leaf_return
#endif /* XCHAL_NO_MUL */

	.size	__umulsidi3, . - __umulsidi3

#endif /* L_umulsidi3 */


/* Define a macro for the NSAU (unsigned normalize shift amount)
   instruction, which computes the number of leading zero bits,
   to handle cases where it is not included in the Xtensa processor
   configuration.  */

	.macro	do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
	nsau	\cnt, \val
#else
	mov	\a, \val
	movi	\cnt, 0
	extui	\tmp, \a, 16, 16
	bnez	\tmp, 0f
	movi	\cnt, 16
	slli	\a, \a, 16
0:
	extui	\tmp, \a, 24, 8
	bnez	\tmp, 1f
	addi	\cnt, \cnt, 8
	slli	\a, \a, 8
1:
	movi	\tmp, __nsau_data
	extui	\a, \a, 24, 8
	add	\tmp, \tmp, \a
	l8ui	\tmp, \tmp, 0
	add	\cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
	.endm
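/* A C sketch of the fallback path (assuming, as the table below defines,
   that __nsau_data[b] is the number of leading zero bits in the byte b):

     cnt = 0;
     if ((val >> 16) == 0) { cnt = 16; val <<= 16; }
     if ((val >> 24) == 0) { cnt += 8; val <<= 8; }
     cnt += __nsau_data[val >> 24];  */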

#ifdef L_clz
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data, @object
__nsau_data:
#if !XCHAL_HAVE_NSA
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data, . - __nsau_data
	.hidden	__nsau_data
#endif /* L_clz */


#ifdef L_clzsi2
	.align	4
	.global	__clzsi2
	.type	__clzsi2, @function
__clzsi2:
	leaf_entry sp, 16
	do_nsau a2, a2, a3, a4
	leaf_return
	.size	__clzsi2, . - __clzsi2

#endif /* L_clzsi2 */


#ifdef L_ctzsi2
	.align	4
	.global	__ctzsi2
	.type	__ctzsi2, @function
__ctzsi2:
	leaf_entry sp, 16
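	/* Compute ctz(x) as 31 - nsau(x & -x): ANDing x with its negation
	   isolates the least significant set bit.  (ctz(0) is undefined;
	   nsau(0) == 32 here would yield -1.)  */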
	neg	a3, a2
	and	a3, a3, a2
	do_nsau a2, a3, a4, a5
	neg	a2, a2
	addi	a2, a2, 31
	leaf_return
	.size	__ctzsi2, . - __ctzsi2

#endif /* L_ctzsi2 */


#ifdef L_ffssi2
	.align	4
	.global	__ffssi2
	.type	__ffssi2, @function
__ffssi2:
	leaf_entry sp, 16
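	/* Same trick as __ctzsi2, biased by one: ffs(x) == 32 - nsau(x & -x).
	   Because nsau(0) == 32, this yields the required ffs(0) == 0 with
	   no special case.  */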
	neg	a3, a2
	and	a3, a3, a2
	do_nsau a2, a3, a4, a5
	neg	a2, a2
	addi	a2, a2, 32
	leaf_return
	.size	__ffssi2, . - __ffssi2

#endif /* L_ffssi2 */


#ifdef L_udivsi3
	.align	4
	.global	__udivsi3
	.type	__udivsi3, @function
__udivsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	quou	a2, a2, a3
#else
	bltui	a3, 2, .Lle_one	/* check if the divisor <= 1 */

	mov	a6, a2		/* keep dividend in a6 */
	do_nsau a5, a6, a2, a7	/* dividend_shift = nsau (dividend) */
	do_nsau a4, a3, a2, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3		/* divisor <<= count */
	movi	a2, 0		/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */
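	/* A C sketch of the restoring division below, where "count" is the
	   difference of the NSAU results computed above; the final quotient
	   bit is handled separately after the loop:

	     while (count-- > 0)
	       {
	         if (dividend >= divisor)
	           {
	             dividend -= divisor;
	             quotient += 1;
	           }
	         quotient <<= 1;
	         divisor >>= 1;
	       }
	     if (dividend >= divisor)
	       quotient += 1;
	 */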
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment quotient if dividend >= divisor */
.Lreturn:
	leaf_return

.Lle_one:
	beqz	a3, .Lerror	/* if divisor == 0, force an exception */
	leaf_return		/* if divisor == 1, return the dividend */

.Lspecial:
	/* return dividend >= divisor */
	bltu	a6, a3, .Lreturn0
	movi	a2, 1
	leaf_return

.Lerror:
	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__udivsi3, . - __udivsi3

#endif /* L_udivsi3 */


#ifdef L_divsi3
	.align	4
	.global	__divsi3
	.type	__divsi3, @function
__divsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	quos	a2, a2, a3
#else
	xor	a7, a2, a3	/* sign = dividend ^ divisor */
	do_abs	a6, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau a5, a6, a2, a8	/* udividend_shift = nsau (udividend) */
	do_nsau a4, a3, a2, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3		/* udivisor <<= count */
	movi	a2, 0		/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment if udividend >= udivisor */
.Lreturn:
	neg	a5, a2
	movltz	a2, a5, a7	/* return (sign < 0) ? -quotient : quotient */
	leaf_return

.Lle_one:
	beqz	a3, .Lerror
	neg	a2, a6		/* if udivisor == 1, then return... */
	movgez	a2, a6, a7	/* (sign < 0) ? -udividend : udividend */
	leaf_return

.Lspecial:
	bltu	a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
	movi	a2, 1
	movi	a4, -1
	movltz	a2, a4, a7	/* else return (sign < 0) ? -1 : 1 */
	leaf_return

.Lerror:
	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__divsi3, . - __divsi3

#endif /* L_divsi3 */


#ifdef L_umodsi3
	.align	4
	.global	__umodsi3
	.type	__umodsi3, @function
__umodsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	remu	a2, a2, a3
#else
	bltui	a3, 2, .Lle_one	/* check if the divisor is <= 1 */

	do_nsau a5, a2, a6, a7	/* dividend_shift = nsau (dividend) */
	do_nsau a4, a3, a6, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3		/* divisor <<= count */

	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract once more if dividend >= divisor */
.Lreturn:
	leaf_return

.Lle_one:
	bnez	a3, .Lreturn0

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__umodsi3, . - __umodsi3

#endif /* L_umodsi3 */


#ifdef L_modsi3
	.align	4
	.global	__modsi3
	.type	__modsi3, @function
__modsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	rems	a2, a2, a3
#else
	mov	a7, a2		/* save original (signed) dividend */
	do_abs	a2, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau a5, a2, a6, a8	/* udividend_shift = nsau (udividend) */
	do_nsau a4, a3, a6, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3		/* udivisor <<= count */

	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract again if udividend >= udivisor */
.Lreturn:
	bgez	a7, .Lpositive
	neg	a2, a2		/* if (dividend < 0), return -udividend */
.Lpositive:
	leaf_return

.Lle_one:
	bnez	a3, .Lreturn0

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__modsi3, . - __modsi3

#endif /* L_modsi3 */


#ifdef __XTENSA_EB__
#define uh a2
#define ul a3
#else
#define uh a3
#define ul a2
#endif /* __XTENSA_EB__ */


#ifdef L_ashldi3
	.align	4
	.global	__ashldi3
	.type	__ashldi3, @function
__ashldi3:
	leaf_entry sp, 16
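	/* This and the other double-word shifts below use the funnel-shift
	   pattern: SSL sets SAR to 32 - shift (SSR sets it to shift), and
	   SRC shifts the concatenated register pair right by SAR.  In C
	   terms (a sketch for the n < 32 case of __ashldi3):

	     uh = (uint32_t) ((((uint64_t) uh << 32) | ul) >> (32 - n));
	     ul <<= n;

	   Shifts of 32 or more take the .Llow_only / .Lhigh_only paths.  */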
	ssl	a4
	bgei	a4, 32, .Llow_only
	src	uh, uh, ul
	sll	ul, ul
	leaf_return

.Llow_only:
	sll	uh, ul
	movi	ul, 0
	leaf_return
	.size	__ashldi3, . - __ashldi3

#endif /* L_ashldi3 */


#ifdef L_ashrdi3
	.align	4
	.global	__ashrdi3
	.type	__ashrdi3, @function
__ashrdi3:
	leaf_entry sp, 16
	ssr	a4
	bgei	a4, 32, .Lhigh_only
	src	ul, uh, ul
	sra	uh, uh
	leaf_return

.Lhigh_only:
	sra	ul, uh
	srai	uh, uh, 31
	leaf_return
	.size	__ashrdi3, . - __ashrdi3

#endif /* L_ashrdi3 */


#ifdef L_lshrdi3
	.align	4
	.global	__lshrdi3
	.type	__lshrdi3, @function
__lshrdi3:
	leaf_entry sp, 16
	ssr	a4
	bgei	a4, 32, .Lhigh_only1
	src	ul, uh, ul
	srl	uh, uh
	leaf_return

.Lhigh_only1:
	srl	ul, uh
	movi	uh, 0
	leaf_return
	.size	__lshrdi3, . - __lshrdi3

#endif /* L_lshrdi3 */


#ifdef L_bswapsi2
	.align	4
	.global	__bswapsi2
	.type	__bswapsi2, @function
__bswapsi2:
	leaf_entry sp, 16
	ssai	8
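	/* With SAR == 8, each SRC extracts 32 bits of the register pair
	   shifted right by one byte.  For a2 == 0xAABBCCDD the steps below
	   proceed as follows (a worked example):

	     srli a3, a2, 16	-> a3 = 0x0000AABB
	     src  a3, a3, a2	-> a3 = 0xBBAABBCC
	     src  a3, a3, a3	-> a3 = 0xCCBBAABB
	     src  a2, a2, a3	-> a2 = 0xDDCCBBAA  (byte-swapped)  */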
	srli	a3, a2, 16
	src	a3, a3, a2
	src	a3, a3, a3
	src	a2, a2, a3
	leaf_return
	.size	__bswapsi2, . - __bswapsi2

#endif /* L_bswapsi2 */


#ifdef L_bswapdi2
	.align	4
	.global	__bswapdi2
	.type	__bswapdi2, @function
__bswapdi2:
	leaf_entry sp, 16
	ssai	8
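	/* Same SRC-based byte-swap sequence as __bswapsi2, applied once per
	   input word; the final "mov a3, a4" exchanges the two words to
	   complete the 64-bit swap.  */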
	srli	a4, a2, 16
	src	a4, a4, a2
	src	a4, a4, a4
	src	a4, a2, a4
	srli	a2, a3, 16
	src	a2, a2, a3
	src	a2, a2, a2
	src	a2, a3, a2
	mov	a3, a4
	leaf_return
	.size	__bswapdi2, . - __bswapdi2

#endif /* L_bswapdi2 */


#include "ieee754-df.S"
#include "ieee754-sf.S"