/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001-2020 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
03984308 | 25 | |
e677f70c | 26 | #include "xtensa-config.h" |
03984308 | 27 | |
/* Define macros for the ABS and ADDX* instructions to handle cases
   where they are not included in the Xtensa processor configuration.  */

	/* do_abs: \dst = abs (\src).  \tmp is a scratch register used
	   only when the ABS instruction is not configured.  */
	.macro	do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
	abs	\dst, \src
#else
	neg	\tmp, \src			/* \tmp = -\src */
	movgez	\tmp, \src, \src		/* keep \src if it is >= 0 */
	mov	\dst, \tmp
#endif
	.endm
40 | ||
	/* do_addx2: \dst = (\as << 1) + \at.  \tmp is a scratch register
	   used only when the ADDX instructions are not configured.  */
	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm
49 | ||
	/* do_addx4: \dst = (\as << 2) + \at.  \tmp is a scratch register
	   used only when the ADDX instructions are not configured.  */
	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm
58 | ||
	/* do_addx8: \dst = (\as << 3) + \at.  \tmp is a scratch register
	   used only when the ADDX instructions are not configured.  */
	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm
67 | ||
/* Define macros for leaf function entry and return, supporting either the
   standard register windowed ABI or the non-windowed call0 ABI.  These
   macros do not allocate any extra stack space, so they only work for
   leaf functions that do not need to spill anything to the stack.  */

	.macro	leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	entry	\reg, \size		/* windowed ABI: rotate the register window */
#else
	/* do nothing */		/* call0 ABI: leaf functions need no prologue */
#endif
	.endm
80 | ||
	/* Counterpart of leaf_entry: window return or plain return,
	   depending on the configured ABI.  */
	.macro	leaf_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	retw
#else
	ret
#endif
	.endm
88 | ||
89 | ||
#ifdef L_mulsi3
	.align	4
	.global	__mulsi3
	.type	__mulsi3, @function
/* __mulsi3: 32-bit multiply.  Inputs in a2 and a3; the product is
   returned in a2.  The implementation is selected at build time from
   the configured multiply hardware (MUL32, MUL16, MAC16, or none).  */
__mulsi3:
	leaf_entry sp, 16

#if XCHAL_HAVE_MUL32
	mull	a2, a2, a3

#elif XCHAL_HAVE_MUL16
	/* If both inputs fit in 16 bits, a single mul16u suffices.  */
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	leaf_return
.LMUL16:
	/* Combine 16x16 partial products:
	   (a2h*a3 + a3h*a2) << 16) + a2l*a3l.  */
	srai	a4, a2, 16
	srai	a5, a3, 16
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
	/* Accumulate the two cross partial products in ACC, then add
	   the low partial product.  */
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO
	umul.aa.ll a2, a3
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !MUL32 && !MUL16 && !MAC16 */

	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */

	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	/* Swap so the second argument is smaller.  */
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */

	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop
	neg	a3, a2
	movltz	a2, a3, a5	/* negate the result if the signs differed */
	leaf_return

	.align	4
.Lmult_main_loop:
	/* Invariant: a2 = partial product, a3 = remaining multiplier bits,
	   a4 = multiplicand shifted into position.  */
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	neg	a3, a2
	movltz	a2, a3, a5	/* negate the result if the signs differed */

#endif /* !MUL32 && !MUL16 && !MAC16 */

	leaf_return
	.size	__mulsi3, . - __mulsi3

#endif /* L_mulsi3 */
195 | ||
196 | ||
#ifdef L_umulsidi3

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

	.align	4
	.global	__umulsidi3
	.type	__umulsidi3, @function
/* __umulsidi3: unsigned 32x32 -> 64-bit multiply.  Inputs in a2 and a3;
   the 64-bit product is returned in a2/a3 (word order depends on the
   target endianness; see wh/wl below).  */
__umulsidi3:
#if __XTENSA_CALL0_ABI__
	leaf_entry sp, 32
	/* call0 ABI: a12-a15 are callee-saved, so spill them here.  */
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	leaf_entry sp, 48
#else
	leaf_entry sp, 16
#endif

#ifdef __XTENSA_EB__
#define wh a2
#define wl a3
#else
#define wh a3
#define wl a2
#endif /* __XTENSA_EB__ */

	/* This code is taken from the mulsf3 routine in ieee754-sf.S.
	   See more comments there.  */

#if XCHAL_HAVE_MUL32_HIGH
	mull	a6, a2, a3
	muluh	wh, a2, a3
	mov	wl, a6

#else /* ! MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* a0 and a8 will be clobbered by calling the multiply function
	   but a8 is not used here and need not be saved.  */
	s32i	a0, sp, 0
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#if __XTENSA_CALL0_ABI__
/* Custom-ABI call to the bit-serial helper: args in a13/a14,
   result in a12 (see .Lmul_mulsi3 below).  */
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
/* Windowed call12: args land in callee's a2/a3 via the window rotation
   (caller's a14/a15); result comes back in caller's a14.  */
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f		/* no carry if sum >= addend */
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into wh.  */
	do_mul(wh, a2, h, a3, h)	/* pp 3 */
	add	wh, wh, a9
	mov	wl, a6

#endif /* !MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore the original return address.  */
	l32i	a0, sp, 0
#endif
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

#if XCHAL_NO_MUL

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.  */
	.align	4
.Lmul_mulsi3:
	leaf_entry sp, 16
	/* Bit-serial multiply, 4 bits per iteration (same scheme as the
	   no-hardware path of __mulsi3 above).  */
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	leaf_return
#endif /* XCHAL_NO_MUL */

	.size	__umulsidi3, . - __umulsidi3

#endif /* L_umulsidi3 */
399 | ||
400 | ||
/* Define a macro for the NSAU (unsigned normalize shift amount)
   instruction, which computes the number of leading zero bits,
   to handle cases where it is not included in the Xtensa processor
   configuration.  */

	/* do_nsau: \cnt = number of leading zero bits in \val (32 for
	   zero input).  \tmp and \a are scratch registers; the fallback
	   path narrows the search 16 bits, then 8 bits at a time, and
	   finishes with a lookup in the 256-entry __nsau_data table.  */
	.macro	do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
	nsau	\cnt, \val
#else
	mov	\a, \val
	movi	\cnt, 0
	extui	\tmp, \a, 16, 16	/* any bits set in the high half? */
	bnez	\tmp, 0f
	movi	\cnt, 16
	slli	\a, \a, 16
0:
	extui	\tmp, \a, 24, 8		/* any bits set in the top byte? */
	bnez	\tmp, 1f
	addi	\cnt, \cnt, 8
	slli	\a, \a, 8
1:
	movi	\tmp, __nsau_data
	extui	\a, \a, 24, 8
	add	\tmp, \tmp, \a
	l8ui	\tmp, \tmp, 0		/* leading zeros of the top byte */
	add	\cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
	.endm
03984308 | 429 | |
#ifdef L_clz
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data, @object
/* Lookup table for the do_nsau fallback: entry i is the number of
   leading zero bits in the byte value i (entry 0 is 8).  Only needed
   when the NSA/NSAU instructions are not configured.  */
__nsau_data:
#if !XCHAL_HAVE_NSA
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data, . - __nsau_data
	.hidden	__nsau_data
#endif /* L_clz */
457 | ||
458 | ||
#ifdef L_clzsi2
	.align	4
	.global	__clzsi2
	.type	__clzsi2, @function
/* __clzsi2: count leading zeros of a2; result in a2 (32 for input 0,
   per the do_nsau fallback path).  Clobbers a3 and a4 as scratch.  */
__clzsi2:
	leaf_entry sp, 16
	do_nsau	a2, a2, a3, a4
	leaf_return
	.size	__clzsi2, . - __clzsi2

#endif /* L_clzsi2 */
470 | ||
471 | ||
#ifdef L_ctzsi2
	.align	4
	.global	__ctzsi2
	.type	__ctzsi2, @function
/* __ctzsi2: count trailing zeros of a2; result in a2.
   Isolates the lowest set bit with (x & -x), then computes
   31 - clz of that bit.  Clobbers a3, a4, a5.  */
__ctzsi2:
	leaf_entry sp, 16
	neg	a3, a2
	and	a3, a3, a2		/* a3 = lowest set bit of a2 */
	do_nsau	a2, a3, a4, a5
	neg	a2, a2
	addi	a2, a2, 31		/* a2 = 31 - nsau */
	leaf_return
	.size	__ctzsi2, . - __ctzsi2

#endif /* L_ctzsi2 */
487 | ||
488 | ||
#ifdef L_ffssi2
	.align	4
	.global	__ffssi2
	.type	__ffssi2, @function
/* __ffssi2: find-first-set of a2 (1-based index of the lowest set
   bit, 0 when a2 == 0); result in a2.  Same scheme as __ctzsi2 but
   biased by 32, which also yields 0 for a zero input because the
   nsau of 0 is 32.  Clobbers a3, a4, a5.  */
__ffssi2:
	leaf_entry sp, 16
	neg	a3, a2
	and	a3, a3, a2		/* a3 = lowest set bit of a2 */
	do_nsau	a2, a3, a4, a5
	neg	a2, a2
	addi	a2, a2, 32		/* a2 = 32 - nsau */
	leaf_return
	.size	__ffssi2, . - __ffssi2

#endif /* L_ffssi2 */
03984308 BW |
504 | |
505 | ||
#ifdef L_udivsi3
	.align	4
	.global	__udivsi3
	.type	__udivsi3, @function
/* __udivsi3: unsigned 32-bit division.  Dividend in a2, divisor in a3;
   quotient returned in a2.  Division by zero executes an illegal
   instruction (see .Lerror below).  */
__udivsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	quou	a2, a2, a3
#else
	bltui	a3, 2, .Lle_one	/* check if the divisor <= 1 */

	mov	a6, a2		/* keep dividend in a6 */
	do_nsau	a5, a6, a2, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a2, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3		/* divisor <<= count */
	movi	a2, 0		/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend	/* zero-overhead loop; skips if a4 == 0 */
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment quotient if dividend >= divisor */
.Lreturn:
	leaf_return

.Lle_one:
	beqz	a3, .Lerror	/* if divisor == 1, return the dividend */
	leaf_return

.Lspecial:
	/* return dividend >= divisor */
	bltu	a6, a3, .Lreturn0
	movi	a2, 1
	leaf_return

.Lerror:
	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__udivsi3, . - __udivsi3

#endif /* L_udivsi3 */
573 | ||
574 | ||
#ifdef L_divsi3
	.align	4
	.global	__divsi3
	.type	__divsi3, @function
/* __divsi3: signed 32-bit division.  Dividend in a2, divisor in a3;
   quotient returned in a2.  Performs unsigned division on absolute
   values and fixes up the sign at the end.  Division by zero executes
   an illegal instruction (see .Lerror below).  */
__divsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	quos	a2, a2, a3
#else
	xor	a7, a2, a3	/* sign = dividend ^ divisor */
	do_abs	a6, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a6, a2, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a2, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3		/* udivisor <<= count */
	movi	a2, 0		/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend	/* zero-overhead loop; skips if a4 == 0 */
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment if udividend >= udivisor */
.Lreturn:
	neg	a5, a2
	movltz	a2, a5, a7	/* return (sign < 0) ? -quotient : quotient */
	leaf_return

.Lle_one:
	beqz	a3, .Lerror
	neg	a2, a6		/* if udivisor == 1, then return... */
	movgez	a2, a6, a7	/* (sign < 0) ? -udividend : udividend */
	leaf_return

.Lspecial:
	bltu	a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
	movi	a2, 1
	movi	a4, -1
	movltz	a2, a4, a7	/* else return (sign < 0) ? -1 : 1 */
	leaf_return

.Lerror:
	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__divsi3, . - __divsi3

#endif /* L_divsi3 */
648 | ||
649 | ||
#ifdef L_umodsi3
	.align	4
	.global	__umodsi3
	.type	__umodsi3, @function
/* __umodsi3: unsigned 32-bit remainder.  Dividend in a2, divisor in
   a3; remainder returned in a2.  Division by zero executes an illegal
   instruction.  */
__umodsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	remu	a2, a2, a3
#else
	bltui	a3, 2, .Lle_one	/* check if the divisor is <= 1 */

	do_nsau	a5, a2, a6, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a6, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3		/* divisor <<= count */

	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend	/* zero-overhead loop; skips if a4 == 0 */
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract once more if dividend >= divisor */
.Lreturn:
	leaf_return

.Lle_one:
	bnez	a3, .Lreturn0	/* modulo 1 is always 0 */

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__umodsi3, . - __umodsi3

#endif /* L_umodsi3 */
706 | ||
707 | ||
#ifdef L_modsi3
	.align	4
	.global	__modsi3
	.type	__modsi3, @function
/* __modsi3: signed 32-bit remainder.  Dividend in a2, divisor in a3;
   remainder returned in a2 with the sign of the dividend (C semantics).
   Division by zero executes an illegal instruction.  */
__modsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	rems	a2, a2, a3
#else
	mov	a7, a2		/* save original (signed) dividend */
	do_abs	a2, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a2, a6, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a6, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3		/* udivisor <<= count */

	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend	/* zero-overhead loop; skips if a4 == 0 */
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract again if udividend >= udivisor */
.Lreturn:
	bgez	a7, .Lpositive
	neg	a2, a2		/* if (dividend < 0), return -udividend */
.Lpositive:
	leaf_return

.Lle_one:
	bnez	a3, .Lreturn0	/* modulo 1 is always 0 */

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__modsi3, . - __modsi3

#endif /* L_modsi3 */
134c8a50 | 769 | |

/* Register aliases for the high (uh) and low (ul) words of a DImode
   value, depending on the endianness of the target.  Shared by the
   64-bit shift routines below.  */
#ifdef __XTENSA_EB__
#define uh a2
#define ul a3
#else
#define uh a3
#define ul a2
#endif /* __XTENSA_EB__ */


#ifdef L_ashldi3
	.align	4
	.global	__ashldi3
	.type	__ashldi3, @function
/* __ashldi3: 64-bit left shift.  Value in a2/a3 (word order per
   endianness; see uh/ul), shift amount in a4; result in a2/a3.  */
__ashldi3:
	leaf_entry sp, 16
	ssl	a4
	bgei	a4, 32, .Llow_only
	src	uh, uh, ul	/* funnel-shift bits from ul into uh */
	sll	ul, ul
	leaf_return

.Llow_only:
	/* Shift >= 32: the low word moves entirely into the high word.  */
	sll	uh, ul
	movi	ul, 0
	leaf_return
	.size	__ashldi3, . - __ashldi3

#endif /* L_ashldi3 */
799 | ||
800 | ||
#ifdef L_ashrdi3
	.align	4
	.global	__ashrdi3
	.type	__ashrdi3, @function
/* __ashrdi3: 64-bit arithmetic right shift.  Value in a2/a3 (word
   order per endianness; see uh/ul), shift amount in a4; result in
   a2/a3.  */
__ashrdi3:
	leaf_entry sp, 16
	ssr	a4
	bgei	a4, 32, .Lhigh_only
	src	ul, uh, ul	/* funnel-shift bits from uh into ul */
	sra	uh, uh
	leaf_return

.Lhigh_only:
	/* Shift >= 32: low word comes from the high word; the high word
	   is filled with copies of the sign bit.  */
	sra	ul, uh
	srai	uh, uh, 31
	leaf_return
	.size	__ashrdi3, . - __ashrdi3

#endif /* L_ashrdi3 */
820 | ||
821 | ||
#ifdef L_lshrdi3
	.align	4
	.global	__lshrdi3
	.type	__lshrdi3, @function
/* __lshrdi3: 64-bit logical right shift.  Value in a2/a3 (word order
   per endianness; see uh/ul), shift amount in a4; result in a2/a3.  */
__lshrdi3:
	leaf_entry sp, 16
	ssr	a4
	bgei	a4, 32, .Lhigh_only1
	src	ul, uh, ul	/* funnel-shift bits from uh into ul */
	srl	uh, uh
	leaf_return

.Lhigh_only1:
	/* Shift >= 32: low word comes from the high word; the high word
	   is zero-filled.  */
	srl	ul, uh
	movi	uh, 0
	leaf_return
	.size	__lshrdi3, . - __lshrdi3

#endif /* L_lshrdi3 */
841 | ||
842 | ||
#ifdef L_bswapsi2
	.align	4
	.global	__bswapsi2
	.type	__bswapsi2, @function
/* __bswapsi2: reverse the byte order of the 32-bit value in a2;
   result in a2.  Uses SAR = 8 and a chain of funnel shifts (SRC)
   to swap all four bytes.  Clobbers a3.  */
__bswapsi2:
	leaf_entry sp, 16
	ssai	8
	srli	a3, a2, 16
	src	a3, a3, a2
	src	a3, a3, a3
	src	a2, a2, a3
	leaf_return
	.size	__bswapsi2, . - __bswapsi2

#endif /* L_bswapsi2 */
858 | ||
859 | ||
#ifdef L_bswapdi2
	.align	4
	.global	__bswapdi2
	.type	__bswapdi2, @function
/* __bswapdi2: reverse the byte order of the 64-bit value in a2/a3;
   result in a2/a3.  Byte-swaps each 32-bit word with the same SRC
   chain as __bswapsi2, then exchanges the two words.  Clobbers a4.  */
__bswapdi2:
	leaf_entry sp, 16
	ssai	8
	srli	a4, a2, 16	/* byte-swap a2 into a4 */
	src	a4, a4, a2
	src	a4, a4, a4
	src	a4, a2, a4
	srli	a2, a3, 16	/* byte-swap a3 into a2 */
	src	a2, a2, a3
	src	a2, a2, a2
	src	a2, a3, a2
	mov	a3, a4		/* swap the two words */
	leaf_return
	.size	__bswapdi2, . - __bswapdi2

#endif /* L_bswapdi2 */
880 | ||
881 | ||
134c8a50 BW |
882 | #include "ieee754-df.S" |
883 | #include "ieee754-sf.S" |