Commit | Line | Data |
---|---|---|
134c8a50 | 1 | /* IEEE-754 single-precision functions for Xtensa |
a5544970 | 2 | Copyright (C) 2006-2019 Free Software Foundation, Inc. |
134c8a50 BW | 3 | Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
748086b7 | 9 | the Free Software Foundation; either version 3, or (at your option) |
134c8a50 BW | 10 | any later version. |
11 | ||
134c8a50 BW | 12 | GCC is distributed in the hope that it will be useful, but WITHOUT |
13 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
15 | License for more details. | |
16 | ||
748086b7 JJ | 17 | Under Section 7 of GPL version 3, you are granted additional |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
134c8a50 BW |
25 | |
26 | #ifdef __XTENSA_EB__ | |
27 | #define xh a2 | |
28 | #define xl a3 | |
29 | #define yh a4 | |
30 | #define yl a5 | |
31 | #else | |
32 | #define xh a3 | |
33 | #define xl a2 | |
34 | #define yh a5 | |
35 | #define yl a4 | |
36 | #endif | |
37 | ||
38 | /* Warning! The branch displacements for some Xtensa branch instructions | |
39 | are quite small, and this code has been carefully laid out to keep | |
40 | branch targets in range. If you change anything, be sure to check that | |
41 | the assembler is not relaxing anything to branch over a jump. */ | |
42 | ||
43 | #ifdef L_negsf2 | |
44 | ||
45 | .align 4 | |
46 | .global __negsf2 | |
47 | .type __negsf2, @function | |
48 | __negsf2: | |
b7974b3a | 49 | leaf_entry sp, 16 |
134c8a50 BW |
50 | movi a4, 0x80000000 |
51 | xor a2, a2, a4 | |
b7974b3a | 52 | leaf_return |
134c8a50 BW |
53 | |
54 | #endif /* L_negsf2 */ | |
55 | ||
56 | #ifdef L_addsubsf3 | |
57 | ||
faef260e | 58 | .literal_position |
134c8a50 BW |
59 | /* Addition */ |
60 | __addsf3_aux: | |
61 | ||
62 | /* Handle NaNs and Infinities. (This code is placed before the | |
63 | start of the function just to keep it in range of the limited | |
64 | branch displacements.) */ | |
65 | ||
66 | .Ladd_xnan_or_inf: | |
67 | /* If y is neither Infinity nor NaN, return x. */ | |
0889f168 | 68 | bnall a3, a6, .Ladd_return_nan_or_inf |
134c8a50 BW |
69 | /* If x is a NaN, return it. Otherwise, return y. */ |
70 | slli a7, a2, 9 | |
0889f168 | 71 | bnez a7, .Ladd_return_nan |
134c8a50 BW |
72 | |
73 | .Ladd_ynan_or_inf: | |
74 | /* Return y. */ | |
75 | mov a2, a3 | |
0889f168 MF |
76 | |
77 | .Ladd_return_nan_or_inf: | |
78 | slli a7, a2, 9 | |
79 | bnez a7, .Ladd_return_nan | |
80 | leaf_return | |
81 | ||
82 | .Ladd_return_nan: | |
83 | movi a6, 0x400000 /* make it a quiet NaN */ | |
84 | or a2, a2, a6 | |
b7974b3a | 85 | leaf_return |
134c8a50 BW |
86 | |
87 | .Ladd_opposite_signs: | |
88 | /* Operand signs differ. Do a subtraction. */ | |
89 | slli a7, a6, 8 | |
90 | xor a3, a3, a7 | |
91 | j .Lsub_same_sign | |
92 | ||
93 | .align 4 | |
94 | .global __addsf3 | |
95 | .type __addsf3, @function | |
96 | __addsf3: | |
b7974b3a | 97 | leaf_entry sp, 16 |
134c8a50 BW |
98 | movi a6, 0x7f800000 |
99 | ||
100 | /* Check if the two operands have the same sign. */ | |
101 | xor a7, a2, a3 | |
102 | bltz a7, .Ladd_opposite_signs | |
103 | ||
104 | .Ladd_same_sign: | |
105 | /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ | |
106 | ball a2, a6, .Ladd_xnan_or_inf | |
107 | ball a3, a6, .Ladd_ynan_or_inf | |
108 | ||
109 | /* Compare the exponents. The smaller operand will be shifted | |
110 | right by the exponent difference and added to the larger | |
111 | one. */ | |
112 | extui a7, a2, 23, 9 | |
113 | extui a8, a3, 23, 9 | |
114 | bltu a7, a8, .Ladd_shiftx | |
115 | ||
116 | .Ladd_shifty: | |
117 | /* Check if the smaller (or equal) exponent is zero. */ | |
118 | bnone a3, a6, .Ladd_yexpzero | |
119 | ||
120 | /* Replace y sign/exponent with 0x008. */ | |
121 | or a3, a3, a6 | |
122 | slli a3, a3, 8 | |
123 | srli a3, a3, 8 | |
124 | ||
125 | .Ladd_yexpdiff: | |
126 | /* Compute the exponent difference. */ | |
127 | sub a10, a7, a8 | |
128 | ||
129 | /* Exponent difference > 32 -- just return the bigger value. */ | |
130 | bgeui a10, 32, 1f | |
131 | ||
132 | /* Shift y right by the exponent difference. Any bits that are | |
133 | shifted out of y are saved in a9 for rounding the result. */ | |
134 | ssr a10 | |
135 | movi a9, 0 | |
136 | src a9, a3, a9 | |
137 | srl a3, a3 | |
138 | ||
139 | /* Do the addition. */ | |
140 | add a2, a2, a3 | |
141 | ||
142 | /* Check if the add overflowed into the exponent. */ | |
143 | extui a10, a2, 23, 9 | |
144 | beq a10, a7, .Ladd_round | |
145 | mov a8, a7 | |
146 | j .Ladd_carry | |
147 | ||
148 | .Ladd_yexpzero: | |
149 | /* y is a subnormal value. Replace its sign/exponent with zero, | |
150 | i.e., no implicit "1.0", and increment the apparent exponent | |
151 | because subnormals behave as if they had the minimum (nonzero) | |
152 | exponent. Test for the case when both exponents are zero. */ | |
153 | slli a3, a3, 9 | |
154 | srli a3, a3, 9 | |
155 | bnone a2, a6, .Ladd_bothexpzero | |
156 | addi a8, a8, 1 | |
157 | j .Ladd_yexpdiff | |
158 | ||
159 | .Ladd_bothexpzero: | |
160 | /* Both exponents are zero. Handle this as a special case. There | |
161 | is no need to shift or round, and the normal code for handling | |
162 | a carry into the exponent field will not work because it | |
163 | assumes there is an implicit "1.0" that needs to be added. */ | |
164 | add a2, a2, a3 | |
b7974b3a | 165 | 1: leaf_return |
134c8a50 BW |
166 | |
167 | .Ladd_xexpzero: | |
168 | /* Same as "yexpzero" except skip handling the case when both | |
169 | exponents are zero. */ | |
170 | slli a2, a2, 9 | |
171 | srli a2, a2, 9 | |
172 | addi a7, a7, 1 | |
173 | j .Ladd_xexpdiff | |
174 | ||
175 | .Ladd_shiftx: | |
176 | /* Same thing as the "shifty" code, but with x and y swapped. Also, | |
177 | because the exponent difference is always nonzero in this version, | |
178 | the shift sequence can use SLL and skip loading a constant zero. */ | |
179 | bnone a2, a6, .Ladd_xexpzero | |
180 | ||
181 | or a2, a2, a6 | |
182 | slli a2, a2, 8 | |
183 | srli a2, a2, 8 | |
184 | ||
185 | .Ladd_xexpdiff: | |
186 | sub a10, a8, a7 | |
187 | bgeui a10, 32, .Ladd_returny | |
188 | ||
189 | ssr a10 | |
190 | sll a9, a2 | |
191 | srl a2, a2 | |
192 | ||
193 | add a2, a2, a3 | |
194 | ||
195 | /* Check if the add overflowed into the exponent. */ | |
196 | extui a10, a2, 23, 9 | |
197 | bne a10, a8, .Ladd_carry | |
198 | ||
199 | .Ladd_round: | |
200 | /* Round up if the leftover fraction is >= 1/2. */ | |
201 | bgez a9, 1f | |
202 | addi a2, a2, 1 | |
203 | ||
204 | /* Check if the leftover fraction is exactly 1/2. */ | |
205 | slli a9, a9, 1 | |
206 | beqz a9, .Ladd_exactlyhalf | |
b7974b3a | 207 | 1: leaf_return |
134c8a50 BW |
208 | |
209 | .Ladd_returny: | |
210 | mov a2, a3 | |
b7974b3a | 211 | leaf_return |
134c8a50 BW |
212 | |
213 | .Ladd_carry: | |
214 | /* The addition has overflowed into the exponent field, so the | |
215 | value needs to be renormalized. The mantissa of the result | |
216 | can be recovered by subtracting the original exponent and | |
217 | adding 0x800000 (which is the explicit "1.0" for the | |
218 | mantissa of the non-shifted operand -- the "1.0" for the | |
219 | shifted operand was already added). The mantissa can then | |
220 | be shifted right by one bit. The explicit "1.0" of the | |
221 | shifted mantissa then needs to be replaced by the exponent, | |
222 | incremented by one to account for the normalizing shift. | |
223 | It is faster to combine these operations: do the shift first | |
224 | and combine the additions and subtractions. If x is the | |
225 | original exponent, the result is: | |
226 | shifted mantissa - (x << 22) + (1 << 22) + (x << 23) | |
227 | or: | |
228 | shifted mantissa + ((x + 1) << 22) | |
229 | Note that the exponent is incremented here by leaving the | |
230 | explicit "1.0" of the mantissa in the exponent field. */ | |
231 | ||
232 | /* Shift x right by one bit. Save the lsb. */ | |
233 | mov a10, a2 | |
234 | srli a2, a2, 1 | |
235 | ||
236 | /* See explanation above. The original exponent is in a8. */ | |
237 | addi a8, a8, 1 | |
238 | slli a8, a8, 22 | |
239 | add a2, a2, a8 | |
240 | ||
241 | /* Return an Infinity if the exponent overflowed. */ | |
242 | ball a2, a6, .Ladd_infinity | |
243 | ||
244 | /* Same thing as the "round" code except the msb of the leftover | |
245 | fraction is bit 0 of a10, with the rest of the fraction in a9. */ | |
246 | bbci.l a10, 0, 1f | |
247 | addi a2, a2, 1 | |
248 | beqz a9, .Ladd_exactlyhalf | |
b7974b3a | 249 | 1: leaf_return |
134c8a50 BW |
250 | |
251 | .Ladd_infinity: | |
252 | /* Clear the mantissa. */ | |
253 | srli a2, a2, 23 | |
254 | slli a2, a2, 23 | |
255 | ||
256 | /* The sign bit may have been lost in a carry-out. Put it back. */ | |
257 | slli a8, a8, 1 | |
258 | or a2, a2, a8 | |
b7974b3a | 259 | leaf_return |
134c8a50 BW |
260 | |
261 | .Ladd_exactlyhalf: | |
262 | /* Round down to the nearest even value. */ | |
263 | srli a2, a2, 1 | |
264 | slli a2, a2, 1 | |
b7974b3a | 265 | leaf_return |
134c8a50 BW |
266 | |
267 | ||
268 | /* Subtraction */ | |
269 | __subsf3_aux: | |
270 | ||
271 | /* Handle NaNs and Infinities. (This code is placed before the | |
272 | start of the function just to keep it in range of the limited | |
273 | branch displacements.) */ | |
274 | ||
275 | .Lsub_xnan_or_inf: | |
276 | /* If y is neither Infinity nor NaN, return x. */ | |
0889f168 | 277 | bnall a3, a6, .Lsub_return_nan_or_inf |
134c8a50 | 278 | /* Both x and y are either NaN or Inf, so the result is NaN. */ |
0889f168 MF |
279 | |
280 | .Lsub_return_nan: | |
134c8a50 BW |
281 | movi a4, 0x400000 /* make it a quiet NaN */ |
282 | or a2, a2, a4 | |
0889f168 | 283 | leaf_return |
134c8a50 BW |
284 | |
285 | .Lsub_ynan_or_inf: | |
286 | /* Negate y and return it. */ | |
287 | slli a7, a6, 8 | |
288 | xor a2, a3, a7 | |
0889f168 MF |
289 | |
290 | .Lsub_return_nan_or_inf: | |
291 | slli a7, a2, 9 | |
292 | bnez a7, .Lsub_return_nan | |
b7974b3a | 293 | leaf_return |
134c8a50 BW |
294 | |
295 | .Lsub_opposite_signs: | |
296 | /* Operand signs differ. Do an addition. */ | |
297 | slli a7, a6, 8 | |
298 | xor a3, a3, a7 | |
299 | j .Ladd_same_sign | |
300 | ||
301 | .align 4 | |
302 | .global __subsf3 | |
303 | .type __subsf3, @function | |
304 | __subsf3: | |
b7974b3a | 305 | leaf_entry sp, 16 |
134c8a50 BW |
306 | movi a6, 0x7f800000 |
307 | ||
308 | /* Check if the two operands have the same sign. */ | |
309 | xor a7, a2, a3 | |
310 | bltz a7, .Lsub_opposite_signs | |
311 | ||
312 | .Lsub_same_sign: | |
313 | /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ | |
314 | ball a2, a6, .Lsub_xnan_or_inf | |
315 | ball a3, a6, .Lsub_ynan_or_inf | |
316 | ||
317 | /* Compare the operands. In contrast to addition, the entire | |
318 | value matters here. */ | |
319 | extui a7, a2, 23, 8 | |
320 | extui a8, a3, 23, 8 | |
321 | bltu a2, a3, .Lsub_xsmaller | |
322 | ||
323 | .Lsub_ysmaller: | |
324 | /* Check if the smaller (or equal) exponent is zero. */ | |
325 | bnone a3, a6, .Lsub_yexpzero | |
326 | ||
327 | /* Replace y sign/exponent with 0x008. */ | |
328 | or a3, a3, a6 | |
329 | slli a3, a3, 8 | |
330 | srli a3, a3, 8 | |
331 | ||
332 | .Lsub_yexpdiff: | |
333 | /* Compute the exponent difference. */ | |
334 | sub a10, a7, a8 | |
335 | ||
336 | /* Exponent difference > 32 -- just return the bigger value. */ | |
337 | bgeui a10, 32, 1f | |
338 | ||
339 | /* Shift y right by the exponent difference. Any bits that are | |
340 | shifted out of y are saved in a9 for rounding the result. */ | |
341 | ssr a10 | |
342 | movi a9, 0 | |
343 | src a9, a3, a9 | |
344 | srl a3, a3 | |
345 | ||
346 | sub a2, a2, a3 | |
347 | ||
348 | /* Subtract the leftover bits in a9 from zero and propagate any | |
349 | borrow from a2. */ | |
350 | neg a9, a9 | |
351 | addi a10, a2, -1 | |
352 | movnez a2, a10, a9 | |
353 | ||
354 | /* Check if the subtract underflowed into the exponent. */ | |
355 | extui a10, a2, 23, 8 | |
356 | beq a10, a7, .Lsub_round | |
357 | j .Lsub_borrow | |
358 | ||
359 | .Lsub_yexpzero: | |
360 | /* Return zero if the inputs are equal. (For the non-subnormal | |
361 | case, subtracting the "1.0" will cause a borrow from the exponent | |
362 | and this case can be detected when handling the borrow.) */ | |
363 | beq a2, a3, .Lsub_return_zero | |
364 | ||
365 | /* y is a subnormal value. Replace its sign/exponent with zero, | |
366 | i.e., no implicit "1.0". Unless x is also a subnormal, increment | |
367 | y's apparent exponent because subnormals behave as if they had | |
368 | the minimum (nonzero) exponent. */ | |
369 | slli a3, a3, 9 | |
370 | srli a3, a3, 9 | |
371 | bnone a2, a6, .Lsub_yexpdiff | |
372 | addi a8, a8, 1 | |
373 | j .Lsub_yexpdiff | |
374 | ||
375 | .Lsub_returny: | |
376 | /* Negate and return y. */ | |
377 | slli a7, a6, 8 | |
378 | xor a2, a3, a7 | |
b7974b3a | 379 | 1: leaf_return |
134c8a50 BW |
380 | |
381 | .Lsub_xsmaller: | |
382 | /* Same thing as the "ysmaller" code, but with x and y swapped and | |
383 | with y negated. */ | |
384 | bnone a2, a6, .Lsub_xexpzero | |
385 | ||
386 | or a2, a2, a6 | |
387 | slli a2, a2, 8 | |
388 | srli a2, a2, 8 | |
389 | ||
390 | .Lsub_xexpdiff: | |
391 | sub a10, a8, a7 | |
392 | bgeui a10, 32, .Lsub_returny | |
393 | ||
394 | ssr a10 | |
395 | movi a9, 0 | |
396 | src a9, a2, a9 | |
397 | srl a2, a2 | |
398 | ||
399 | /* Negate y. */ | |
400 | slli a11, a6, 8 | |
401 | xor a3, a3, a11 | |
402 | ||
403 | sub a2, a3, a2 | |
404 | ||
405 | neg a9, a9 | |
406 | addi a10, a2, -1 | |
407 | movnez a2, a10, a9 | |
408 | ||
409 | /* Check if the subtract underflowed into the exponent. */ | |
410 | extui a10, a2, 23, 8 | |
411 | bne a10, a8, .Lsub_borrow | |
412 | ||
413 | .Lsub_round: | |
414 | /* Round up if the leftover fraction is >= 1/2. */ | |
415 | bgez a9, 1f | |
416 | addi a2, a2, 1 | |
417 | ||
418 | /* Check if the leftover fraction is exactly 1/2. */ | |
419 | slli a9, a9, 1 | |
420 | beqz a9, .Lsub_exactlyhalf | |
b7974b3a | 421 | 1: leaf_return |
134c8a50 BW |
422 | |
423 | .Lsub_xexpzero: | |
424 | /* Same as "yexpzero". */ | |
425 | beq a2, a3, .Lsub_return_zero | |
426 | slli a2, a2, 9 | |
427 | srli a2, a2, 9 | |
428 | bnone a3, a6, .Lsub_xexpdiff | |
429 | addi a7, a7, 1 | |
430 | j .Lsub_xexpdiff | |
431 | ||
432 | .Lsub_return_zero: | |
433 | movi a2, 0 | |
b7974b3a | 434 | leaf_return |
134c8a50 BW |
435 | |
436 | .Lsub_borrow: | |
437 | /* The subtraction has underflowed into the exponent field, so the | |
438 | value needs to be renormalized. Shift the mantissa left as | |
439 | needed to remove any leading zeros and adjust the exponent | |
440 | accordingly. If the exponent is not large enough to remove | |
441 | all the leading zeros, the result will be a subnormal value. */ | |
442 | ||
443 | slli a8, a2, 9 | |
444 | beqz a8, .Lsub_xzero | |
445 | do_nsau a6, a8, a7, a11 | |
446 | srli a8, a8, 9 | |
447 | bge a6, a10, .Lsub_subnormal | |
448 | addi a6, a6, 1 | |
449 | ||
450 | .Lsub_normalize_shift: | |
451 | /* Shift the mantissa (a8/a9) left by a6. */ | |
452 | ssl a6 | |
453 | src a8, a8, a9 | |
454 | sll a9, a9 | |
455 | ||
456 | /* Combine the shifted mantissa with the sign and exponent, | |
457 | decrementing the exponent by a6. (The exponent has already | |
458 | been decremented by one due to the borrow from the subtraction, | |
459 | but adding the mantissa will increment the exponent by one.) */ | |
460 | srli a2, a2, 23 | |
461 | sub a2, a2, a6 | |
462 | slli a2, a2, 23 | |
463 | add a2, a2, a8 | |
464 | j .Lsub_round | |
465 | ||
466 | .Lsub_exactlyhalf: | |
467 | /* Round down to the nearest even value. */ | |
468 | srli a2, a2, 1 | |
469 | slli a2, a2, 1 | |
b7974b3a | 470 | leaf_return |
134c8a50 BW |
471 | |
472 | .Lsub_xzero: | |
473 | /* If there was a borrow from the exponent, and the mantissa and | |
474 | guard digits are all zero, then the inputs were equal and the | |
475 | result should be zero. */ | |
476 | beqz a9, .Lsub_return_zero | |
477 | ||
478 | /* Only the guard digit is nonzero. Shift by min(24, a10). */ | |
479 | addi a11, a10, -24 | |
480 | movi a6, 24 | |
481 | movltz a6, a10, a11 | |
482 | j .Lsub_normalize_shift | |
483 | ||
484 | .Lsub_subnormal: | |
485 | /* The exponent is too small to shift away all the leading zeros. | |
486 | Set a6 to the current exponent (which has already been | |
487 | decremented by the borrow) so that the exponent of the result | |
488 | will be zero. Do not add 1 to a6 in this case, because: (1) | |
489 | adding the mantissa will not increment the exponent, so there is | |
490 | no need to subtract anything extra from the exponent to | |
491 | compensate, and (2) the effective exponent of a subnormal is 1 | |
492 | not 0 so the shift amount must be 1 smaller than normal. */ | |
493 | mov a6, a10 | |
494 | j .Lsub_normalize_shift | |
495 | ||
496 | #endif /* L_addsubsf3 */ | |
497 | ||
498 | #ifdef L_mulsf3 | |
499 | ||
500 | /* Multiplication */ | |
7f0ee694 BW |
501 | #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 |
502 | #define XCHAL_NO_MUL 1 | |
503 | #endif | |
504 | ||
9bfcbdee | 505 | .literal_position |
134c8a50 BW |
506 | __mulsf3_aux: |
507 | ||
508 | /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). | |
509 | (This code is placed before the start of the function just to | |
510 | keep it in range of the limited branch displacements.) */ | |
511 | ||
512 | .Lmul_xexpzero: | |
513 | /* Clear the sign bit of x. */ | |
514 | slli a2, a2, 1 | |
515 | srli a2, a2, 1 | |
516 | ||
517 | /* If x is zero, return zero. */ | |
518 | beqz a2, .Lmul_return_zero | |
519 | ||
520 | /* Normalize x. Adjust the exponent in a8. */ | |
521 | do_nsau a10, a2, a11, a12 | |
522 | addi a10, a10, -8 | |
523 | ssl a10 | |
524 | sll a2, a2 | |
525 | movi a8, 1 | |
526 | sub a8, a8, a10 | |
527 | j .Lmul_xnormalized | |
528 | ||
529 | .Lmul_yexpzero: | |
530 | /* Clear the sign bit of y. */ | |
531 | slli a3, a3, 1 | |
532 | srli a3, a3, 1 | |
533 | ||
534 | /* If y is zero, return zero. */ | |
535 | beqz a3, .Lmul_return_zero | |
536 | ||
537 | /* Normalize y. Adjust the exponent in a9. */ | |
538 | do_nsau a10, a3, a11, a12 | |
539 | addi a10, a10, -8 | |
540 | ssl a10 | |
541 | sll a3, a3 | |
542 | movi a9, 1 | |
543 | sub a9, a9, a10 | |
544 | j .Lmul_ynormalized | |
545 | ||
546 | .Lmul_return_zero: | |
547 | /* Return zero with the appropriate sign bit. */ | |
548 | srli a2, a7, 31 | |
549 | slli a2, a2, 31 | |
550 | j .Lmul_done | |
551 | ||
552 | .Lmul_xnan_or_inf: | |
553 | /* If y is zero, return NaN. */ | |
554 | slli a8, a3, 1 | |
0889f168 | 555 | beqz a8, .Lmul_return_nan |
134c8a50 BW |
556 | /* If y is NaN, return y. */ |
557 | bnall a3, a6, .Lmul_returnx | |
558 | slli a8, a3, 9 | |
559 | beqz a8, .Lmul_returnx | |
560 | ||
561 | .Lmul_returny: | |
562 | mov a2, a3 | |
563 | ||
564 | .Lmul_returnx: | |
0889f168 MF |
565 | slli a8, a2, 9 |
566 | bnez a8, .Lmul_return_nan | |
134c8a50 BW |
567 | /* Set the sign bit and return. */ |
568 | extui a7, a7, 31, 1 | |
569 | slli a2, a2, 1 | |
570 | ssai 1 | |
571 | src a2, a7, a2 | |
572 | j .Lmul_done | |
573 | ||
574 | .Lmul_ynan_or_inf: | |
575 | /* If x is zero, return NaN. */ | |
576 | slli a8, a2, 1 | |
577 | bnez a8, .Lmul_returny | |
0889f168 MF |
578 | mov a2, a3 |
579 | ||
580 | .Lmul_return_nan: | |
581 | movi a4, 0x400000 /* make it a quiet NaN */ | |
582 | or a2, a2, a4 | |
134c8a50 BW |
583 | j .Lmul_done |
584 | ||
585 | .align 4 | |
586 | .global __mulsf3 | |
587 | .type __mulsf3, @function | |
588 | __mulsf3: | |
134c8a50 | 589 | #if __XTENSA_CALL0_ABI__ |
7f0ee694 | 590 | leaf_entry sp, 32 |
134c8a50 BW |
591 | addi sp, sp, -32 |
592 | s32i a12, sp, 16 | |
593 | s32i a13, sp, 20 | |
594 | s32i a14, sp, 24 | |
595 | s32i a15, sp, 28 | |
7f0ee694 BW |
596 | #elif XCHAL_NO_MUL |
597 | /* This is not really a leaf function; allocate enough stack space | |
598 | to allow CALL12s to a helper function. */ | |
599 | leaf_entry sp, 64 | |
600 | #else | |
601 | leaf_entry sp, 32 | |
134c8a50 BW |
602 | #endif |
603 | movi a6, 0x7f800000 | |
604 | ||
605 | /* Get the sign of the result. */ | |
606 | xor a7, a2, a3 | |
607 | ||
608 | /* Check for NaN and infinity. */ | |
609 | ball a2, a6, .Lmul_xnan_or_inf | |
610 | ball a3, a6, .Lmul_ynan_or_inf | |
611 | ||
612 | /* Extract the exponents. */ | |
613 | extui a8, a2, 23, 8 | |
614 | extui a9, a3, 23, 8 | |
615 | ||
616 | beqz a8, .Lmul_xexpzero | |
617 | .Lmul_xnormalized: | |
618 | beqz a9, .Lmul_yexpzero | |
619 | .Lmul_ynormalized: | |
620 | ||
621 | /* Add the exponents. */ | |
622 | add a8, a8, a9 | |
623 | ||
624 | /* Replace sign/exponent fields with explicit "1.0". */ | |
625 | movi a10, 0xffffff | |
626 | or a2, a2, a6 | |
627 | and a2, a2, a10 | |
628 | or a3, a3, a6 | |
629 | and a3, a3, a10 | |
630 | ||
631 | /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */ | |
632 | ||
633 | #if XCHAL_HAVE_MUL32_HIGH | |
634 | ||
635 | mull a6, a2, a3 | |
636 | muluh a2, a2, a3 | |
637 | ||
638 | #else | |
639 | ||
640 | /* Break the inputs into 16-bit chunks and compute 4 32-bit partial | |
641 | products. These partial products are: | |
642 | ||
643 | 0 xl * yl | |
644 | ||
645 | 1 xl * yh | |
646 | 2 xh * yl | |
647 | ||
648 | 3 xh * yh | |
649 | ||
650 | If using the Mul16 or Mul32 multiplier options, these input | |
651 | chunks must be stored in separate registers. For Mac16, the | |
652 | UMUL.AA.* opcodes can specify that the inputs come from either | |
653 | half of the registers, so there is no need to shift them out | |
654 | ahead of time. If there is no multiply hardware, the 16-bit | |
655 | chunks can be extracted when setting up the arguments to the | |
656 | separate multiply function. */ | |
657 | ||
7f0ee694 | 658 | #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL |
134c8a50 BW |
659 | /* Calling a separate multiply function will clobber a0 and requires |
660 | use of a8 as a temporary, so save those values now. (The function | |
661 | uses a custom ABI so nothing else needs to be saved.) */ | |
662 | s32i a0, sp, 0 | |
663 | s32i a8, sp, 4 | |
664 | #endif | |
665 | ||
666 | #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 | |
667 | ||
668 | #define a2h a4 | |
669 | #define a3h a5 | |
670 | ||
671 | /* Get the high halves of the inputs into registers. */ | |
672 | srli a2h, a2, 16 | |
673 | srli a3h, a3, 16 | |
674 | ||
675 | #define a2l a2 | |
676 | #define a3l a3 | |
677 | ||
678 | #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 | |
679 | /* Clear the high halves of the inputs. This does not matter | |
680 | for MUL16 because the high bits are ignored. */ | |
681 | extui a2, a2, 0, 16 | |
682 | extui a3, a3, 0, 16 | |
683 | #endif | |
684 | #endif /* MUL16 || MUL32 */ | |
685 | ||
686 | ||
687 | #if XCHAL_HAVE_MUL16 | |
688 | ||
689 | #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | |
690 | mul16u dst, xreg ## xhalf, yreg ## yhalf | |
691 | ||
692 | #elif XCHAL_HAVE_MUL32 | |
693 | ||
694 | #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | |
695 | mull dst, xreg ## xhalf, yreg ## yhalf | |
696 | ||
697 | #elif XCHAL_HAVE_MAC16 | |
698 | ||
699 | /* The preprocessor insists on inserting a space when concatenating after | |
700 | a period in the definition of do_mul below. These macros are a workaround | |
701 | using underscores instead of periods when doing the concatenation. */ | |
702 | #define umul_aa_ll umul.aa.ll | |
703 | #define umul_aa_lh umul.aa.lh | |
704 | #define umul_aa_hl umul.aa.hl | |
705 | #define umul_aa_hh umul.aa.hh | |
706 | ||
707 | #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | |
708 | umul_aa_ ## xhalf ## yhalf xreg, yreg; \ | |
709 | rsr dst, ACCLO | |
710 | ||
711 | #else /* no multiply hardware */ | |
712 | ||
713 | #define set_arg_l(dst, src) \ | |
714 | extui dst, src, 0, 16 | |
715 | #define set_arg_h(dst, src) \ | |
716 | srli dst, src, 16 | |
717 | ||
7f0ee694 | 718 | #if __XTENSA_CALL0_ABI__ |
134c8a50 BW |
719 | #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ |
720 | set_arg_ ## xhalf (a13, xreg); \ | |
721 | set_arg_ ## yhalf (a14, yreg); \ | |
722 | call0 .Lmul_mulsi3; \ | |
723 | mov dst, a12 | |
7f0ee694 BW |
724 | #else |
725 | #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | |
726 | set_arg_ ## xhalf (a14, xreg); \ | |
727 | set_arg_ ## yhalf (a15, yreg); \ | |
728 | call12 .Lmul_mulsi3; \ | |
729 | mov dst, a14 | |
730 | #endif /* __XTENSA_CALL0_ABI__ */ | |
731 | ||
732 | #endif /* no multiply hardware */ | |
134c8a50 BW |
733 | |
734 | /* Add pp1 and pp2 into a6 with carry-out in a9. */ | |
735 | do_mul(a6, a2, l, a3, h) /* pp 1 */ | |
736 | do_mul(a11, a2, h, a3, l) /* pp 2 */ | |
737 | movi a9, 0 | |
738 | add a6, a6, a11 | |
739 | bgeu a6, a11, 1f | |
740 | addi a9, a9, 1 | |
741 | 1: | |
742 | /* Shift the high half of a9/a6 into position in a9. Note that | |
743 | this value can be safely incremented without any carry-outs. */ | |
744 | ssai 16 | |
745 | src a9, a9, a6 | |
746 | ||
747 | /* Compute the low word into a6. */ | |
748 | do_mul(a11, a2, l, a3, l) /* pp 0 */ | |
749 | sll a6, a6 | |
750 | add a6, a6, a11 | |
751 | bgeu a6, a11, 1f | |
752 | addi a9, a9, 1 | |
753 | 1: | |
754 | /* Compute the high word into a2. */ | |
755 | do_mul(a2, a2, h, a3, h) /* pp 3 */ | |
756 | add a2, a2, a9 | |
757 | ||
7f0ee694 | 758 | #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL |
134c8a50 BW |
759 | /* Restore values saved on the stack during the multiplication. */ |
760 | l32i a0, sp, 0 | |
761 | l32i a8, sp, 4 | |
762 | #endif | |
7f0ee694 | 763 | #endif /* ! XCHAL_HAVE_MUL32_HIGH */ |
134c8a50 BW |
764 | |
765 | /* Shift left by 9 bits, unless there was a carry-out from the | |
766 | multiply, in which case, shift by 8 bits and increment the | |
767 | exponent. */ | |
768 | movi a4, 9 | |
769 | srli a5, a2, 24 - 9 | |
770 | beqz a5, 1f | |
771 | addi a4, a4, -1 | |
772 | addi a8, a8, 1 | |
773 | 1: ssl a4 | |
774 | src a2, a2, a6 | |
775 | sll a6, a6 | |
776 | ||
777 | /* Subtract the extra bias from the exponent sum (plus one to account | |
778 | for the explicit "1.0" of the mantissa that will be added to the | |
779 | exponent in the final result). */ | |
780 | movi a4, 0x80 | |
781 | sub a8, a8, a4 | |
782 | ||
783 | /* Check for over/underflow. The value in a8 is one less than the | |
784 | final exponent, so values in the range 0..fd are OK here. */ | |
785 | movi a4, 0xfe | |
786 | bgeu a8, a4, .Lmul_overflow | |
787 | ||
788 | .Lmul_round: | |
789 | /* Round. */ | |
790 | bgez a6, .Lmul_rounded | |
791 | addi a2, a2, 1 | |
792 | slli a6, a6, 1 | |
793 | beqz a6, .Lmul_exactlyhalf | |
794 | ||
795 | .Lmul_rounded: | |
796 | /* Add the exponent to the mantissa. */ | |
797 | slli a8, a8, 23 | |
798 | add a2, a2, a8 | |
799 | ||
800 | .Lmul_addsign: | |
801 | /* Add the sign bit. */ | |
802 | srli a7, a7, 31 | |
803 | slli a7, a7, 31 | |
804 | or a2, a2, a7 | |
805 | ||
806 | .Lmul_done: | |
807 | #if __XTENSA_CALL0_ABI__ | |
808 | l32i a12, sp, 16 | |
809 | l32i a13, sp, 20 | |
810 | l32i a14, sp, 24 | |
811 | l32i a15, sp, 28 | |
812 | addi sp, sp, 32 | |
813 | #endif | |
b7974b3a | 814 | leaf_return |
134c8a50 BW |
815 | |
816 | .Lmul_exactlyhalf: | |
817 | /* Round down to the nearest even value. */ | |
818 | srli a2, a2, 1 | |
819 | slli a2, a2, 1 | |
820 | j .Lmul_rounded | |
821 | ||
822 | .Lmul_overflow: | |
823 | bltz a8, .Lmul_underflow | |
824 | /* Return +/- Infinity. */ | |
825 | movi a8, 0xff | |
826 | slli a2, a8, 23 | |
827 | j .Lmul_addsign | |
828 | ||
829 | .Lmul_underflow: | |
830 | /* Create a subnormal value, where the exponent field contains zero, | |
831 | but the effective exponent is 1. The value of a8 is one less than | |
832 | the actual exponent, so just negate it to get the shift amount. */ | |
833 | neg a8, a8 | |
834 | mov a9, a6 | |
835 | ssr a8 | |
836 | bgeui a8, 32, .Lmul_flush_to_zero | |
837 | ||
838 | /* Shift a2 right. Any bits that are shifted out of a2 are saved | |
839 | in a6 (combined with the shifted-out bits currently in a6) for | |
840 | rounding the result. */ | |
841 | sll a6, a2 | |
842 | srl a2, a2 | |
843 | ||
844 | /* Set the exponent to zero. */ | |
845 | movi a8, 0 | |
846 | ||
847 | /* Pack any nonzero bits shifted out into a6. */ | |
848 | beqz a9, .Lmul_round | |
849 | movi a9, 1 | |
850 | or a6, a6, a9 | |
851 | j .Lmul_round | |
852 | ||
853 | .Lmul_flush_to_zero: | |
854 | /* Return zero with the appropriate sign bit. */ | |
855 | srli a2, a7, 31 | |
856 | slli a2, a2, 31 | |
857 | j .Lmul_done | |
858 | ||
7f0ee694 | 859 | #if XCHAL_NO_MUL |
134c8a50 BW |
860 | |
861 | /* For Xtensa processors with no multiply hardware, this simplified | |
862 | version of _mulsi3 is used for multiplying 16-bit chunks of | |
7f0ee694 BW |
863 | the floating-point mantissas. When using CALL0, this function |
864 | uses a custom ABI: the inputs are passed in a13 and a14, the | |
865 | result is returned in a12, and a8 and a15 are clobbered. */ | |
134c8a50 BW |
866 | .align 4 |
867 | .Lmul_mulsi3: | |
7f0ee694 BW |
868 | leaf_entry sp, 16 |
869 | .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 | |
870 | movi \dst, 0 | |
871 | 1: add \tmp1, \src2, \dst | |
872 | extui \tmp2, \src1, 0, 1 | |
873 | movnez \dst, \tmp1, \tmp2 | |
874 | ||
875 | do_addx2 \tmp1, \src2, \dst, \tmp1 | |
876 | extui \tmp2, \src1, 1, 1 | |
877 | movnez \dst, \tmp1, \tmp2 | |
878 | ||
879 | do_addx4 \tmp1, \src2, \dst, \tmp1 | |
880 | extui \tmp2, \src1, 2, 1 | |
881 | movnez \dst, \tmp1, \tmp2 | |
882 | ||
883 | do_addx8 \tmp1, \src2, \dst, \tmp1 | |
884 | extui \tmp2, \src1, 3, 1 | |
885 | movnez \dst, \tmp1, \tmp2 | |
886 | ||
887 | srli \src1, \src1, 4 | |
888 | slli \src2, \src2, 4 | |
889 | bnez \src1, 1b | |
890 | .endm | |
891 | #if __XTENSA_CALL0_ABI__ | |
892 | mul_mulsi3_body a12, a13, a14, a15, a8 | |
893 | #else | |
894 | /* The result will be written into a2, so save that argument in a4. */ | |
895 | mov a4, a2 | |
896 | mul_mulsi3_body a2, a4, a3, a5, a6 | |
897 | #endif | |
898 | leaf_return | |
899 | #endif /* XCHAL_NO_MUL */ | |
134c8a50 BW |
900 | #endif /* L_mulsf3 */ |
901 | ||
902 | #ifdef L_divsf3 | |
903 | ||
904 | /* Division */ | |
66192aa1 DKC |
905 | |
906 | #if XCHAL_HAVE_FP_DIV | |
907 | ||
908 | .align 4 | |
909 | .global __divsf3 | |
910 | .type __divsf3, @function | |
911 | __divsf3: | |
912 | leaf_entry sp, 16 | |
913 | ||
914 | wfr f1, a2 /* dividend */ | |
915 | wfr f2, a3 /* divisor */ | |
916 | ||
917 | div0.s f3, f2 | |
918 | nexp01.s f4, f2 | |
919 | const.s f5, 1 | |
920 | maddn.s f5, f4, f3 | |
921 | mov.s f6, f3 | |
922 | mov.s f7, f2 | |
923 | nexp01.s f2, f1 | |
924 | maddn.s f6, f5, f6 | |
925 | const.s f5, 1 | |
926 | const.s f0, 0 | |
927 | neg.s f8, f2 | |
928 | maddn.s f5, f4, f6 | |
929 | maddn.s f0, f8, f3 | |
930 | mkdadj.s f7, f1 | |
931 | maddn.s f6, f5, f6 | |
932 | maddn.s f8, f4, f0 | |
933 | const.s f3, 1 | |
934 | maddn.s f3, f4, f6 | |
935 | maddn.s f0, f8, f6 | |
936 | neg.s f2, f2 | |
937 | maddn.s f6, f3, f6 | |
938 | maddn.s f2, f4, f0 | |
939 | addexpm.s f0, f7 | |
940 | addexp.s f6, f7 | |
941 | divn.s f0, f2, f6 | |
942 | ||
943 | rfr a2, f0 | |
944 | ||
945 | leaf_return | |
946 | ||
947 | #else | |
948 | ||
949 | .literal_position | |
134c8a50 BW |
950 | __divsf3_aux: |
951 | ||
952 | /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). | |
953 | (This code is placed before the start of the function just to | |
954 | keep it in range of the limited branch displacements.) */ | |
955 | ||
956 | .Ldiv_yexpzero: | |
957 | /* Clear the sign bit of y. */ | |
958 | slli a3, a3, 1 | |
959 | srli a3, a3, 1 | |
960 | ||
961 | /* Check for division by zero. */ | |
962 | beqz a3, .Ldiv_yzero | |
963 | ||
964 | /* Normalize y. Adjust the exponent in a9. */ | |
965 | do_nsau a10, a3, a4, a5 | |
966 | addi a10, a10, -8 | |
967 | ssl a10 | |
968 | sll a3, a3 | |
969 | movi a9, 1 | |
970 | sub a9, a9, a10 | |
971 | j .Ldiv_ynormalized | |
972 | ||
973 | .Ldiv_yzero: | |
974 | /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ | |
975 | slli a4, a2, 1 | |
976 | srli a4, a4, 1 | |
977 | srli a2, a7, 31 | |
978 | slli a2, a2, 31 | |
979 | or a2, a2, a6 | |
980 | bnez a4, 1f | |
981 | movi a4, 0x400000 /* make it a quiet NaN */ | |
982 | or a2, a2, a4 | |
b7974b3a | 983 | 1: leaf_return |
134c8a50 BW |
984 | |
985 | .Ldiv_xexpzero: | |
986 | /* Clear the sign bit of x. */ | |
987 | slli a2, a2, 1 | |
988 | srli a2, a2, 1 | |
989 | ||
990 | /* If x is zero, return zero. */ | |
991 | beqz a2, .Ldiv_return_zero | |
992 | ||
993 | /* Normalize x. Adjust the exponent in a8. */ | |
994 | do_nsau a10, a2, a4, a5 | |
995 | addi a10, a10, -8 | |
996 | ssl a10 | |
997 | sll a2, a2 | |
998 | movi a8, 1 | |
999 | sub a8, a8, a10 | |
1000 | j .Ldiv_xnormalized | |
1001 | ||
1002 | .Ldiv_return_zero: | |
1003 | /* Return zero with the appropriate sign bit. */ | |
1004 | srli a2, a7, 31 | |
1005 | slli a2, a2, 31 | |
b7974b3a | 1006 | leaf_return |
134c8a50 BW |
1007 | |
1008 | .Ldiv_xnan_or_inf: | |
1009 | /* Set the sign bit of the result. */ | |
1010 | srli a7, a3, 31 | |
1011 | slli a7, a7, 31 | |
1012 | xor a2, a2, a7 | |
1013 | /* If y is NaN or Inf, return NaN. */ | |
0889f168 MF |
1014 | ball a3, a6, .Ldiv_return_nan |
1015 | slli a7, a2, 9 | |
1016 | bnez a7, .Ldiv_return_nan | |
1017 | leaf_return | |
134c8a50 BW |
1018 | |
1019 | .Ldiv_ynan_or_inf: | |
1020 | /* If y is Infinity, return zero. */ | |
1021 | slli a8, a3, 9 | |
1022 | beqz a8, .Ldiv_return_zero | |
1023 | /* y is NaN; return it. */ | |
1024 | mov a2, a3 | |
0889f168 MF |
1025 | |
1026 | .Ldiv_return_nan: | |
1027 | movi a4, 0x400000 /* make it a quiet NaN */ | |
1028 | or a2, a2, a4 | |
b7974b3a | 1029 | leaf_return |
134c8a50 BW |
1030 | |
1031 | .align 4 | |
1032 | .global __divsf3 | |
1033 | .type __divsf3, @function | |
1034 | __divsf3: | |
b7974b3a | 1035 | leaf_entry sp, 16 |
134c8a50 BW |
1036 | movi a6, 0x7f800000 |
1037 | ||
1038 | /* Get the sign of the result. */ | |
1039 | xor a7, a2, a3 | |
1040 | ||
1041 | /* Check for NaN and infinity. */ | |
1042 | ball a2, a6, .Ldiv_xnan_or_inf | |
1043 | ball a3, a6, .Ldiv_ynan_or_inf | |
1044 | ||
1045 | /* Extract the exponents. */ | |
1046 | extui a8, a2, 23, 8 | |
1047 | extui a9, a3, 23, 8 | |
1048 | ||
1049 | beqz a9, .Ldiv_yexpzero | |
1050 | .Ldiv_ynormalized: | |
1051 | beqz a8, .Ldiv_xexpzero | |
1052 | .Ldiv_xnormalized: | |
1053 | ||
1054 | /* Subtract the exponents. */ | |
1055 | sub a8, a8, a9 | |
1056 | ||
1057 | /* Replace sign/exponent fields with explicit "1.0". */ | |
1058 | movi a10, 0xffffff | |
1059 | or a2, a2, a6 | |
1060 | and a2, a2, a10 | |
1061 | or a3, a3, a6 | |
1062 | and a3, a3, a10 | |
1063 | ||
1064 | /* The first digit of the mantissa division must be a one. | |
1065 | Shift x (and adjust the exponent) as needed to make this true. */ | |
1066 | bltu a3, a2, 1f | |
1067 | slli a2, a2, 1 | |
1068 | addi a8, a8, -1 | |
1069 | 1: | |
1070 | /* Do the first subtraction and shift. */ | |
1071 | sub a2, a2, a3 | |
1072 | slli a2, a2, 1 | |
1073 | ||
1074 | /* Put the quotient into a10. */ | |
1075 | movi a10, 1 | |
1076 | ||
1077 | /* Divide one bit at a time for 23 bits. */ | |
1078 | movi a9, 23 | |
1079 | #if XCHAL_HAVE_LOOPS | |
1080 | loop a9, .Ldiv_loopend | |
1081 | #endif | |
1082 | .Ldiv_loop: | |
1083 | /* Shift the quotient << 1. */ | |
1084 | slli a10, a10, 1 | |
1085 | ||
1086 | /* Is this digit a 0 or 1? */ | |
1087 | bltu a2, a3, 1f | |
1088 | ||
1089 | /* Output a 1 and subtract. */ | |
1090 | addi a10, a10, 1 | |
1091 | sub a2, a2, a3 | |
1092 | ||
1093 | /* Shift the dividend << 1. */ | |
1094 | 1: slli a2, a2, 1 | |
1095 | ||
1096 | #if !XCHAL_HAVE_LOOPS | |
1097 | addi a9, a9, -1 | |
1098 | bnez a9, .Ldiv_loop | |
1099 | #endif | |
1100 | .Ldiv_loopend: | |
1101 | ||
1102 | /* Add the exponent bias (less one to account for the explicit "1.0" | |
1103 | of the mantissa that will be added to the exponent in the final | |
1104 | result). */ | |
1105 | addi a8, a8, 0x7e | |
1106 | ||
1107 | /* Check for over/underflow. The value in a8 is one less than the | |
1108 | final exponent, so values in the range 0..fd are OK here. */ | |
1109 | movi a4, 0xfe | |
1110 | bgeu a8, a4, .Ldiv_overflow | |
1111 | ||
1112 | .Ldiv_round: | |
1113 | /* Round. The remainder (<< 1) is in a2. */ | |
1114 | bltu a2, a3, .Ldiv_rounded | |
1115 | addi a10, a10, 1 | |
1116 | beq a2, a3, .Ldiv_exactlyhalf | |
1117 | ||
1118 | .Ldiv_rounded: | |
1119 | /* Add the exponent to the mantissa. */ | |
1120 | slli a8, a8, 23 | |
1121 | add a2, a10, a8 | |
1122 | ||
1123 | .Ldiv_addsign: | |
1124 | /* Add the sign bit. */ | |
1125 | srli a7, a7, 31 | |
1126 | slli a7, a7, 31 | |
1127 | or a2, a2, a7 | |
b7974b3a | 1128 | leaf_return |
134c8a50 BW |
1129 | |
1130 | .Ldiv_overflow: | |
1131 | bltz a8, .Ldiv_underflow | |
1132 | /* Return +/- Infinity. */ | |
1133 | addi a8, a4, 1 /* 0xff */ | |
1134 | slli a2, a8, 23 | |
1135 | j .Ldiv_addsign | |
1136 | ||
1137 | .Ldiv_exactlyhalf: | |
1138 | /* Remainder is exactly half the divisor. Round even. */ | |
1139 | srli a10, a10, 1 | |
1140 | slli a10, a10, 1 | |
1141 | j .Ldiv_rounded | |
1142 | ||
1143 | .Ldiv_underflow: | |
1144 | /* Create a subnormal value, where the exponent field contains zero, | |
1145 | but the effective exponent is 1. The value of a8 is one less than | |
1146 | the actual exponent, so just negate it to get the shift amount. */ | |
1147 | neg a8, a8 | |
1148 | ssr a8 | |
1149 | bgeui a8, 32, .Ldiv_flush_to_zero | |
1150 | ||
1151 | /* Shift a10 right. Any bits that are shifted out of a10 are | |
1152 | saved in a6 for rounding the result. */ | |
1153 | sll a6, a10 | |
1154 | srl a10, a10 | |
1155 | ||
1156 | /* Set the exponent to zero. */ | |
1157 | movi a8, 0 | |
1158 | ||
1159 | /* Pack any nonzero remainder (in a2) into a6. */ | |
1160 | beqz a2, 1f | |
1161 | movi a9, 1 | |
1162 | or a6, a6, a9 | |
1163 | ||
1164 | /* Round a10 based on the bits shifted out into a6. */ | |
1165 | 1: bgez a6, .Ldiv_rounded | |
1166 | addi a10, a10, 1 | |
1167 | slli a6, a6, 1 | |
1168 | bnez a6, .Ldiv_rounded | |
1169 | srli a10, a10, 1 | |
1170 | slli a10, a10, 1 | |
1171 | j .Ldiv_rounded | |
1172 | ||
1173 | .Ldiv_flush_to_zero: | |
1174 | /* Return zero with the appropriate sign bit. */ | |
1175 | srli a2, a7, 31 | |
1176 | slli a2, a2, 31 | |
b7974b3a | 1177 | leaf_return |
134c8a50 | 1178 | |
66192aa1 DKC |
1179 | #endif /* XCHAL_HAVE_FP_DIV */ |
1180 | ||
134c8a50 BW |
1181 | #endif /* L_divsf3 */ |
1182 | ||
1183 | #ifdef L_cmpsf2 | |
1184 | ||
1185 | /* Equal and Not Equal */ | |
1186 | ||
1187 | .align 4 | |
1188 | .global __eqsf2 | |
1189 | .global __nesf2 | |
1190 | .set __nesf2, __eqsf2 | |
1191 | .type __eqsf2, @function | |
1192 | __eqsf2: | |
b7974b3a | 1193 | leaf_entry sp, 16 |
134c8a50 BW |
1194 | bne a2, a3, 4f |
1195 | ||
1196 | /* The values are equal but NaN != NaN. Check the exponent. */ | |
1197 | movi a6, 0x7f800000 | |
1198 | ball a2, a6, 3f | |
1199 | ||
1200 | /* Equal. */ | |
1201 | movi a2, 0 | |
b7974b3a | 1202 | leaf_return |
134c8a50 BW |
1203 | |
1204 | /* Not equal. */ | |
1205 | 2: movi a2, 1 | |
b7974b3a | 1206 | leaf_return |
134c8a50 BW |
1207 | |
1208 | /* Check if the mantissas are nonzero. */ | |
1209 | 3: slli a7, a2, 9 | |
1210 | j 5f | |
1211 | ||
1212 | /* Check if x and y are zero with different signs. */ | |
1213 | 4: or a7, a2, a3 | |
1214 | slli a7, a7, 1 | |
1215 | ||
1216 | /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa | |
1217 | or x when exponent(x) = 0x7f8 and x == y. */ | |
1218 | 5: movi a2, 0 | |
1219 | movi a3, 1 | |
1220 | movnez a2, a3, a7 | |
b7974b3a | 1221 | leaf_return |
134c8a50 BW |
1222 | |
1223 | ||
1224 | /* Greater Than */ | |
1225 | ||
1226 | .align 4 | |
1227 | .global __gtsf2 | |
1228 | .type __gtsf2, @function | |
1229 | __gtsf2: | |
b7974b3a | 1230 | leaf_entry sp, 16 |
134c8a50 BW |
1231 | movi a6, 0x7f800000 |
1232 | ball a2, a6, 2f | |
1233 | 1: bnall a3, a6, .Lle_cmp | |
1234 | ||
1235 | /* Check if y is a NaN. */ | |
1236 | slli a7, a3, 9 | |
1237 | beqz a7, .Lle_cmp | |
1238 | movi a2, 0 | |
b7974b3a | 1239 | leaf_return |
134c8a50 BW |
1240 | |
1241 | /* Check if x is a NaN. */ | |
1242 | 2: slli a7, a2, 9 | |
1243 | beqz a7, 1b | |
1244 | movi a2, 0 | |
b7974b3a | 1245 | leaf_return |
134c8a50 BW |
1246 | |
1247 | ||
1248 | /* Less Than or Equal */ | |
1249 | ||
1250 | .align 4 | |
1251 | .global __lesf2 | |
1252 | .type __lesf2, @function | |
1253 | __lesf2: | |
b7974b3a | 1254 | leaf_entry sp, 16 |
134c8a50 BW |
1255 | movi a6, 0x7f800000 |
1256 | ball a2, a6, 2f | |
1257 | 1: bnall a3, a6, .Lle_cmp | |
1258 | ||
1259 | /* Check if y is a NaN. */ | |
1260 | slli a7, a3, 9 | |
1261 | beqz a7, .Lle_cmp | |
1262 | movi a2, 1 | |
b7974b3a | 1263 | leaf_return |
134c8a50 BW |
1264 | |
1265 | /* Check if x is a NaN. */ | |
1266 | 2: slli a7, a2, 9 | |
1267 | beqz a7, 1b | |
1268 | movi a2, 1 | |
b7974b3a | 1269 | leaf_return |
134c8a50 BW |
1270 | |
1271 | .Lle_cmp: | |
1272 | /* Check if x and y have different signs. */ | |
1273 | xor a7, a2, a3 | |
1274 | bltz a7, .Lle_diff_signs | |
1275 | ||
1276 | /* Check if x is negative. */ | |
1277 | bltz a2, .Lle_xneg | |
1278 | ||
1279 | /* Check if x <= y. */ | |
1280 | bltu a3, a2, 5f | |
1281 | 4: movi a2, 0 | |
b7974b3a | 1282 | leaf_return |
134c8a50 BW |
1283 | |
1284 | .Lle_xneg: | |
1285 | /* Check if y <= x. */ | |
1286 | bgeu a2, a3, 4b | |
1287 | 5: movi a2, 1 | |
b7974b3a | 1288 | leaf_return |
134c8a50 BW |
1289 | |
1290 | .Lle_diff_signs: | |
1291 | bltz a2, 4b | |
1292 | ||
1293 | /* Check if both x and y are zero. */ | |
1294 | or a7, a2, a3 | |
1295 | slli a7, a7, 1 | |
1296 | movi a2, 1 | |
1297 | movi a3, 0 | |
1298 | moveqz a2, a3, a7 | |
b7974b3a | 1299 | leaf_return |
134c8a50 BW |
1300 | |
1301 | ||
1302 | /* Greater Than or Equal */ | |
1303 | ||
1304 | .align 4 | |
1305 | .global __gesf2 | |
1306 | .type __gesf2, @function | |
1307 | __gesf2: | |
b7974b3a | 1308 | leaf_entry sp, 16 |
134c8a50 BW |
1309 | movi a6, 0x7f800000 |
1310 | ball a2, a6, 2f | |
1311 | 1: bnall a3, a6, .Llt_cmp | |
1312 | ||
1313 | /* Check if y is a NaN. */ | |
1314 | slli a7, a3, 9 | |
1315 | beqz a7, .Llt_cmp | |
1316 | movi a2, -1 | |
b7974b3a | 1317 | leaf_return |
134c8a50 BW |
1318 | |
1319 | /* Check if x is a NaN. */ | |
1320 | 2: slli a7, a2, 9 | |
1321 | beqz a7, 1b | |
1322 | movi a2, -1 | |
b7974b3a | 1323 | leaf_return |
134c8a50 BW |
1324 | |
1325 | ||
1326 | /* Less Than */ | |
1327 | ||
1328 | .align 4 | |
1329 | .global __ltsf2 | |
1330 | .type __ltsf2, @function | |
1331 | __ltsf2: | |
b7974b3a | 1332 | leaf_entry sp, 16 |
134c8a50 BW |
1333 | movi a6, 0x7f800000 |
1334 | ball a2, a6, 2f | |
1335 | 1: bnall a3, a6, .Llt_cmp | |
1336 | ||
1337 | /* Check if y is a NaN. */ | |
1338 | slli a7, a3, 9 | |
1339 | beqz a7, .Llt_cmp | |
1340 | movi a2, 0 | |
b7974b3a | 1341 | leaf_return |
134c8a50 BW |
1342 | |
1343 | /* Check if x is a NaN. */ | |
1344 | 2: slli a7, a2, 9 | |
1345 | beqz a7, 1b | |
1346 | movi a2, 0 | |
b7974b3a | 1347 | leaf_return |
134c8a50 BW |
1348 | |
1349 | .Llt_cmp: | |
1350 | /* Check if x and y have different signs. */ | |
1351 | xor a7, a2, a3 | |
1352 | bltz a7, .Llt_diff_signs | |
1353 | ||
1354 | /* Check if x is negative. */ | |
1355 | bltz a2, .Llt_xneg | |
1356 | ||
1357 | /* Check if x < y. */ | |
1358 | bgeu a2, a3, 5f | |
1359 | 4: movi a2, -1 | |
b7974b3a | 1360 | leaf_return |
134c8a50 BW |
1361 | |
1362 | .Llt_xneg: | |
1363 | /* Check if y < x. */ | |
1364 | bltu a3, a2, 4b | |
1365 | 5: movi a2, 0 | |
b7974b3a | 1366 | leaf_return |
134c8a50 BW |
1367 | |
1368 | .Llt_diff_signs: | |
1369 | bgez a2, 5b | |
1370 | ||
1371 | /* Check if both x and y are nonzero. */ | |
1372 | or a7, a2, a3 | |
1373 | slli a7, a7, 1 | |
1374 | movi a2, 0 | |
1375 | movi a3, -1 | |
1376 | movnez a2, a3, a7 | |
b7974b3a | 1377 | leaf_return |
134c8a50 BW |
1378 | |
1379 | ||
1380 | /* Unordered */ | |
1381 | ||
1382 | .align 4 | |
1383 | .global __unordsf2 | |
1384 | .type __unordsf2, @function | |
1385 | __unordsf2: | |
b7974b3a | 1386 | leaf_entry sp, 16 |
134c8a50 BW |
1387 | movi a6, 0x7f800000 |
1388 | ball a2, a6, 3f | |
1389 | 1: ball a3, a6, 4f | |
1390 | 2: movi a2, 0 | |
b7974b3a | 1391 | leaf_return |
134c8a50 BW |
1392 | |
1393 | 3: slli a7, a2, 9 | |
1394 | beqz a7, 1b | |
1395 | movi a2, 1 | |
b7974b3a | 1396 | leaf_return |
134c8a50 BW |
1397 | |
1398 | 4: slli a7, a3, 9 | |
1399 | beqz a7, 2b | |
1400 | movi a2, 1 | |
b7974b3a | 1401 | leaf_return |
134c8a50 BW |
1402 | |
1403 | #endif /* L_cmpsf2 */ | |
1404 | ||
1405 | #ifdef L_fixsfsi | |
1406 | ||
1407 | .align 4 | |
1408 | .global __fixsfsi | |
1409 | .type __fixsfsi, @function | |
1410 | __fixsfsi: | |
b7974b3a | 1411 | leaf_entry sp, 16 |
134c8a50 BW |
1412 | |
1413 | /* Check for NaN and Infinity. */ | |
1414 | movi a6, 0x7f800000 | |
1415 | ball a2, a6, .Lfixsfsi_nan_or_inf | |
1416 | ||
1417 | /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */ | |
1418 | extui a4, a2, 23, 8 | |
1419 | addi a4, a4, -0x7e | |
1420 | bgei a4, 32, .Lfixsfsi_maxint | |
1421 | blti a4, 1, .Lfixsfsi_zero | |
1422 | ||
1423 | /* Add explicit "1.0" and shift << 8. */ | |
1424 | or a7, a2, a6 | |
1425 | slli a5, a7, 8 | |
1426 | ||
1427 | /* Shift back to the right, based on the exponent. */ | |
1428 | ssl a4 /* shift by 32 - a4 */ | |
1429 | srl a5, a5 | |
1430 | ||
1431 | /* Negate the result if sign != 0. */ | |
1432 | neg a2, a5 | |
1433 | movgez a2, a5, a7 | |
b7974b3a | 1434 | leaf_return |
134c8a50 BW |
1435 | |
1436 | .Lfixsfsi_nan_or_inf: | |
1437 | /* Handle Infinity and NaN. */ | |
1438 | slli a4, a2, 9 | |
1439 | beqz a4, .Lfixsfsi_maxint | |
1440 | ||
1441 | /* Translate NaN to +maxint. */ | |
1442 | movi a2, 0 | |
1443 | ||
1444 | .Lfixsfsi_maxint: | |
1445 | slli a4, a6, 8 /* 0x80000000 */ | |
1446 | addi a5, a4, -1 /* 0x7fffffff */ | |
1447 | movgez a4, a5, a2 | |
1448 | mov a2, a4 | |
b7974b3a | 1449 | leaf_return |
134c8a50 BW |
1450 | |
1451 | .Lfixsfsi_zero: | |
1452 | movi a2, 0 | |
b7974b3a | 1453 | leaf_return |
134c8a50 BW |
1454 | |
1455 | #endif /* L_fixsfsi */ | |
1456 | ||
1457 | #ifdef L_fixsfdi | |
1458 | ||
1459 | .align 4 | |
1460 | .global __fixsfdi | |
1461 | .type __fixsfdi, @function | |
1462 | __fixsfdi: | |
b7974b3a | 1463 | leaf_entry sp, 16 |
134c8a50 BW |
1464 | |
1465 | /* Check for NaN and Infinity. */ | |
1466 | movi a6, 0x7f800000 | |
1467 | ball a2, a6, .Lfixsfdi_nan_or_inf | |
1468 | ||
1469 | /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */ | |
1470 | extui a4, a2, 23, 8 | |
1471 | addi a4, a4, -0x7e | |
1472 | bgei a4, 64, .Lfixsfdi_maxint | |
1473 | blti a4, 1, .Lfixsfdi_zero | |
1474 | ||
1475 | /* Add explicit "1.0" and shift << 8. */ | |
1476 | or a7, a2, a6 | |
1477 | slli xh, a7, 8 | |
1478 | ||
1479 | /* Shift back to the right, based on the exponent. */ | |
1480 | ssl a4 /* shift by 64 - a4 */ | |
1481 | bgei a4, 32, .Lfixsfdi_smallshift | |
1482 | srl xl, xh | |
1483 | movi xh, 0 | |
1484 | ||
1485 | .Lfixsfdi_shifted: | |
1486 | /* Negate the result if sign != 0. */ | |
1487 | bgez a7, 1f | |
1488 | neg xl, xl | |
1489 | neg xh, xh | |
1490 | beqz xl, 1f | |
1491 | addi xh, xh, -1 | |
b7974b3a | 1492 | 1: leaf_return |
134c8a50 BW |
1493 | |
1494 | .Lfixsfdi_smallshift: | |
1495 | movi xl, 0 | |
1496 | sll xl, xh | |
1497 | srl xh, xh | |
1498 | j .Lfixsfdi_shifted | |
1499 | ||
1500 | .Lfixsfdi_nan_or_inf: | |
1501 | /* Handle Infinity and NaN. */ | |
1502 | slli a4, a2, 9 | |
1503 | beqz a4, .Lfixsfdi_maxint | |
1504 | ||
1505 | /* Translate NaN to +maxint. */ | |
1506 | movi a2, 0 | |
1507 | ||
1508 | .Lfixsfdi_maxint: | |
1509 | slli a7, a6, 8 /* 0x80000000 */ | |
1510 | bgez a2, 1f | |
1511 | mov xh, a7 | |
1512 | movi xl, 0 | |
b7974b3a | 1513 | leaf_return |
134c8a50 BW |
1514 | |
1515 | 1: addi xh, a7, -1 /* 0x7fffffff */ | |
1516 | movi xl, -1 | |
b7974b3a | 1517 | leaf_return |
134c8a50 BW |
1518 | |
1519 | .Lfixsfdi_zero: | |
1520 | movi xh, 0 | |
1521 | movi xl, 0 | |
b7974b3a | 1522 | leaf_return |
134c8a50 BW |
1523 | |
1524 | #endif /* L_fixsfdi */ | |
1525 | ||
1526 | #ifdef L_fixunssfsi | |
1527 | ||
1528 | .align 4 | |
1529 | .global __fixunssfsi | |
1530 | .type __fixunssfsi, @function | |
1531 | __fixunssfsi: | |
b7974b3a | 1532 | leaf_entry sp, 16 |
134c8a50 BW |
1533 | |
1534 | /* Check for NaN and Infinity. */ | |
1535 | movi a6, 0x7f800000 | |
1536 | ball a2, a6, .Lfixunssfsi_nan_or_inf | |
1537 | ||
1538 | /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */ | |
1539 | extui a4, a2, 23, 8 | |
1540 | addi a4, a4, -0x7f | |
1541 | bgei a4, 32, .Lfixunssfsi_maxint | |
1542 | bltz a4, .Lfixunssfsi_zero | |
1543 | ||
1544 | /* Add explicit "1.0" and shift << 8. */ | |
1545 | or a7, a2, a6 | |
1546 | slli a5, a7, 8 | |
1547 | ||
1548 | /* Shift back to the right, based on the exponent. */ | |
1549 | addi a4, a4, 1 | |
1550 | beqi a4, 32, .Lfixunssfsi_bigexp | |
1551 | ssl a4 /* shift by 32 - a4 */ | |
1552 | srl a5, a5 | |
1553 | ||
1554 | /* Negate the result if sign != 0. */ | |
1555 | neg a2, a5 | |
1556 | movgez a2, a5, a7 | |
b7974b3a | 1557 | leaf_return |
134c8a50 BW |
1558 | |
1559 | .Lfixunssfsi_nan_or_inf: | |
1560 | /* Handle Infinity and NaN. */ | |
1561 | slli a4, a2, 9 | |
1562 | beqz a4, .Lfixunssfsi_maxint | |
1563 | ||
1564 | /* Translate NaN to 0xffffffff. */ | |
1565 | movi a2, -1 | |
b7974b3a | 1566 | leaf_return |
134c8a50 BW |
1567 | |
1568 | .Lfixunssfsi_maxint: | |
1569 | slli a4, a6, 8 /* 0x80000000 */ | |
1570 | movi a5, -1 /* 0xffffffff */ | |
1571 | movgez a4, a5, a2 | |
1572 | mov a2, a4 | |
b7974b3a | 1573 | leaf_return |
134c8a50 BW |
1574 | |
1575 | .Lfixunssfsi_zero: | |
1576 | movi a2, 0 | |
b7974b3a | 1577 | leaf_return |
134c8a50 BW |
1578 | |
1579 | .Lfixunssfsi_bigexp: | |
1580 | /* Handle unsigned maximum exponent case. */ | |
1581 | bltz a2, 1f | |
1582 | mov a2, a5 /* no shift needed */ | |
b7974b3a | 1583 | leaf_return |
134c8a50 BW |
1584 | |
1585 | /* Return 0x80000000 if negative. */ | |
1586 | 1: slli a2, a6, 8 | |
b7974b3a | 1587 | leaf_return |
134c8a50 BW |
1588 | |
1589 | #endif /* L_fixunssfsi */ | |
1590 | ||
1591 | #ifdef L_fixunssfdi | |
1592 | ||
1593 | .align 4 | |
1594 | .global __fixunssfdi | |
1595 | .type __fixunssfdi, @function | |
1596 | __fixunssfdi: | |
b7974b3a | 1597 | leaf_entry sp, 16 |
134c8a50 BW |
1598 | |
1599 | /* Check for NaN and Infinity. */ | |
1600 | movi a6, 0x7f800000 | |
1601 | ball a2, a6, .Lfixunssfdi_nan_or_inf | |
1602 | ||
1603 | /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */ | |
1604 | extui a4, a2, 23, 8 | |
1605 | addi a4, a4, -0x7f | |
1606 | bgei a4, 64, .Lfixunssfdi_maxint | |
1607 | bltz a4, .Lfixunssfdi_zero | |
1608 | ||
1609 | /* Add explicit "1.0" and shift << 8. */ | |
1610 | or a7, a2, a6 | |
1611 | slli xh, a7, 8 | |
1612 | ||
1613 | /* Shift back to the right, based on the exponent. */ | |
1614 | addi a4, a4, 1 | |
1615 | beqi a4, 64, .Lfixunssfdi_bigexp | |
1616 | ssl a4 /* shift by 64 - a4 */ | |
1617 | bgei a4, 32, .Lfixunssfdi_smallshift | |
1618 | srl xl, xh | |
1619 | movi xh, 0 | |
1620 | ||
1621 | .Lfixunssfdi_shifted: | |
1622 | /* Negate the result if sign != 0. */ | |
1623 | bgez a7, 1f | |
1624 | neg xl, xl | |
1625 | neg xh, xh | |
1626 | beqz xl, 1f | |
1627 | addi xh, xh, -1 | |
b7974b3a | 1628 | 1: leaf_return |
134c8a50 BW |
1629 | |
1630 | .Lfixunssfdi_smallshift: | |
1631 | movi xl, 0 | |
1632 | src xl, xh, xl | |
1633 | srl xh, xh | |
1634 | j .Lfixunssfdi_shifted | |
1635 | ||
1636 | .Lfixunssfdi_nan_or_inf: | |
1637 | /* Handle Infinity and NaN. */ | |
1638 | slli a4, a2, 9 | |
1639 | beqz a4, .Lfixunssfdi_maxint | |
1640 | ||
1641 | /* Translate NaN to 0xffffffff.... */ | |
1642 | 1: movi xh, -1 | |
1643 | movi xl, -1 | |
b7974b3a | 1644 | leaf_return |
134c8a50 BW |
1645 | |
1646 | .Lfixunssfdi_maxint: | |
1647 | bgez a2, 1b | |
1648 | 2: slli xh, a6, 8 /* 0x80000000 */ | |
1649 | movi xl, 0 | |
b7974b3a | 1650 | leaf_return |
134c8a50 BW |
1651 | |
1652 | .Lfixunssfdi_zero: | |
1653 | movi xh, 0 | |
1654 | movi xl, 0 | |
b7974b3a | 1655 | leaf_return |
134c8a50 BW |
1656 | |
1657 | .Lfixunssfdi_bigexp: | |
1658 | /* Handle unsigned maximum exponent case. */ | |
1659 | bltz a7, 2b | |
1660 | movi xl, 0 | |
b7974b3a | 1661 | leaf_return /* no shift needed */ |
134c8a50 BW |
1662 | |
1663 | #endif /* L_fixunssfdi */ | |
1664 | ||
1665 | #ifdef L_floatsisf | |
1666 | ||
1667 | .align 4 | |
1668 | .global __floatunsisf | |
1669 | .type __floatunsisf, @function | |
1670 | __floatunsisf: | |
b7974b3a | 1671 | leaf_entry sp, 16 |
134c8a50 BW |
1672 | beqz a2, .Lfloatsisf_return |
1673 | ||
1674 | /* Set the sign to zero and jump to the floatsisf code. */ | |
1675 | movi a7, 0 | |
1676 | j .Lfloatsisf_normalize | |
1677 | ||
1678 | .align 4 | |
1679 | .global __floatsisf | |
1680 | .type __floatsisf, @function | |
1681 | __floatsisf: | |
b7974b3a | 1682 | leaf_entry sp, 16 |
134c8a50 BW |
1683 | |
1684 | /* Check for zero. */ | |
1685 | beqz a2, .Lfloatsisf_return | |
1686 | ||
1687 | /* Save the sign. */ | |
1688 | extui a7, a2, 31, 1 | |
1689 | ||
1690 | /* Get the absolute value. */ | |
1691 | #if XCHAL_HAVE_ABS | |
1692 | abs a2, a2 | |
1693 | #else | |
1694 | neg a4, a2 | |
1695 | movltz a2, a4, a2 | |
1696 | #endif | |
1697 | ||
1698 | .Lfloatsisf_normalize: | |
1699 | /* Normalize with the first 1 bit in the msb. */ | |
1700 | do_nsau a4, a2, a5, a6 | |
1701 | ssl a4 | |
1702 | sll a5, a2 | |
1703 | ||
1704 | /* Shift the mantissa into position, with rounding bits in a6. */ | |
1705 | srli a2, a5, 8 | |
1706 | slli a6, a5, (32 - 8) | |
1707 | ||
1708 | /* Set the exponent. */ | |
1709 | movi a5, 0x9d /* 0x7e + 31 */ | |
1710 | sub a5, a5, a4 | |
1711 | slli a5, a5, 23 | |
1712 | add a2, a2, a5 | |
1713 | ||
1714 | /* Add the sign. */ | |
1715 | slli a7, a7, 31 | |
1716 | or a2, a2, a7 | |
1717 | ||
1718 | /* Round up if the leftover fraction is >= 1/2. */ | |
1719 | bgez a6, .Lfloatsisf_return | |
1720 | addi a2, a2, 1 /* Overflow to the exponent is OK. */ | |
1721 | ||
1722 | /* Check if the leftover fraction is exactly 1/2. */ | |
1723 | slli a6, a6, 1 | |
1724 | beqz a6, .Lfloatsisf_exactlyhalf | |
1725 | ||
1726 | .Lfloatsisf_return: | |
b7974b3a | 1727 | leaf_return |
134c8a50 BW |
1728 | |
1729 | .Lfloatsisf_exactlyhalf: | |
1730 | /* Round down to the nearest even value. */ | |
1731 | srli a2, a2, 1 | |
1732 | slli a2, a2, 1 | |
b7974b3a | 1733 | leaf_return |
134c8a50 BW |
1734 | |
1735 | #endif /* L_floatsisf */ | |
1736 | ||
1737 | #ifdef L_floatdisf | |
1738 | ||
1739 | .align 4 | |
1740 | .global __floatundisf | |
1741 | .type __floatundisf, @function | |
1742 | __floatundisf: | |
b7974b3a | 1743 | leaf_entry sp, 16 |
134c8a50 BW |
1744 | |
1745 | /* Check for zero. */ | |
1746 | or a4, xh, xl | |
1747 | beqz a4, 2f | |
1748 | ||
1749 | /* Set the sign to zero and jump to the floatdisf code. */ | |
1750 | movi a7, 0 | |
1751 | j .Lfloatdisf_normalize | |
1752 | ||
1753 | .align 4 | |
1754 | .global __floatdisf | |
1755 | .type __floatdisf, @function | |
1756 | __floatdisf: | |
b7974b3a | 1757 | leaf_entry sp, 16 |
134c8a50 BW |
1758 | |
1759 | /* Check for zero. */ | |
1760 | or a4, xh, xl | |
1761 | beqz a4, 2f | |
1762 | ||
1763 | /* Save the sign. */ | |
1764 | extui a7, xh, 31, 1 | |
1765 | ||
1766 | /* Get the absolute value. */ | |
1767 | bgez xh, .Lfloatdisf_normalize | |
1768 | neg xl, xl | |
1769 | neg xh, xh | |
1770 | beqz xl, .Lfloatdisf_normalize | |
1771 | addi xh, xh, -1 | |
1772 | ||
1773 | .Lfloatdisf_normalize: | |
1774 | /* Normalize with the first 1 bit in the msb of xh. */ | |
1775 | beqz xh, .Lfloatdisf_bigshift | |
1776 | do_nsau a4, xh, a5, a6 | |
1777 | ssl a4 | |
1778 | src xh, xh, xl | |
1779 | sll xl, xl | |
1780 | ||
1781 | .Lfloatdisf_shifted: | |
1782 | /* Shift the mantissa into position, with rounding bits in a6. */ | |
1783 | ssai 8 | |
1784 | sll a5, xl | |
1785 | src a6, xh, xl | |
1786 | srl xh, xh | |
1787 | beqz a5, 1f | |
1788 | movi a5, 1 | |
1789 | or a6, a6, a5 | |
1790 | 1: | |
1791 | /* Set the exponent. */ | |
1792 | movi a5, 0xbd /* 0x7e + 63 */ | |
1793 | sub a5, a5, a4 | |
1794 | slli a5, a5, 23 | |
1795 | add a2, xh, a5 | |
1796 | ||
1797 | /* Add the sign. */ | |
1798 | slli a7, a7, 31 | |
1799 | or a2, a2, a7 | |
1800 | ||
1801 | /* Round up if the leftover fraction is >= 1/2. */ | |
1802 | bgez a6, 2f | |
1803 | addi a2, a2, 1 /* Overflow to the exponent is OK. */ | |
1804 | ||
1805 | /* Check if the leftover fraction is exactly 1/2. */ | |
1806 | slli a6, a6, 1 | |
1807 | beqz a6, .Lfloatdisf_exactlyhalf | |
b7974b3a | 1808 | 2: leaf_return |
134c8a50 BW |
1809 | |
1810 | .Lfloatdisf_bigshift: | |
1811 | /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ | |
1812 | do_nsau a4, xl, a5, a6 | |
1813 | ssl a4 | |
1814 | sll xh, xl | |
1815 | movi xl, 0 | |
1816 | addi a4, a4, 32 | |
1817 | j .Lfloatdisf_shifted | |
1818 | ||
1819 | .Lfloatdisf_exactlyhalf: | |
1820 | /* Round down to the nearest even value. */ | |
1821 | srli a2, a2, 1 | |
1822 | slli a2, a2, 1 | |
b7974b3a | 1823 | leaf_return |
134c8a50 BW |
1824 | |
1825 | #endif /* L_floatdisf */ | |
66192aa1 DKC |
1826 | |
1827 | #if XCHAL_HAVE_FP_SQRT | |
1828 | #ifdef L_sqrtf | |
1829 | /* Square root */ | |
1830 | ||
1831 | .align 4 | |
1832 | .global __ieee754_sqrtf | |
1833 | .type __ieee754_sqrtf, @function | |
1834 | __ieee754_sqrtf: | |
1835 | leaf_entry sp, 16 | |
1836 | ||
1837 | wfr f1, a2 | |
1838 | ||
1839 | sqrt0.s f2, f1 | |
1840 | const.s f3, 0 | |
1841 | maddn.s f3, f2, f2 | |
1842 | nexp01.s f4, f1 | |
1843 | const.s f0, 3 | |
1844 | addexp.s f4, f0 | |
1845 | maddn.s f0, f3, f4 | |
1846 | nexp01.s f3, f1 | |
1847 | neg.s f5, f3 | |
1848 | maddn.s f2, f0, f2 | |
1849 | const.s f0, 0 | |
1850 | const.s f6, 0 | |
1851 | const.s f7, 0 | |
1852 | maddn.s f0, f5, f2 | |
1853 | maddn.s f6, f2, f4 | |
1854 | const.s f4, 3 | |
1855 | maddn.s f7, f4, f2 | |
1856 | maddn.s f3, f0, f0 | |
1857 | maddn.s f4, f6, f2 | |
1858 | neg.s f2, f7 | |
1859 | maddn.s f0, f3, f2 | |
1860 | maddn.s f7, f4, f7 | |
1861 | mksadj.s f2, f1 | |
1862 | nexp01.s f1, f1 | |
1863 | maddn.s f1, f0, f0 | |
1864 | neg.s f3, f7 | |
1865 | addexpm.s f0, f2 | |
1866 | addexp.s f3, f2 | |
1867 | divn.s f0, f1, f3 | |
1868 | ||
1869 | rfr a2, f0 | |
1870 | ||
1871 | leaf_return | |
1872 | ||
1873 | #endif /* L_sqrtf */ | |
1874 | #endif /* XCHAL_HAVE_FP_SQRT */ | |
1875 | ||
1876 | #if XCHAL_HAVE_FP_RECIP | |
1877 | #ifdef L_recipsf2 | |
1878 | /* Reciprocal */ | |
1879 | ||
1880 | .align 4 | |
1881 | .global __recipsf2 | |
1882 | .type __recipsf2, @function | |
1883 | __recipsf2: | |
1884 | leaf_entry sp, 16 | |
1885 | ||
1886 | wfr f1, a2 | |
1887 | ||
1888 | recip0.s f0, f1 | |
1889 | const.s f2, 1 | |
1890 | msub.s f2, f1, f0 | |
1891 | maddn.s f0, f0, f2 | |
1892 | const.s f2, 1 | |
1893 | msub.s f2, f1, f0 | |
1894 | maddn.s f0, f0, f2 | |
1895 | ||
1896 | rfr a2, f0 | |
1897 | ||
1898 | leaf_return | |
1899 | ||
1900 | #endif /* L_recipsf2 */ | |
1901 | #endif /* XCHAL_HAVE_FP_RECIP */ | |
1902 | ||
1903 | #if XCHAL_HAVE_FP_RSQRT | |
1904 | #ifdef L_rsqrtsf2 | |
1905 | /* Reciprocal square root */ | |
1906 | ||
1907 | .align 4 | |
1908 | .global __rsqrtsf2 | |
1909 | .type __rsqrtsf2, @function | |
1910 | __rsqrtsf2: | |
1911 | leaf_entry sp, 16 | |
1912 | ||
1913 | wfr f1, a2 | |
1914 | ||
1915 | rsqrt0.s f0, f1 | |
1916 | mul.s f2, f1, f0 | |
1917 | const.s f3, 3; | |
1918 | mul.s f4, f3, f0 | |
1919 | const.s f5, 1 | |
1920 | msub.s f5, f2, f0 | |
1921 | maddn.s f0, f4, f5 | |
1922 | mul.s f2, f1, f0 | |
1923 | mul.s f1, f3, f0 | |
1924 | const.s f3, 1 | |
1925 | msub.s f3, f2, f0 | |
1926 | maddn.s f0, f1, f3 | |
1927 | ||
1928 | rfr a2, f0 | |
1929 | ||
1930 | leaf_return | |
1931 | ||
1932 | #endif /* L_rsqrtsf2 */ | |
1933 | #endif /* XCHAL_HAVE_FP_RSQRT */ |