# libgcc/config/ft32/lib1funcs.S
1 # ieee754 sf routines for FT32
2
3 /* Copyright (C) 1995-2016 Free Software Foundation, Inc.
4
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
9
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 # See http://www.ens-lyon.fr/LIP/Pub/Rapports/PhD/PhD2006/PhD2006-02.pdf
25 # for implementation details of all except division which is detailed below
26 #
27
#ifdef L_fp_tools
// .global __cmpsf2_
# IEEE-754 single-precision constants shared by the routines below,
# loaded at runtime with lpm.
nan: .long 0x7FFFFFFF # quiet-NaN bit pattern; also abs mask
inf: .long 0x7F800000 # +infinity
sign_mask: .long 0x80000000 # sign bit
m_mask: .long 0x007FFFFF # 23-bit fraction field
exp_bias: .long 127 # exponent bias
edge_case: .long 0x00FFFFFF # border case promoted to smallest normal in __mulsf3
smallest_norm: .long 0x00800000 # implicit bit / smallest normalized number
high_FF: .long 0xFF000000
high_uint: .long 0xFFFFFFFF

# 64-entry table for the trailing-zero count computed by the ntz macro
# (Seal's algorithm); entry 0 maps to 32 (input was zero).
ntz_table:
        .byte 32,0,1,12,2,6,0,13,3,0,7,0,0,0,0,14
        .byte 10,4,0,0,8,0,0,25,0,0,0,0,0,21,27,15
        .byte 31,11,5,0,0,0,0,0,9,0,0,24,0,0,20,26
        .byte 30,0,0,0,0,23,0,19,29,0,22,18,28,17,16,0

#endif
47
# Supply a few 'missing' instructions

# not: bitwise complement, rd = ~r1 (synthesized as xor with -1)
.macro not rd,r1
        xor \rd,\r1,-1
.endm
54
# negate: two's-complement in place, x = -x = ~x + 1
.macro neg x
        not \x, \x
        add \x, \x, 1
.endm
60
# set $cc from the result of "ashl reg,dist"
# Hand-assembled FT32 shift instruction with the condition-code update
# enabled; \reg and \dist are or-ed into the register and immediate
# fields of the 32-bit opcode.  NOTE(review): field positions assumed
# from the FT32 encoding -- confirm against the ISA manual.
.macro ashlcc reg,dist
        .long 0x5de04008 | (\reg << 15) | (\dist << 4)
.endm
65
66
# converts an unsigned number x to a signed rep based on the bits in sign
# sign should be 0x00000000 or 0xffffffff.
# For sign == 0xffffffff this computes ~(x - 1) == -x; for sign == 0
# it leaves x unchanged.
.macro to_signed x, sign
        add \x,\x,\sign # conditionally decrement x
        xor \x,\x,\sign # conditionally complement x
.endm
73
74
# ld32: load the full 32-bit constant \v into register \r.
# ldk loads the high part; ldl shifts it up 10 bits and ors in the
# low 10 bits of \v.
.macro ld32 r,v
        ldk \r,(\v>>10)
        ldl \r,\r,(\v & 1023)
.endm
79
# calculate trailing zero count in x, also uses scr.
# Using Seal's algorithm: isolate the lowest set bit (x & -x),
# multiply it by a hash constant built from shifts and adds (no mul
# instruction needed), then index ntz_table with the top 6 bits.
.macro ntz x, scr
        not \scr, \x
        add \scr, \scr, 1               # scr = -x
        and \x, \x, \scr                # x &= -x: isolate lowest set bit
        # the three shift/add/sub pairs below multiply x by the hash constant
        ashl \scr, \x, 4
        add \x, \scr, \x                # x *= 17
        ashl \scr, \x, 6
        add \x, \scr, \x                # x *= 65
        ashl \scr, \x, 16
        sub \x, \scr, \x                # x *= 65535
        lshr \x, \x, 26                 # top 6 bits = table index
        ldk \scr, ntz_table
        add \x, \x, \scr
        lpmi.b \x, \x, 0                # x = ntz_table[index]
.endm
97
# calculate leading zero count: bit-reverse the word, then count
# trailing zeros (clobbers x; scr is scratch)
.macro nlz x, scr
        flip \x, \x, 31                 # reverse bit order
        ntz \x, \scr
.endm
103
104
# Round 26 bit mantissa to nearest
# | 23 bits frac | G | R | S |
# Drops the bottom two bits of \m and rounds to nearest-even:
# 0xc8 (0b11001000) is a truth table indexed by the low three bits
# (new LSB, round, sticky) giving the increment to add.
.macro round m, s1, s2
        ldk \s1,0xc8
        and \s2,\m,7                    # low 3 bits select the table entry
        lshr \s1,\s1,\s2
        and \s1,\s1,1                   # s1 = round-up increment (0 or 1)
        lshr \m,\m,2                    # discard the two rounding bits
        add \m,\m,\s1
.endm
115
# If NZ, set the LSB of reg
# (consumes $cc from a preceding flag-setting instruction, e.g. tst)
.macro sticky reg
        jmpc z,1f
        or \reg,\reg,1                  # set the sticky bit to 1
1:
.endm
122
123 ##########################################################################
124 ##########################################################################
125 ## addition & subtraction
126
#if defined(L_subsf3) || defined(L_addsub_sf)
# float __subsf3(float x, float y): x - y.
# Implemented as x + (-y): flip the sign of y and fall into __addsf3.
.global __subsf3
__subsf3:
        # this is subtraction, so we just change the sign of r1
        lpm $r2,sign_mask
        xor $r1,$r1,$r2
        jmp __addsf3
#endif
135
#if defined(L_addsf3) || defined(L_addsub_sf)
# float __addsf3(float x, float y): IEEE-754 single-precision add.
.global __addsf3
__addsf3:
        # x in $r0, y in $r1, result z in $r0 --||| 100 instructions +/- |||--
        # unpack e, calc d
        bextu $r2,$r0,(8<<5)|23         # ex in r2
        bextu $r3,$r1,(8<<5)|23         # ey in r3
        sub $r5,$r2,$r3                 # d = ex - ey

        # Special values are 0x00 and 0xff in ex and ey.
        # If (ex&ey) != 0 or (ex|ey)=255 then there may be
        # a special value.
        tst $r2,$r3
        jmpc nz,1f
        jmp slow
1:      or $r4,$r2,$r3
        cmp $r4,255
        jmpc nz,no_special_vals
slow:
        # Check for early exit
        cmp $r2,0
        jmpc z,test_if_not_255
        cmp $r3,0
        jmpc nz,no_early_exit
test_if_not_255:
        cmp $r2,255
        jmpc z,no_early_exit
        cmp $r3,255
        jmpc z,no_early_exit
        or $r6,$r2,$r3
        cmp $r6,0
        jmpc nz,was_not_zero
        # both operands zero: result is +0 unless both signs are set
        and $r0,$r0,$r1
        lpm $r1,sign_mask
        and $r0,$r0,$r1
        return
was_not_zero:
        cmp $r2,0
        jmpc nz,ret_x
        move $r0,$r1                    # x == 0: return y
        return
ret_x:
        return                          # y == 0: return x
no_early_exit:
        # setup to test for special values
        sub $r6,$r2,1
        and $r6,$r6,0xFE
        sub $r7,$r3,1
        and $r7,$r7,0xFE
        # test for special values: r6 = max of the two masked exponents
        cmp $r6,$r7
        jmpc gte,ex_spec_is_gte
        move $r6,$r7
ex_spec_is_gte:
        cmp $r6,0xFE
        jmpc nz,no_special_vals
        cmp $r5,0
        jmpc ns,d_gte_0
        cmp $r3,0xFF
        jmpc z,ret_y
        cmp $r2,0
        jmpc z,ret_y
ret_y:
        move $r0,$r1
        return
d_gte_0:
        cmp $r5,0
        jmpc z,d_is_0
        cmp $r2,0xFF
        jmpc z,ret_x
        cmp $r3,0
        jmpc z,ret_x
d_is_0:
        cmp $r2,0xFF
        jmpc nz,no_special_vals
        # both exponents are 0xFF: inf/NaN combinations
        ashl $r6,$r0,9                  # clear all except x frac
        ashl $r7,$r1,9                  # clear all except y frac
        or $r6,$r6,$r7
        cmp $r6,0
        jmpc nz,ret_nan                 # either operand is NaN
        lshr $r4,$r0,31                 # sx in r4
        lshr $r5,$r1,31                 # sy in r5
        cmp $r4,$r5
        jmpc nz,ret_nan                 # inf + (-inf) = NaN
        return                          # inf + inf, same sign
ret_nan:
        lpm $r0,nan
        return
no_special_vals:
        ldk $r8,(1<<10)|(9<<5)|26       # setup implicit bit and mask for e
        #----------------------
        ashr $r4,$r0,31                 # sx in r4
        ashl $r0,$r0,3                  # shift mx 3 for GRS bits
        bins $r0,$r0,$r8                # clear sx, ex and add implicit bit mx
        # change mx to signed mantissa
        to_signed $r0,$r4
        #----------------------
        ashr $r4,$r1,31                 # sy in r4
        ashl $r1,$r1,3                  # shift my 3 for GRS bits
        bins $r1,$r1,$r8                # clear sy, ey and add implicit bit my
        # change my to signed mantissa
        to_signed $r1,$r4
        #----------------------
        # test if we swap ms based on d sign
        cmp $r5,0
        jmpc gte,noswap
        # swap mx & my (three-xor swap)
        xor $r0,$r0,$r1
        xor $r1,$r0,$r1
        xor $r0,$r0,$r1
        # d positive means that ex>=ey, so ez = ex
        # d negative means that ey>ex, so ez = ey
        move $r2,$r3
        # |d|
        neg $r5
noswap:
        # now $r2 = ez = max(ex,ey)
        cmp $r5,26                      # max necessary alignment shift is 26
        jmpc lt,under_26
        ldk $r5,26
under_26:
        ldk $r7,-1
        ashl $r7,$r7,$r5                # create inverse of mask for test of S bit value in discarded my
        not $r7,$r7
        tst $r1,$r7                     # determine value of sticky bit
        # shift my >> |d|
        ashr $r1,$r1,$r5
        sticky $r1

        # add ms
        add $r0,$r0,$r1

        # $r4 = sign(mx), mx = |mx|
        ashr $r4,$r0,31
        xor $r0,$r0,$r4
        sub $r0,$r0,$r4

        # realign mantissa using leading zero count
        flip $r7,$r0,31
        ntz $r7,$r8                     # r7 = leading zero count of the sum
        ashl $r0,$r0,$r7                # normalize: MSB of sum to bit 31
        btst $r0,(6<<5)|0               # test low bits for sticky again
        lshr $r0,$r0,6
        sticky $r0

        # update exponent
        add $r2,$r2,5
        sub $r2,$r2,$r7

        # Round to nearest
        round $r0,$r7,$r6

        # detect_exp_update: if rounding carried into bit 24, bump ez
        lshr $r6,$r0,24
        add $r2,$r2,$r6

        # final tests
        # mz == 0? if so, we just bail with a +0
        cmp $r0,0
        jmpc nz,msum_not_zero
        ldk $r0,0
        return
msum_not_zero:
        # Combined check that (1 <= ez <= 254)
        sub $r3,$r2,1
        cmp $r3,254
        jmpc b,no_special_ret
        # underflow?
        cmp $r2,0
        jmpc gt,no_under
        ldk $r0,0                       # flush to zero
        jmp pack_sz
no_under:
        # overflow?
        cmp $r2,255
        jmpc lt,no_special_ret
        ldk $r0,0x7F8                   # return infinity
        ashl $r0,$r0,20
        jmp pack_sz
no_special_ret:
        # Pack ez
        ldl $r2,$r2,(8<<5)|23
        bins $r0,$r0,$r2                # width = 8, pos = 23 pack ez
        # Pack sz
pack_sz:
        ldl $r4,$r4,(1<<5)|31
        bins $r0,$r0,$r4                # width = 1, pos = 31 set result sign
        return
#endif
325
326 ##########################################################################
327 ##########################################################################
328 ## multiplication
329
#ifdef L_mulsf3
# float __mulsf3(float x, float y): IEEE-754 single-precision multiply.
.global __mulsf3
__mulsf3:
        # x in $r0, y in $r1, result z in $r0 --||| 61 instructions +/- |||--

        # unpack e
        bextu $r2,$r0,(8<<5)|23         # ex in r2
        bextu $r3,$r1,(8<<5)|23         # ey in r3
        # calc result sign
        xor $r4,$r0,$r1
        lpm $r5,sign_mask
        and $r4,$r4,$r5                 # sz in r4

        # unpack m add implicit bit
        ldk $r5,(1<<10)|(9<<5)|23       # setup implicit bit and mask for e
        #----------------------
        bins $r0,$r0,$r5                # clear sx, ex and add implicit bit mx

        # fast path: both exponents in the normal range 1..254?
        sub $r6,$r2,1
        cmp $r6,254
        jmpc b,1f
        jmp slow_mul
1:      sub $r6,$r3,1
        cmp $r6,254
        jmpc b,no_special_vals_mul

slow_mul:
        # Check for early exit
        cmp $r2,0
        jmpc z,op_is_zero
        cmp $r3,0
        jmpc nz,no_early_exit_mul
op_is_zero:
        cmp $r2,255
        jmpc z,no_early_exit_mul
        cmp $r3,255
        jmpc z,no_early_exit_mul
        move $r0,$r4                    # zero operand: return signed zero
        return
no_early_exit_mul:
        # setup to test for special values
        sub $r6,$r2,1
        and $r6,$r6,0xFE
        sub $r7,$r3,1
        and $r7,$r7,0xFE
        # test for special values
        cmp $r6,$r7
        jmpc gte,ex_spec_is_gte_ey_mul
        move $r6,$r7
ex_spec_is_gte_ey_mul:
        cmp $r6,0xFE
        jmpc nz,no_special_vals_mul
        cmp $r2,0xFF
        jmpc nz,ex_not_FF_mul
        # x is inf or NaN
        ashl $r6,$r0,9
        cmp $r6,0
        jmpc nz,ret_nan                 # x is NaN
        cmp $r3,0
        jmpc z,ret_nan                  # inf * 0 (ey == 0) = NaN
        ashl $r6,$r1,1
        lpm $r7,high_FF
        cmp $r6,$r7
        jmpc a,ret_nan                  # y is NaN
        cmp $r6,0
        jmpc z,ret_nan                  # inf * 0 = NaN
        # infinity
        lpm $r0,inf
        or $r0,$r0,$r4
        return
ex_not_FF_mul:
        cmp $r2,0
        jmpc nz,no_nan_mul
        cmp $r3,0xFF
        jmpc nz,no_nan_mul
        jmp ret_nan                     # 0 * inf = NaN
no_nan_mul:
        # y is inf or NaN here: return |y| with the computed sign
        lpm $r0,nan
        and $r0,$r0,$r1
        or $r0,$r0,$r4
        return

ret_nan:
        lpm $r0,nan
        return

no_special_vals_mul:
        bins $r1,$r1,$r5                # clear sy, ey and add implicit bit my
        # calc ez
        add $r3,$r2,$r3
        sub $r3,$r3,127                 # ez in r3

        # (r1,r2) = R0 * R1
        mul $r2,$r0,$r1                 # low 32 bits of product
        muluh $r1,$r0,$r1               # high 32 bits of product

        btst $r1,(1<<5)|15              # XXX use jmpx
        jmpc z,mul_z0

        # mz is 1X.XX...X
        # 48-bit product is in (r1,r2). The low 22 bits of r2
        # are discarded.
        lshr $r0,$r2,22
        ashl $r1,$r1,10
        or $r0,$r0,$r1                  # r0 = (r1,r2) >> 22
        ashlcc 2,10                     # set $cc from the 22 discarded bits of r2
        sticky $r0
        add $r3,$r3,1                   # bump exponent

        # Round to nearest
        round $r0, $r1, $r2
        lshr $r6,$r0,24                 # carry out of rounding?
        add $r3,$r3,$r6

        sub $r6,$r3,1
        cmp $r6,254
        jmpc b,no_special_ret_mul

special_ret_mul:
        # When the final exponent <= 0, result is flushed to 0 except
        # for the border case 0x00FFFFFF which is promoted to next higher
        # FP no., that is, the smallest "normalized" number.
        cmp $r3,0
        jmpc gt,exp_normal
        # Pack ez
        ldl $r3,$r3,(8<<5)|23
        bins $r0,$r0,$r3                # width = 8, pos = 23 pack ez
        lpm $r2,edge_case
        cmp $r0,$r2
        jmpc nz,no_edge_case
        lpm $r0,smallest_norm
        jmp pack_sz_mul
no_edge_case:
        ldk $r0,0
        jmp pack_sz_mul
exp_normal:
        # overflow?
        cmp $r3,255
        jmpc lt,no_special_ret_mul
        ldk $r0,0x7F8                   # return infinity
        ashl $r0,$r0,20
        jmp pack_sz_mul
no_special_ret_mul:
        # Pack ez
        ldl $r3,$r3,(8<<5)|23
        bins $r0,$r0,$r3                # width = 8, pos = 23 pack ez
        # Pack sz
pack_sz_mul:
        or $r0,$r0,$r4
        return

mul_z0:
        # mz is 0X.XX...X
        # 48-bit product is in (r1,r2). The low 21 bits of r2
        # are discarded.
        lshr $r0,$r2,21
        ashl $r1,$r1,11
        or $r0,$r0,$r1                  # r0 = (r1,r2) >> 21
        ashlcc 2,11                     # set $cc from the 21 discarded bits of r2
        sticky $r0
        # Round to nearest
        round $r0, $r1, $r2
        lshr $r6,$r0,24
        add $r3,$r3,$r6

        sub $r6,$r3,1
        cmp $r6,254
        jmpc b,no_special_ret_mul
        jmp special_ret_mul
#endif
499
500 ##########################################################################
501 ##########################################################################
502 ## division
503
504 ## See http://perso.ens-lyon.fr/gilles.villard/BIBLIOGRAPHIE/PDF/arith19.pdf
505 ## for implementation details
506
507
508
509
#ifdef L_divsf3
# Coefficients of the polynomial reciprocal approximation used below
# (see the paper cited above).
dc_1: .long 0xffffe7d7
dc_2: .long 0xffffffe8
dc_3: .long 0xffbad86f
dc_4: .long 0xfffbece7
dc_5: .long 0xf3672b51
dc_6: .long 0xfd9d3a3e
dc_7: .long 0x9a3c4390
dc_8: .long 0xd4d2ce9b
dc_9: .long 0x1bba92b3
dc_10: .long 0x525a1a8b
dc_11: .long 0x0452b1bf
dc_12: .long 0xFFFFFFC0
spec_val_test: .long 0x7F7FFFFF

# float __divsf3(float x, float y): IEEE-754 single-precision divide
# via polynomial approximation of 1/y, then one correction/round step.
.global __divsf3
__divsf3:
        push $r13                       # $r13 is preserved for the caller
        # x in $r0, y in $r1, result z in $r0 --||| 73 instructions +/- |||-
        bextu $r10,$r0,(8<<5)|23        # ex in r10
        bextu $r11,$r1,(8<<5)|23        # ey in r11
        lpm $r6, m_mask
        and $r2, $r0, $r6               # mx
        and $r3, $r1, $r6               # my
        cmp $r2,$r3
        bextu $r2,$r30,(1<<5)|4         # c = Tx >= T;
        # NOTE(review): $r30 appears to hold the flags set by the cmp
        # above, with the relevant condition at bit 4 -- confirm
        # against the FT32 register documentation.
        ashl $r3,$r3,9                  # T = X << 9;
        lpm $r13, sign_mask
        ashl $r4,$r0,8                  # X8 = X << 8;
        or $r4,$r4,$r13                 # Mx = X8 | 0x80000000;
        lshr $r5,$r4,$r2                # S = Mx >> c;
        # calc D
        sub $r2, $r11, $r2
        add $r12, $r10, 125
        sub $r2, $r12, $r2              # int D = (Ex + 125) - (Ey - c);
        # calc result sign
        xor $r12,$r0,$r1
        and $r12,$r12,$r13              # Sr = ( X ^ Y ) & 0x80000000;
        # check early exit
        cmp $r10, 0
        jmpc nz, no_early_ret_dev
        cmp $r11, 0
        jmpc z, no_early_ret_dev
        cmp $r11, 255
        jmpc z, no_early_ret_dev
        move $r0, $r12                  # 0 / finite: return signed zero
        pop $r13
        return
no_early_ret_dev:
        # setup to test for special values
        sub $r8,$r10,1
        and $r8,$r8,0xFE
        sub $r9,$r11,1
        and $r9,$r9,0xFE
        # test for special values
        cmp $r8, $r9
        jmpc gte, absXm1_gte_absYm1
        move $r8, $r9
absXm1_gte_absYm1:
        cmp $r8, 0xFE
        jmpc nz, no_spec_ret_div
        cmp $r10, 0xFF
        jmpc nz, ex_not_FF_div
        # x is inf or NaN
        lpm $r6, m_mask
        and $r2, $r0, $r6               # mx
        cmp $r2, 0
        jmpc nz, ret_nan_div            # x is NaN
        cmp $r11, 0xFF
        jmpc z, ret_nan_div             # inf / inf = NaN
        jmp ret_inf_div
ex_not_FF_div:
        cmp $r11, 0xFF
        jmpc nz, ey_not_FF_div
        # y is inf or NaN
        ashl $r13, $r1, 9
        cmp $r13, 0
        jmpc nz, ret_nan_div            # y is NaN
        move $r0, $r12                  # finite / inf: signed zero
        pop $r13
        return
ey_not_FF_div:
        or $r10, $r10, $r11
        cmp $r10, 0
        jmpc z, ret_nan_div             # 0 / 0 = NaN
ret_inf_div:
        lpm $r6, inf
        move $r0, $r6
        or $r0, $r0, $r12
        pop $r13
        return
ret_nan_div:
        lpm $r0, nan
        pop $r13
        return

no_spec_ret_div:
        # check for overflow
        ldk $r6, 0xFE
        cmp $r2, $r6
        jmpc lt, no_overflow_div
        lpm $r6, inf
        or $r0, $r12, $r6               # overflow: signed infinity
        pop $r13
        return
no_overflow_div:
        # check for underflow
        cmp $r2, 0
        jmpc ns, no_underflow_div
        xnor $r6, $r6, $r6              # -1
        cmp $r2, $r6
        jmpc nz, ret_sr_div
        ldk $r7, 0xFF
        xor $r6, $r6, $r7               # 0xFF ^ -1 = 0xFFFFFF00
        cmp $r4, $r6
        jmpc nz, ret_sr_div
        lpm $r6, sign_mask
        cmp $r4, $r6
        # NOTE(review): $r4 cannot equal both 0xFFFFFF00 (tested above)
        # and sign_mask, so the promote path below looks unreachable;
        # possibly the second compare was meant for another register.
        # Left unchanged -- flag for verification.
        jmpc nz, ret_sr_div
        lshr $r0, $r6, 8
        or $r0, $r0, $r12
        pop $r13
        return
ret_sr_div:
        move $r0, $r12                  # underflow: flush to signed zero
        pop $r13
        return
no_underflow_div:
        # Evaluate the polynomial approximation of the reciprocal.
        lpm $r6, dc_1
        muluh $r7, $r3, $r6             # i0 = mul( T , 0xffffe7d7 );
        lpm $r6, dc_2
        sub $r7, $r6, $r7               # i1 = 0xffffffe8 - i0;
        muluh $r7, $r5, $r7             # i2 = mul( S , i1 );
        add $r7, $r7, 0x20              # i3 = 0x00000020 + i2;
        muluh $r8, $r3, $r3             # i4 = mul( T , T );
        muluh $r9, $r5, $r8             # i5 = mul( S , i4 );
        lpm $r6, dc_3
        muluh $r10, $r3, $r6            # i6 = mul( T , 0xffbad86f );
        lpm $r6, dc_4
        sub $r10, $r6, $r10             # i7 = 0xfffbece7 - i6;
        muluh $r10, $r9, $r10           # i8 = mul( i5 , i7 );
        add $r7, $r7, $r10              # i9 = i3 + i8;
        muluh $r9, $r8, $r9             # i10 = mul( i4 , i5 );
        lpm $r6, dc_5
        muluh $r10, $r3, $r6            # i11 = mul( T , 0xf3672b51 );
        lpm $r6, dc_6
        sub $r10, $r6, $r10             # i12 = 0xfd9d3a3e - i11;
        lpm $r6, dc_7
        muluh $r11, $r3, $r6            # i13 = mul( T , 0x9a3c4390 );
        lpm $r6, dc_8
        sub $r11, $r6, $r11             # i14 = 0xd4d2ce9b - i13
        muluh $r11, $r8, $r11           # i15 = mul( i4 , i14 );
        add $r10, $r10, $r11            # i16 = i12 + i15;
        muluh $r10, $r9, $r10           # i17 = mul( i10 , i16 )
        add $r7, $r7, $r10              # i18 = i9 + i17;
        muluh $r10, $r8, $r8            # i19 = mul( i4 , i4 );
        lpm $r6, dc_9
        muluh $r11, $r3, $r6            # i20 = mul( T , 0x1bba92b3 );
        lpm $r6, dc_10
        sub $r11, $r6, $r11             # i21 = 0x525a1a8b - i20;
        lpm $r6, dc_11
        muluh $r8, $r8, $r6             # i22 = mul( i4 , 0x0452b1bf );
        add $r8, $r11, $r8              # i23 = i21 + i22;
        muluh $r8, $r10, $r8            # i24 = mul( i19 , i23 );
        muluh $r8, $r9, $r8             # i25 = mul( i10 , i24 );
        add $r3, $r7, $r8               # V = i18 + i25;
        # W = V & 0xFFFFFFC0;
        lpm $r6, dc_12
        and $r3, $r3, $r6               # W
        # round and pack final values
        ashl $r0, $r2, 23               # pack D
        or $r0, $r0, $r12               # pack Sr
        ashl $r12, $r1, 8
        or $r12, $r12, $r13             # My
        muluh $r10, $r3, $r12
        lshr $r11, $r5, 1
        cmp $r10, $r11
        jmpc gte, div_ret_1
        add $r3, $r3, 0x40              # round quotient up
div_ret_1:
        lshr $r3, $r3, 7
        add $r0, $r0, $r3               # merge mantissa into packed sign/exp
        pop $r13
        return
#endif
693
694 ##########################################################################
695 ##########################################################################
696 ## Negate
697
#ifdef L_negsf
# float __negsf(float x): negate by flipping the sign bit.
.global __negsf
__negsf:
        lpm $r1, sign_mask
        xor $r0, $r0, $r1
        return
#endif
705
706 ##########################################################################
707 ##########################################################################
708 ## float to int & unsigned int
709
#ifdef L_fixsfsi
# int __fixsfsi(float x): float to signed int, truncating toward zero.
# Out-of-range values saturate to 0x7FFFFFFF / 0x80000000; NaN -> 0.
.global __fixsfsi
__fixsfsi: # 20 instructions
        bextu $r1,$r0,(8<<5)|23         # e in r1
        lshr $r2,$r0,31                 # s in r2
        lpm $r3, m_mask
        and $r0,$r0,$r3                 # m in r0
        # test nan
        cmp $r1,0xFF
        jmpc nz, int_not_nan
        cmp $r0,0
        jmpc z, int_not_nan
        ldk $r0,0                       # NaN converts to 0
        return
int_not_nan:
        # test edges
        cmp $r1, 127
        jmpc gte, int_not_zero          # lower limit: |x| < 1 -> 0
        ldk $r0,0
        return
int_not_zero:
        cmp $r1, 158
        jmpc lt, int_not_max            # upper limit: e >= 158 means |x| >= 2^31
        lpm $r0, nan                    # 0x7FFFFFFF = INT_MAX
        cmp $r2, 0
        jmpc z, int_positive
        xnor $r0, $r0, 0                # ~INT_MAX = 0x80000000 = INT_MIN
        return
int_not_max:
        lpm $r3, smallest_norm
        or $r0, $r0, $r3                # set implicit bit
        sub $r1, $r1, 150               # shift distance = e - (127 + 23)
        cmp $r1, 0
        jmpc s, shift_right
        ashl $r0, $r0, $r1
        jmp set_int_sign
shift_right:
        xnor $r1, $r1, 0                # negate shift count (~n + 1)
        add $r1, $r1, 1
        lshr $r0, $r0, $r1              # truncate fractional bits
set_int_sign:
        cmp $r2, 0
        jmpc z, int_positive
        xnor $r0, $r0, 0                # negate result (~n + 1)
        add $r0, $r0, 1
int_positive:
        return
#endif
758
#ifdef L_fixunssfsi
# unsigned __fixunssfsi(float x): float to unsigned int, truncating.
# Negative inputs return 0.
.global __fixunssfsi
__fixunssfsi: # 19 instructions
        lshr $r2, $r0, 31               # s in r2
        cmp $r2, 0
        jmpc z, uint_not_neg
        ldk $r0, 0                      # negative -> 0
        return
uint_not_neg:
        bextu $r1, $r0, (8<<5)|23       # e in r1
        sub $r1, $r1, 127               # remove bias
        lpm $r3, m_mask
        and $r0, $r0, $r3               # m in r0
        # test nan
        # NOTE(review): the bias was already subtracted above, so $r1
        # can never equal 0xFF here and this NaN path looks unreachable;
        # NaN falls through to the shift paths (result unspecified,
        # which C permits for out-of-range conversions).
        cmp $r1, 0xFF
        jmpc nz, uint_not_nan
        cmp $r0, 0
        jmpc z, uint_not_nan
        ldk $r0, 0
        return
uint_not_nan:
        # test edges
        cmp $r1, 0
        jmpc ns, uint_not_zero          # lower limit: |x| < 1 -> 0
        ldk $r0, 0
        return
uint_not_zero:
        lpm $r3, smallest_norm
        or $r0, $r0, $r3                # set implicit bit
        cmp $r1, 23
        jmpc lt, shift_uint_right
        sub $r1, $r1, 23                # shift left by e - 23
        ashl $r0, $r0, $r1
        return
shift_uint_right:
        ldk $r3, 23
        sub $r1, $r3, $r1               # shift right by 23 - e (truncate)
        lshr $r0, $r0, $r1
        return
#endif
799
800 ##########################################################################
801 ##########################################################################
802 ## int & unsigned int to float
803
804
# i2f: convert the non-zero unsigned integer in \x to an IEEE-754
# single-precision float.  \s1..\s3 are scratch registers; \lbl makes
# the internal labels unique per expansion.  Normalizes with the
# leading-zero count, keeps two extra rounding bits when the value has
# more than 24 significant bits, rounds to nearest-even, packs e.
.macro i2f x, s1, s2, s3, lbl
        move \s1, \x
        nlz \s1, \s2                    # s1 = leading zero count
        cmp \s1, 8
        jmpc s, float_round\lbl         # > 24 significant bits: must round
        sub \s2, \s1, 8
        ashl \x, \x, \s2                # exact case: just align mantissa
        jmp float_no_round\lbl
float_round\lbl:
        cmp \s1, 6
        jmpc s, float_shift_right\lbl
        sub \s2, \s1, 6
        ashl \x, \x, \s2                # align with 2 extra rounding bits
        jmp float_round_and_pack\lbl
float_shift_right\lbl:
        ldk \s2, 6
        sub \s2, \s2, \s1               # right-shift distance
        xnor \s3, \s3 ,\s3              # 0xFFFFFFFF
        ashl \s3, \s3 ,\s2              # create inverse of mask for test of S bit value in discarded my
        xnor \s3, \s3 ,0                # NOT
        tst \x, \s3                     # determine value of sticky bit
        lshr \x, \x, \s2
        jmpc z,float_round_and_pack\lbl
        or \x, \x, 1                    # set the sticky bit to 1
float_round_and_pack\lbl:
        # round to nearest-even: or the eventual LSB into the low bits
        # so a tie rounds up only when the result would be odd
        bextu \s2, \x, (1<<5)|2         # extract low bit of m
        or \x, \x, \s2                  # or p into r
        add \x, \x, 1
        lshr \x, \x, 2
        btst \x, (1<<5)|24              # test for carry from round
        jmpc z, float_no_round\lbl
        sub \s1, \s1, 1                 # inc e for carry (actually dec nlz)
        lshr \x, \x, 1
float_no_round\lbl:
        ldk \s2, 158
        sub \s1, \s2, \s1               # e = 158 - nlz
        # Pack e
        ldl \s1, \s1, (8<<5)|23
        bins \x, \x, \s1
.endm
845
846
#ifdef L_floatsisf
# float __floatsisf(int x): signed int to float.
.global __floatsisf
__floatsisf: # 32 instructions
        cmp $r0, 0
        jmpc nz, float_not_zero
        return                          # 0 -> +0.0
float_not_zero:
        ashr $r1, $r0, 31               # s in r1
        xor $r0, $r0, $r1               # conditional negate: r0 = |x|
        sub $r0, $r0, $r1
        i2f $r0, $r2, $r3, $r4, 1
        ldl $r1, $r1, (1<<5)|31         # pack sign
        bins $r0, $r0, $r1
        return
#endif
862
#ifdef L_floatunsisf
# float __floatunsisf(unsigned x): unsigned int to float.
.global __floatunsisf
__floatunsisf: # 26 instructions
        cmp $r0, 0
        jmpc nz, float_not_zero2
        return                          # 0 -> +0.0
float_not_zero2:
        i2f $r0, $r1, $r2, $r3, 2
        return
#endif
873
#if 0
##########################################################################
##########################################################################
## float compare

# Compiled out (#if 0): three-way float compare.
# Returns -1 (0xFFFFFFFF) for x < y, 0 for equal, 1 for x > y or
# when either operand is NaN.
__cmpsf2_:
        # calc abs vals
        lpm $r3, nan                    # also abs mask
        and $r2, $r0, $r3
        and $r3, $r1, $r3
        # test if either abs is nan
        lpm $r4, inf
        cmp $r2, $r4
        jmpc gt, cmp_is_gt
        cmp $r3, $r4
        jmpc gt, cmp_is_gt
        # test if both are 0
        or $r2, $r2, $r3
        cmp $r2, 0
        jmpc z, cmp_is_eq               # +0 == -0
        # test if eq
        cmp $r0, $r1
        jmpc z, cmp_is_eq
        # -- if either is pos
        and $r2, $r0, $r1
        cmp $r2, 0
        jmpc s, cmp_both_neg
        cmp $r0, $r1
        jmpc gt, cmp_is_gt
        # r0 < r1
        lpm $r0, high_uint
        return
cmp_both_neg:
        cmp $r0, $r1
        jmpc lt, cmp_is_gt              # ordering reverses when both negative
        # r0 < r1
        lpm $r0, high_uint
        return
cmp_is_gt:
        ldk $r0, 1
        return
cmp_is_eq:
        ldk $r0, 0
        return
#endif
920
#ifdef L_udivsi3
# unsigned __udivsi3(unsigned dividend, unsigned divisor)
# Classic restoring shift-subtract division, 32 iterations.
# Quotient accumulates in $r0; remainder is left in $r2 (used by
# __umodsi3).  Uses only $r0-$r3 and the saved $r28.
.global __udivsi3
__udivsi3:
        # $r0 is dividend
        # $r1 is divisor
        ldk $r2,0                       # remainder accumulator
        push $r28
        ldk $r28,-32                    # loop counter: -32 .. 0
0:
        lshr $r3,$r0,31                 # Shift $r2:$r0 left one
        ashl $r0,$r0,1
        ashl $r2,$r2,1
        or $r2,$r2,$r3
        cmp $r2,$r1
        jmpc b,1f                       # partial remainder < divisor: skip
2:
        sub $r2,$r2,$r1
        add $r0,$r0,1                   # set quotient bit
1:
        add $r28,$r28,1
        jmpx 31,$r28,1,0b               # loop while counter negative (bit 31 set)
        pop $r28
        # $r0: quotient
        # $r2: remainder
        return
#endif
947
#ifdef L_umodsi3
# unsigned __umodsi3(unsigned a, unsigned b): a % b.
# __udivsi3 leaves the remainder in $r2.
.global __umodsi3
__umodsi3:
        call __udivsi3
        move $r0,$r2
        return
#endif
955
#ifdef L_divsi3
# int __divsi3(int a, int b): signed division with C truncation
# semantics -- the quotient sign is the XOR of the operand signs.
.global __divsi3
__divsi3:
        xor $r5,$r0,$r1                 # $r5 is sign of result
        ashr $r2,$r0,31                 # $r0 = abs($r0)
        xor $r0,$r0,$r2
        sub $r0,$r0,$r2
        ashr $r2,$r1,31                 # $r1 = abs($r1)
        xor $r1,$r1,$r2
        sub $r1,$r1,$r2
        call __udivsi3                  # uses only $r0-$r3,$r28: $r5 survives
        ashr $r5,$r5,31                 # all-ones iff result negative
        xor $r0,$r0,$r5                 # conditional negate
        sub $r0,$r0,$r5
        return

#endif
973
#ifdef L_modsi3
# int __modsi3(int a, int b): signed remainder with C semantics --
# the result takes the sign of the dividend.
.global __modsi3
__modsi3:
        move $r5,$r0                    # $r5 is sign of result (dividend sign)
        ashr $r2,$r0,31                 # $r0 = abs($r0)
        xor $r0,$r0,$r2
        sub $r0,$r0,$r2
        ashr $r2,$r1,31                 # $r1 = abs($r1)
        xor $r1,$r1,$r2
        sub $r1,$r1,$r2
        call __umodsi3
        ashr $r5,$r5,31                 # all-ones iff result negative
        xor $r0,$r0,$r5                 # conditional negate
        sub $r0,$r0,$r5
        return
#endif