1 # ieee754 sf routines for FT32
3 /* Copyright (C) 1995-2016 Free Software Foundation, Inc.
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 # See http://www.ens-lyon.fr/LIP/Pub/Rapports/PhD/PhD2006/PhD2006-02.pdf
25 # for implementation details of all except division which is detailed below
30 nan: .long 0x7FFFFFFF # also abs mask
32 sign_mask: .long 0x80000000
33 m_mask: .long 0x007FFFFF
35 edge_case: .long 0x00FFFFFF
36 smallest_norm: .long 0x00800000 # implicit bit
37 high_FF: .long 0xFF000000
38 high_uint: .long 0xFFFFFFFF
40 # Supply a few 'missing' instructions
53 # set $cc from the result of "ashl reg,dist"
# The instruction is emitted as a raw 32-bit word: the base pattern
# 0x5de04008 with reg OR-ed in at bit 15 and dist OR-ed in at bit 4,
# so both arguments must be plain numbers, not $rN register names.
54 .macro ashlcc reg,dist
55 .long 0x5de04008 | (\reg << 15) | (\dist << 4)
59 # converts an unsigned number x to a signed rep based on the bits in sign
60 # sign should be 0x00000000 or 0xffffffff.
# With sign == 0xffffffff the add/xor pair computes ~(x - 1), which is -x;
# with sign == 0x00000000 both instructions leave x unchanged.
61 .macro to_signed x, sign
62 add \x,\x,\sign # conditionally decrement x
63 xor \x,\x,\sign # conditionally complement x
72 # calculate trailing zero count in x, also uses scr.
73 # Using Seal's algorithm
91 .byte 32,0,1,12,2,6,0,13,3,0,7,0,0,0,0,14
92 .byte 10,4,0,0,8,0,0,25,0,0,0,0,0,21,27,15
93 .byte 31,11,5,0,0,0,0,0,9,0,0,24,0,0,20,26
94 .byte 30,0,0,0,0,23,0,19,29,0,22,18,28,17,16,0
96 # calculate leading zero count
103 # Round 26 bit mantissa to nearest
104 # | 23 bits frac | G | R | S |
105 .macro round m, s1, s2
114 # If NZ, set the LSB of reg
117 or \reg,\reg,1 # set the sticky bit to 1
121 ##########################################################################
122 ##########################################################################
123 ## addition & subtraction
125 #if defined(L_subsf3) || defined(L_addsub_sf)
128 # this is subtraction, so we just change the sign of r1
134 #if defined(L_addsf3) || defined(L_addsub_sf)
137 # x in $r0, y in $r1, result z in $r0 --||| 100 instructions +/- |||--
139 bextu $r2,$r0,(8<<5)|23 # ex in r2
140 bextu $r3,$r1,(8<<5)|23 # ey in r3
141 sub $r5,$r2,$r3 # d = ex - ey
143 # Special values are 0x00 and 0xff in ex and ey.
144 # If (ex&ey) != 0 or (ex|ey)=255 then there may be
151 jmpc nz,no_special_vals
153 # Check for early exit
155 jmpc z,test_if_not_255
157 jmpc nz,no_early_exit
178 # setup to test for special values
183 # test for special values
185 jmpc gte,ex_spec_is_gte
189 jmpc nz,no_special_vals
208 jmpc nz,no_special_vals
209 ashl $r6,$r0,9 # clear all except x frac
210 ashl $r7,$r1,9 # clear all except y frac
214 lshr $r4,$r0,31 # sx in r4
215 lshr $r5,$r1,31 # sy in r5
223 ldk $r8,(1<<10)|(9<<5)|26 # setup implicit bit and mask for e
224 #----------------------
225 ashr $r4,$r0,31 # sx in r4
226 ashl $r0,$r0,3 # shift mx 3 for GRS bits
227 bins $r0,$r0,$r8 # clear sx, ex and add implicit bit mx
228 # change mx to signed mantissa
230 #----------------------
231 ashr $r4,$r1,31 # sy in r4
232 ashl $r1,$r1,3 # shift my 3 for GRS bits
233 bins $r1,$r1,$r8 # clear sy, ey and add implicit bit my
234 # change my to signed mantissa
236 #----------------------
237 # test if we swap ms based on d sign
244 # d positive means that ex>=ey, so ez = ex
245 # d negative means that ey>ex, so ez = ey
250 # now $r2 = ez = max(ex,ey)
251 cmp $r5,26 # max necessary alignment shift is 26
256 ashl $r7,$r7,$r5 # create inverse of mask for test of S bit value in discarded my
258 tst $r1,$r7 # determine value of sticky bit
266 # $r4 = sign(mx), mx = |mx|
271 # realign mantissa using leading zero count
275 btst $r0,(6<<5)|0 # test low bits for sticky again
291 # mz == 0? if so, we just bail with a +0
293 jmpc nz,msum_not_zero
297 # Combined check that (1 <= ez <= 254)
300 jmpc b,no_special_ret
309 jmpc lt,no_special_ret
315 ldl $r2,$r2,(8<<5)|23
316 bins $r0,$r0,$r2 # width = 8, pos = 23 pack ez
319 ldl $r4,$r4,(1<<5)|31
320 bins $r0,$r0,$r4 # width = 1, pos = 31 set sz to sy
324 ##########################################################################
325 ##########################################################################
331 # x in $r0, y in $r1, result z in $r0 --||| 61 instructions +/- |||--
334 bextu $r2,$r0,(8<<5)|23 # ex in r2
335 bextu $r3,$r1,(8<<5)|23 # ey in r3
339 and $r4,$r4,$r5 # sz in r4
341 # unpack m add implicit bit
342 ldk $r5,(1<<10)|(9<<5)|23 # setup implicit bit and mask for e
343 #----------------------
344 bins $r0,$r0,$r5 # clear sx, ex and add implicit bit mx
352 jmpc b,no_special_vals_mul
355 # Check for early exit
359 jmpc nz,no_early_exit_mul
362 jmpc z,no_early_exit_mul
364 jmpc z,no_early_exit_mul
368 # setup to test for special values
373 # test for special values
375 jmpc gte,ex_spec_is_gte_ey_mul
377 ex_spec_is_gte_ey_mul:
379 jmpc nz,no_special_vals_mul
381 jmpc nz,ex_not_FF_mul
414 bins $r1,$r1,$r5 # clear sy, ey and add implicit bit my
417 sub $r3,$r3,127 # ez in r3
423 btst $r1,(1<<5)|15 # XXX use jmpx
427 # 48-bit product is in (r1,r2). The low 22 bits of r2
431 or $r0,$r0,$r1 # r0 = (r1,r2) >> 22
434 add $r3,$r3,1 # bump exponent
443 jmpc b,no_special_ret_mul
446 # When the final exponent <= 0, result is flushed to 0 except
447 # for the border case 0x00FFFFFF which is promoted to next higher
448 # FP no., that is, the smallest "normalized" number.
452 ldl $r3,$r3,(8<<5)|23
453 bins $r0,$r0,$r3 # width = 8, pos = 23 pack ez
457 lpm $r0,smallest_norm
465 jmpc lt,no_special_ret_mul
471 ldl $r3,$r3,(8<<5)|23
472 bins $r0,$r0,$r3 # width = 8, pos = 23 pack ez
480 # 48-bit product is in (r1,r2). The low 21 bits of r2
484 or $r0,$r0,$r1 # r0 = (r1,r2) >> 21 -- NOTE(review): comment previously said ">> 22"; changed to agree with the 21 discarded bits stated above, confirm against the shift counts
494 jmpc b,no_special_ret_mul
498 ##########################################################################
499 ##########################################################################
502 ## See http://perso.ens-lyon.fr/gilles.villard/BIBLIOGRAPHIE/PDF/arith19.pdf
503 ## for implementation details
506 dc_1: .long 0xffffe7d7
507 dc_2: .long 0xffffffe8
508 dc_3: .long 0xffbad86f
509 dc_4: .long 0xfffbece7
510 dc_5: .long 0xf3672b51
511 dc_6: .long 0xfd9d3a3e
512 dc_7: .long 0x9a3c4390
513 dc_8: .long 0xd4d2ce9b
514 dc_9: .long 0x1bba92b3
515 dc_10: .long 0x525a1a8b
516 dc_11: .long 0x0452b1bf
517 dc_12: .long 0xFFFFFFC0
518 spec_val_test: .long 0x7F7FFFFF
526 # x in $r0, y in $r1, result z in $r0 --||| 73 instructions +/- |||-
527 bextu $r10,$r0,(8<<5)|23 # ex in r10
528 bextu $r11,$r1,(8<<5)|23 # ey in r11
530 and $r2, $r0, $r6 # mx
531 and $r3, $r1, $r6 # my
533 bextu $r2,$r30,(1<<5)|4 # c = Tx >= T;
534 ashl $r3,$r3,9 # T = X << 9;
536 ashl $r4,$r0,8 # X8 = X << 8;
537 or $r4,$r4,$r13 # Mx = X8 | 0x80000000;
538 lshr $r5,$r4,$r2 # S = Mx >> c;
542 sub $r2, $r12, $r2 # int D = (Ex + 125) - (Ey - c);
545 and $r12,$r12,$r13 # Sr = ( X ^ Y ) & 0x80000000;
548 jmpc nz, no_early_ret_dev
550 jmpc z, no_early_ret_dev
552 jmpc z, no_early_ret_dev
557 # setup to test for special values
562 # test for special values
564 jmpc gte, absXm1_gte_absYm1
568 jmpc nz, no_spec_ret_div
570 jmpc nz, ex_not_FF_div
572 and $r2, $r0, $r6 # mx
580 jmpc nz, ey_not_FF_div
606 jmpc lt, no_overflow_div
612 # check for underflow
614 jmpc ns, no_underflow_div
615 xnor $r6, $r6, $r6 # -1
619 xor $r6, $r6, $r7 # 0xFF ^ -1 = 0xFFFFFF00
635 muluh $r7, $r3, $r6 # i0 = mul( T , 0xffffe7d7 );
637 sub $r7, $r6, $r7 # i1 = 0xffffffe8 - i0;
638 muluh $r7, $r5, $r7 # i2 = mul( S , i1 );
639 add $r7, $r7, 0x20 # i3 = 0x00000020 + i2;
640 muluh $r8, $r3, $r3 # i4 = mul( T , T );
641 muluh $r9, $r5, $r8 # i5 = mul( S , i4 );
643 muluh $r10, $r3, $r6 # i6 = mul( T , 0xffbad86f );
645 sub $r10, $r6, $r10 # i7 = 0xfffbece7 - i6;
646 muluh $r10, $r9, $r10 # i8 = mul( i5 , i7 );
647 add $r7, $r7, $r10 # i9 = i3 + i8;
648 muluh $r9, $r8, $r9 # i10 = mul( i4 , i5 );
650 muluh $r10, $r3, $r6 # i11 = mul( T , 0xf3672b51 );
652 sub $r10, $r6, $r10 # i12 = 0xfd9d3a3e - i11;
654 muluh $r11, $r3, $r6 # i13 = mul( T , 0x9a3c4390 );
656 sub $r11, $r6, $r11 # i14 = 0xd4d2ce9b - i13
657 muluh $r11, $r8, $r11 # i15 = mul( i4 , i14 );
658 add $r10, $r10, $r11 # i16 = i12 + i15;
659 muluh $r10, $r9, $r10 # i17 = mul( i10 , i16 )
660 add $r7, $r7, $r10 # i18 = i9 + i17;
661 muluh $r10, $r8, $r8 # i19 = mul( i4 , i4 );
663 muluh $r11, $r3, $r6 # i20 = mul( T , 0x1bba92b3 );
665 sub $r11, $r6, $r11 # i21 = 0x525a1a8b - i20;
667 muluh $r8, $r8, $r6 # i22 = mul( i4 , 0x0452b1bf );
668 add $r8, $r11, $r8 # i23 = i21 + i22;
669 muluh $r8, $r10, $r8 # i24 = mul( i19 , i23 );
670 muluh $r8, $r9, $r8 # i25 = mul( i10 , i24 );
671 add $r3, $r7, $r8 # V = i18 + i25;
672 # W = V & 0xFFFFFFC0;
674 and $r3, $r3, $r6 # W
675 # round and pack final values
676 ashl $r0, $r2, 23 # pack D
677 or $r0, $r0, $r12 # pack Sr
679 or $r12, $r12, $r13 # My
680 muluh $r10, $r3, $r12
692 ##########################################################################
693 ##########################################################################
704 ##########################################################################
705 ##########################################################################
706 ## float to int & unsigned int
710 __fixsfsi: # 20 instructions
711 bextu $r1,$r0,(8<<5)|23 # e in r1
712 lshr $r2,$r0,31 # s in r2
714 and $r0,$r0,$r3 # m in r0
725 jmpc gte, int_not_zero # lower limit
730 jmpc lt, int_not_max # upper limit
737 lpm $r3, smallest_norm
738 or $r0, $r0, $r3 # set implicit bit
759 __fixunssfsi: # 19 instructions
760 lshr $r2, $r0, 31 # s in r2
766 bextu $r1, $r0, (8<<5)|23 # e in r1
769 and $r0, $r0, $r3 # m in r0
772 jmpc nz, uint_not_nan
780 jmpc ns, uint_not_zero # lower limit
784 lpm $r3, smallest_norm
785 or $r0, $r0, $r3 # set implicit bit
787 jmpc lt, shift_uint_right
798 ##########################################################################
799 ##########################################################################
800 ## int & unsigned int to float
803 .macro i2f x, s1, s2, s3, lbl
807 jmpc s, float_round\lbl
810 jmp float_no_round\lbl
813 jmpc s, float_shift_right\lbl
816 jmp float_round_and_pack\lbl
817 float_shift_right\lbl:
820 xnor \s3, \s3 ,\s3 # 0xFFFFFFFF
821 ashl \s3, \s3 ,\s2 # create inverse of mask for test of S bit value in discarded my
822 xnor \s3, \s3 ,0 # NOT
823 tst \x, \s3 # determine value of sticky bit
825 jmpc z,float_round_and_pack\lbl
826 or \x, \x, 1 # set the sticky bit to 1
827 float_round_and_pack\lbl:
828 bextu \s2, \x, (1<<5)|2 # extract low bit of m
829 or \x, \x, \s2 # or p into r
832 btst \x, (1<<5)|24 # test for carry from round
833 jmpc z, float_no_round\lbl
834 sub \s1, \s1, 1 # inc e for carry (actually dec nlz)
840 ldl \s1, \s1, (8<<5)|23
847 __floatsisf: # 32 instructions
849 jmpc nz, float_not_zero
852 ashr $r1, $r0, 31 # s in r1
853 xor $r0, $r0, $r1 # cond neg
855 i2f $r0, $r2, $r3, $r4, 1
856 ldl $r1, $r1, (1<<5)|31
862 .global __floatunsisf
863 __floatunsisf: # 26 instructions
865 jmpc nz, float_not_zero2
868 i2f $r0, $r1, $r2, $r3, 2
872 ##########################################################################
873 ##########################################################################
879 lpm $r3, nan # also abs mask
882 # test if either abs is nan
895 # -- if either is pos