# lib1funcs.S -- ieee754 single-precision support routines for the FT32
# target (libgcc/config/ft32).
# ieee754 sf routines for FT32

/* Copyright (C) 1995-2014 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

# See http://www.ens-lyon.fr/LIP/Pub/Rapports/PhD/PhD2006/PhD2006-02.pdf
# for implementation details of all except division which is detailed below
#
// .global __cmpsf2_

# Constant pool for the routines below; values are fetched with lpm.
nan:            .long 0x7FFFFFFF        # quiet-NaN pattern; also abs-value mask
inf:            .long 0x7F800000        # +infinity
sign_mask:      .long 0x80000000        # sign bit
m_mask:         .long 0x007FFFFF        # mantissa (fraction) field
exp_bias:       .long 127
edge_case:      .long 0x00FFFFFF        # border case promoted by __mulsf3
smallest_norm:  .long 0x00800000        # implicit bit
high_FF:        .long 0xFF000000
high_uint:      .long 0xFFFFFFFF
# Supply a few 'missing' instructions

# not: rd = ~r1
.macro not rd,r1
        xor     \rd,\r1,-1
.endm

# negate: x = -x (two's complement: ~x + 1)
.macro neg x
        not     \x, \x
        add     \x, \x, 1
.endm

# set $cc from the result of "ashl reg,dist"
# (hand-assembled instruction word with reg/dist fields patched in)
.macro ashlcc reg,dist
        .long   0x5de04008 | (\reg << 15) | (\dist << 4)
.endm

# converts an unsigned number x to a signed rep based on the bits in sign
# sign should be 0x00000000 or 0xffffffff.
.macro to_signed x, sign
        add     \x,\x,\sign             # conditionally decrement x
        xor     \x,\x,\sign             # conditionally complement x
.endm

# load a full 32-bit constant v into register r (high bits, then low 10)
.macro ld32 r,v
        ldk     \r,(\v>>10)
        ldl     \r,\r,(\v & 1023)
.endm
# calculate trailing zero count in x, also uses scr.
# Using Seal's algorithm
.macro ntz x, scr
        not     \scr, \x
        add     \scr, \scr, 1           # scr = -x
        and     \x, \x, \scr            # isolate lowest set bit
        ashl    \scr, \x, 4
        add     \x, \scr, \x            # x *= 17
        ashl    \scr, \x, 6
        add     \x, \scr, \x            # x *= 65
        ashl    \scr, \x, 16
        sub     \x, \scr, \x            # x *= 65535
        lshr    \x, \x, 26              # top 6 bits index the table
        ldk     \scr, ntz_table
        add     \x, \x, \scr
        lpmi.b  \x, \x, 0               # fetch count from program memory
.endm

# 64-entry perfect-hash table for the ntz macro above.
ntz_table:
        .byte   32,0,1,12,2,6,0,13,3,0,7,0,0,0,0,14
        .byte   10,4,0,0,8,0,0,25,0,0,0,0,0,21,27,15
        .byte   31,11,5,0,0,0,0,0,9,0,0,24,0,0,20,26
        .byte   30,0,0,0,0,23,0,19,29,0,22,18,28,17,16,0
# calculate leading zero count
.macro nlz x, scr
        flip    \x, \x, 31              # bit-reverse, then count trailing zeros
        ntz     \x, \scr
.endm


# Round 26 bit mantissa to nearest
# | 23 bits frac | G | R | S |
# 0xc8 is a decision table indexed by the low 3 bits (kept-LSB, guard,
# sticky): round up when guard is set and (sticky or kept-LSB) is set,
# i.e. round-to-nearest, ties-to-even.
.macro round m, s1, s2
        ldk     \s1,0xc8
        and     \s2,\m,7
        lshr    \s1,\s1,\s2
        and     \s1,\s1,1               # s1 = 1 iff we must round up
        lshr    \m,\m,2                 # drop the two guard bits
        add     \m,\m,\s1
.endm

# If $cc is NZ, set the LSB (sticky bit) of reg
.macro sticky reg
        jmpc    z,1f
        or      \reg,\reg,1             # set the sticky bit to 1
1:
.endm
##########################################################################
##########################################################################
## addition & subtraction

#if defined(L_subsf3) || defined(L_addsub_sf)
        .global __subsf3
__subsf3:
        # this is subtraction, so we just change the sign of r1
        lpm     $r2,sign_mask
        xor     $r1,$r1,$r2
        jmp     __addsf3
#endif

#if defined(L_addsf3) || defined(L_addsub_sf)
        .global __addsf3
__addsf3:
        # x in $r0, y in $r1, result z in $r0   --||| 100 instructions +/- |||--
        # unpack e, calc d
        bextu   $r2,$r0,(8<<5)|23       # ex in r2
        bextu   $r3,$r1,(8<<5)|23       # ey in r3
        sub     $r5,$r2,$r3             # d = ex - ey

        # Special values are 0x00 and 0xff in ex and ey.
        # If (ex&ey) != 0 or (ex|ey)=255 then there may be
        # a special value.
        tst     $r2,$r3
        jmpc    nz,1f
        jmp     slow
1:      or      $r4,$r2,$r3
        cmp     $r4,255
        jmpc    nz,no_special_vals
slow:
        # Check for early exit
        cmp     $r2,0
        jmpc    z,test_if_not_255
        cmp     $r3,0
        jmpc    nz,no_early_exit
test_if_not_255:
        cmp     $r2,255
        jmpc    z,no_early_exit
        cmp     $r3,255
        jmpc    z,no_early_exit
        # both operands are zero: result is -0 only when both signs are set
        or      $r6,$r2,$r3
        cmp     $r6,0
        jmpc    nz,was_not_zero
        and     $r0,$r0,$r1
        lpm     $r1,sign_mask
        and     $r0,$r0,$r1
        return
was_not_zero:
        cmp     $r2,0
        jmpc    nz,ret_x
        move    $r0,$r1                 # x == 0 -> return y
        return
ret_x:
        return
no_early_exit:
        # setup to test for special values
        sub     $r6,$r2,1
        and     $r6,$r6,0xFE
        sub     $r7,$r3,1
        and     $r7,$r7,0xFE
        # test for special values
        cmp     $r6,$r7
        jmpc    gte,ex_spec_is_gte
        move    $r6,$r7
ex_spec_is_gte:
        cmp     $r6,0xFE
        jmpc    nz,no_special_vals
        cmp     $r5,0
        jmpc    ns,d_gte_0
        cmp     $r3,0xFF
        jmpc    z,ret_y
        cmp     $r2,0
        jmpc    z,ret_y
ret_y:
        move    $r0,$r1
        return
d_gte_0:
        cmp     $r5,0
        jmpc    z,d_is_0
        cmp     $r2,0xFF
        jmpc    z,ret_x
        cmp     $r3,0
        jmpc    z,ret_x
d_is_0:
        cmp     $r2,0xFF
        jmpc    nz,no_special_vals
        # both exponents are 255: inf/NaN handling
        ashl    $r6,$r0,9               # clear all except x frac
        ashl    $r7,$r1,9               # clear all except y frac
        or      $r6,$r6,$r7
        cmp     $r6,0
        jmpc    nz,ret_nan              # either frac nonzero -> NaN
        lshr    $r4,$r0,31              # sx in r4
        lshr    $r5,$r1,31              # sy in r5
        cmp     $r4,$r5
        jmpc    nz,ret_nan              # inf + (-inf) -> NaN
        return
ret_nan:
        lpm     $r0,nan
        return
no_special_vals:
        ldk     $r8,(1<<10)|(9<<5)|26   # setup implicit bit and mask for e
        #----------------------
        ashr    $r4,$r0,31              # sx in r4
        ashl    $r0,$r0,3               # shift mx 3 for GRS bits
        bins    $r0,$r0,$r8             # clear sx, ex and add implicit bit mx
        # change mx to signed mantissa
        to_signed $r0,$r4
        #----------------------
        ashr    $r4,$r1,31              # sy in r4
        ashl    $r1,$r1,3               # shift my 3 for GRS bits
        bins    $r1,$r1,$r8             # clear sy, ey and add implicit bit my
        # change my to signed mantissa
        to_signed $r1,$r4
        #----------------------
        # test if we swap ms based on d sign
        cmp     $r5,0
        jmpc    gte,noswap
        # swap mx & my (xor trick, no scratch register)
        xor     $r0,$r0,$r1
        xor     $r1,$r0,$r1
        xor     $r0,$r0,$r1
        # d positive means that ex>=ey, so ez = ex
        # d negative means that ey>ex, so ez = ey
        move    $r2,$r3
        # |d|
        neg     $r5
noswap:
        # now $r2 = ez = max(ex,ey)
        cmp     $r5,26                  # max necessary alignment shift is 26
        jmpc    lt,under_26
        ldk     $r5,26
under_26:
        ldk     $r7,-1
        ashl    $r7,$r7,$r5             # create inverse of mask for test of S bit value in discarded my
        not     $r7,$r7
        tst     $r1,$r7                 # determine value of sticky bit
        # shift my >> |d|
        ashr    $r1,$r1,$r5
        sticky  $r1

        # add ms
        add     $r0,$r0,$r1

        # $r4 = sign(mx), mx = |mx|
        ashr    $r4,$r0,31
        xor     $r0,$r0,$r4
        sub     $r0,$r0,$r4

        # realign mantissa using leading zero count
        flip    $r7,$r0,31
        ntz     $r7,$r8
        ashl    $r0,$r0,$r7
        btst    $r0,(6<<5)|0            # test low bits for sticky again
        lshr    $r0,$r0,6
        sticky  $r0

        # update exponent
        add     $r2,$r2,5
        sub     $r2,$r2,$r7

        # Round to nearest
        round   $r0,$r7,$r6

        # detect_exp_update: rounding may have carried out of the mantissa
        lshr    $r6,$r0,24
        add     $r2,$r2,$r6

        # final tests
        # mz == 0? if so, we just bail with a +0
        cmp     $r0,0
        jmpc    nz,msum_not_zero
        ldk     $r0,0
        return
msum_not_zero:
        # Combined check that (1 <= ez <= 254)
        sub     $r3,$r2,1
        cmp     $r3,254
        jmpc    b,no_special_ret
        # underflow?
        cmp     $r2,0
        jmpc    gt,no_under
        ldk     $r0,0                   # flush to zero (sign still packed below)
        jmp     pack_sz
no_under:
        # overflow?
        cmp     $r2,255
        jmpc    lt,no_special_ret
        ldk     $r0,0x7F8               # infinity
        ashl    $r0,$r0,20
        jmp     pack_sz
no_special_ret:
        # Pack ez
        ldl     $r2,$r2,(8<<5)|23
        bins    $r0,$r0,$r2             # width = 8, pos = 23 pack ez
        # Pack sz
pack_sz:
        ldl     $r4,$r4,(1<<5)|31
        bins    $r0,$r0,$r4             # width = 1, pos = 31 set sz
        return
#endif

##########################################################################
##########################################################################
## multiplication

#ifdef L_mulsf3
        .global __mulsf3
__mulsf3:
        # x in $r0, y in $r1, result z in $r0   --||| 61 instructions +/- |||--

        # unpack e
        bextu   $r2,$r0,(8<<5)|23       # ex in r2
        bextu   $r3,$r1,(8<<5)|23       # ey in r3
        # calc result sign
        xor     $r4,$r0,$r1
        lpm     $r5,sign_mask
        and     $r4,$r4,$r5             # sz in r4

        # unpack m add implicit bit
        ldk     $r5,(1<<10)|(9<<5)|23   # setup implicit bit and mask for e
        #----------------------
        bins    $r0,$r0,$r5             # clear sx, ex and add implicit bit mx

        # fast path when both exponents are in 1..254
        sub     $r6,$r2,1
        cmp     $r6,254
        jmpc    b,1f
        jmp     slow_mul
1:      sub     $r6,$r3,1
        cmp     $r6,254
        jmpc    b,no_special_vals_mul

slow_mul:
        # Check for early exit
        cmp     $r2,0
        jmpc    z,op_is_zero
        cmp     $r3,0
        jmpc    nz,no_early_exit_mul
op_is_zero:
        cmp     $r2,255
        jmpc    z,no_early_exit_mul
        cmp     $r3,255
        jmpc    z,no_early_exit_mul
        move    $r0,$r4                 # zero * finite -> signed zero
        return
no_early_exit_mul:
        # setup to test for special values
        sub     $r6,$r2,1
        and     $r6,$r6,0xFE
        sub     $r7,$r3,1
        and     $r7,$r7,0xFE
        # test for special values
        cmp     $r6,$r7
        jmpc    gte,ex_spec_is_gte_ey_mul
        move    $r6,$r7
ex_spec_is_gte_ey_mul:
        cmp     $r6,0xFE
        jmpc    nz,no_special_vals_mul
        cmp     $r2,0xFF
        jmpc    nz,ex_not_FF_mul
        # x is inf or NaN
        ashl    $r6,$r0,9
        cmp     $r6,0
        jmpc    nz,ret_nan              # x is NaN
        cmp     $r3,0
        jmpc    z,ret_nan               # inf * 0 -> NaN
        ashl    $r6,$r1,1
        lpm     $r7,high_FF
        cmp     $r6,$r7
        jmpc    a,ret_nan               # y is NaN
        cmp     $r6,0
        jmpc    z,ret_nan               # inf * 0 -> NaN
        # infinity
        lpm     $r0,inf
        or      $r0,$r0,$r4
        return
ex_not_FF_mul:
        cmp     $r2,0
        jmpc    nz,no_nan_mul
        cmp     $r3,0xFF
        jmpc    nz,no_nan_mul
        jmp     ret_nan                 # 0 * (inf or NaN) -> NaN
no_nan_mul:
        lpm     $r0,nan
        and     $r0,$r0,$r1
        or      $r0,$r0,$r4
        return

ret_nan:
        lpm     $r0,nan
        return

no_special_vals_mul:
        bins    $r1,$r1,$r5             # clear sy, ey and add implicit bit my
        # calc ez
        add     $r3,$r2,$r3
        sub     $r3,$r3,127             # ez in r3

        # (r1,r2) = R0 * R1 : high word in r1, low word in r2
        mul     $r2,$r0,$r1
        muluh   $r1,$r0,$r1

        btst    $r1,(1<<5)|15           # XXX use jmpx
        jmpc    z,mul_z0

        # mz is 1X.XX...X
        # 48-bit product is in (r1,r2).  The low 22 bits of r2
        # are discarded.
        lshr    $r0,$r2,22
        ashl    $r1,$r1,10
        or      $r0,$r0,$r1             # r0 = (r1,r2) >> 22
        ashlcc  2,10                    # NZ iff any discarded low bits of r2 set
        sticky  $r0
        add     $r3,$r3,1               # bump exponent

        # Round to nearest
        round   $r0, $r1, $r2
        lshr    $r6,$r0,24              # rounding carry-out
        add     $r3,$r3,$r6

        sub     $r6,$r3,1
        cmp     $r6,254
        jmpc    b,no_special_ret_mul

special_ret_mul:
        # When the final exponent <= 0, result is flushed to 0 except
        # for the border case 0x00FFFFFF which is promoted to next higher
        # FP no., that is, the smallest "normalized" number.
        cmp     $r3,0
        jmpc    gt,exp_normal
        # Pack ez
        ldl     $r3,$r3,(8<<5)|23
        bins    $r0,$r0,$r3             # width = 8, pos = 23 pack ez
        lpm     $r2,edge_case
        cmp     $r0,$r2
        jmpc    nz,no_edge_case
        lpm     $r0,smallest_norm
        jmp     pack_sz_mul
no_edge_case:
        ldk     $r0,0
        jmp     pack_sz_mul
exp_normal:
        # overflow?
        cmp     $r3,255
        jmpc    lt,no_special_ret_mul
        ldk     $r0,0x7F8               # infinity
        ashl    $r0,$r0,20
        jmp     pack_sz_mul
no_special_ret_mul:
        # Pack ez
        ldl     $r3,$r3,(8<<5)|23
        bins    $r0,$r0,$r3             # width = 8, pos = 23 pack ez
        # Pack sz
pack_sz_mul:
        or      $r0,$r0,$r4
        return

mul_z0:
        # mz is 0X.XX...X
        # 48-bit product is in (r1,r2).  The low 21 bits of r2
        # are discarded.
        lshr    $r0,$r2,21
        ashl    $r1,$r1,11
        or      $r0,$r0,$r1             # r0 = (r1,r2) >> 21
        ashlcc  2,11                    # NZ iff any discarded low bits of r2 set
        sticky  $r0
        # Round to nearest
        round   $r0, $r1, $r2
        lshr    $r6,$r0,24
        add     $r3,$r3,$r6

        sub     $r6,$r3,1
        cmp     $r6,254
        jmpc    b,no_special_ret_mul
        jmp     special_ret_mul
#endif

##########################################################################
##########################################################################
## division

## See http://perso.ens-lyon.fr/gilles.villard/BIBLIOGRAPHIE/PDF/arith19.pdf
## for implementation details

# Constants used by the __divsf3 approximation below (see the i0..i25
# comment sequence there for how each one is consumed).
dc_1:   .long 0xffffe7d7
dc_2:   .long 0xffffffe8
dc_3:   .long 0xffbad86f
dc_4:   .long 0xfffbece7
dc_5:   .long 0xf3672b51
dc_6:   .long 0xfd9d3a3e
dc_7:   .long 0x9a3c4390
dc_8:   .long 0xd4d2ce9b
dc_9:   .long 0x1bba92b3
dc_10:  .long 0x525a1a8b
dc_11:  .long 0x0452b1bf
dc_12:  .long 0xFFFFFFC0        # mask applied to V to form W
spec_val_test:  .long 0x7F7FFFFF
#ifdef L_divsf3
        .global __divsf3
__divsf3:
        push    $r13
        # x in $r0, y in $r1, result z in $r0   --||| 73 instructions +/- |||-
        bextu   $r10,$r0,(8<<5)|23      # ex in r10
        bextu   $r11,$r1,(8<<5)|23      # ey in r11
        lpm     $r6, m_mask
        and     $r2, $r0, $r6           # mx
        and     $r3, $r1, $r6           # my
        cmp     $r2,$r3
        bextu   $r2,$r30,(1<<5)|4       # c = Tx >= T;  (bit 4 of $r30 -- presumably the flags register; confirm against ISA)
        ashl    $r3,$r3,9               # T = X << 9;
        lpm     $r13, sign_mask
        ashl    $r4,$r0,8               # X8 = X << 8;
        or      $r4,$r4,$r13            # Mx = X8 | 0x80000000;
        lshr    $r5,$r4,$r2             # S = Mx >> c;
        # calc D
        sub     $r2, $r11, $r2
        add     $r12, $r10, 125
        sub     $r2, $r12, $r2          # int D = (Ex + 125) - (Ey - c);
        # calc result sign
        xor     $r12,$r0,$r1
        and     $r12,$r12,$r13          # Sr = ( X ^ Y ) & 0x80000000;
        # check early exit
        cmp     $r10, 0
        jmpc    nz, no_early_ret_dev
        cmp     $r11, 0
        jmpc    z, no_early_ret_dev
        cmp     $r11, 255
        jmpc    z, no_early_ret_dev
        move    $r0, $r12               # 0 / finite -> signed zero
        pop     $r13
        return
no_early_ret_dev:
        # setup to test for special values
        sub     $r8,$r10,1
        and     $r8,$r8,0xFE
        sub     $r9,$r11,1
        and     $r9,$r9,0xFE
        # test for special values
        cmp     $r8, $r9
        jmpc    gte, absXm1_gte_absYm1
        move    $r8, $r9
absXm1_gte_absYm1:
        cmp     $r8, 0xFE
        jmpc    nz, no_spec_ret_div
        # x is inf or NaN?
        cmp     $r10, 0xFF
        jmpc    nz, ex_not_FF_div
        lpm     $r6, m_mask
        and     $r2, $r0, $r6           # mx
        cmp     $r2, 0
        jmpc    nz, ret_nan_div         # x is NaN
        cmp     $r11, 0xFF
        jmpc    z, ret_nan_div          # inf / (inf or NaN) -> NaN
        jmp     ret_inf_div
ex_not_FF_div:
        cmp     $r11, 0xFF
        jmpc    nz, ey_not_FF_div
        ashl    $r13, $r1, 9
        cmp     $r13, 0
        jmpc    nz, ret_nan_div         # y is NaN
        move    $r0, $r12               # finite / inf -> signed zero
        pop     $r13
        return
ey_not_FF_div:
        or      $r10, $r10, $r11
        cmp     $r10, 0
        jmpc    z, ret_nan_div          # 0 / 0 -> NaN
ret_inf_div:
        lpm     $r6, inf
        move    $r0, $r6
        or      $r0, $r0, $r12
        pop     $r13
        return
ret_nan_div:
        lpm     $r0, nan
        pop     $r13
        return

no_spec_ret_div:
        # check for overflow
        ldk     $r6, 0xFE
        cmp     $r2, $r6
        jmpc    lt, no_overflow_div
        lpm     $r6, inf
        or      $r0, $r12, $r6
        pop     $r13
        return
no_overflow_div:
        # check for underflow
        cmp     $r2, 0
        jmpc    ns, no_underflow_div
        xnor    $r6, $r6, $r6           # -1
        cmp     $r2, $r6
        jmpc    nz, ret_sr_div
        ldk     $r7, 0xFF
        xor     $r6, $r6, $r7           # 0xFF ^ -1 = 0xFFFFFF00
        cmp     $r4, $r6
        jmpc    nz, ret_sr_div
        lpm     $r6, sign_mask
        cmp     $r4, $r6
        jmpc    nz, ret_sr_div
        lshr    $r0, $r6, 8
        or      $r0, $r0, $r12
        pop     $r13
        return
ret_sr_div:
        move    $r0, $r12               # underflow -> signed zero
        pop     $r13
        return
no_underflow_div:
        # evaluate the approximation from the paper referenced above
        lpm     $r6, dc_1
        muluh   $r7, $r3, $r6           # i0 = mul( T , 0xffffe7d7 );
        lpm     $r6, dc_2
        sub     $r7, $r6, $r7           # i1 = 0xffffffe8 - i0;
        muluh   $r7, $r5, $r7           # i2 = mul( S , i1 );
        add     $r7, $r7, 0x20          # i3 = 0x00000020 + i2;
        muluh   $r8, $r3, $r3           # i4 = mul( T , T );
        muluh   $r9, $r5, $r8           # i5 = mul( S , i4 );
        lpm     $r6, dc_3
        muluh   $r10, $r3, $r6          # i6 = mul( T , 0xffbad86f );
        lpm     $r6, dc_4
        sub     $r10, $r6, $r10         # i7 = 0xfffbece7 - i6;
        muluh   $r10, $r9, $r10         # i8 = mul( i5 , i7 );
        add     $r7, $r7, $r10          # i9 = i3 + i8;
        muluh   $r9, $r8, $r9           # i10 = mul( i4 , i5 );
        lpm     $r6, dc_5
        muluh   $r10, $r3, $r6          # i11 = mul( T , 0xf3672b51 );
        lpm     $r6, dc_6
        sub     $r10, $r6, $r10         # i12 = 0xfd9d3a3e - i11;
        lpm     $r6, dc_7
        muluh   $r11, $r3, $r6          # i13 = mul( T , 0x9a3c4390 );
        lpm     $r6, dc_8
        sub     $r11, $r6, $r11         # i14 = 0xd4d2ce9b - i13
        muluh   $r11, $r8, $r11         # i15 = mul( i4 , i14 );
        add     $r10, $r10, $r11        # i16 = i12 + i15;
        muluh   $r10, $r9, $r10         # i17 = mul( i10 , i16 )
        add     $r7, $r7, $r10          # i18 = i9 + i17;
        muluh   $r10, $r8, $r8          # i19 = mul( i4 , i4 );
        lpm     $r6, dc_9
        muluh   $r11, $r3, $r6          # i20 = mul( T , 0x1bba92b3 );
        lpm     $r6, dc_10
        sub     $r11, $r6, $r11         # i21 = 0x525a1a8b - i20;
        lpm     $r6, dc_11
        muluh   $r8, $r8, $r6           # i22 = mul( i4 , 0x0452b1bf );
        add     $r8, $r11, $r8          # i23 = i21 + i22;
        muluh   $r8, $r10, $r8          # i24 = mul( i19 , i23 );
        muluh   $r8, $r9, $r8           # i25 = mul( i10 , i24 );
        add     $r3, $r7, $r8           # V = i18 + i25;
        # W = V & 0xFFFFFFC0;
        lpm     $r6, dc_12
        and     $r3, $r3, $r6           # W
        # round and pack final values
        ashl    $r0, $r2, 23            # pack D
        or      $r0, $r0, $r12          # pack Sr
        ashl    $r12, $r1, 8
        or      $r12, $r12, $r13        # My
        muluh   $r10, $r3, $r12
        lshr    $r11, $r5, 1
        cmp     $r10, $r11
        jmpc    gte, div_ret_1
        add     $r3, $r3, 0x40          # round up
div_ret_1:
        lshr    $r3, $r3, 7
        add     $r0, $r0, $r3
        pop     $r13
        return
#endif

##########################################################################
##########################################################################
## Negate

#ifdef L_negsf
        .global __negsf
__negsf:
        # flip the sign bit of $r0
        lpm     $r1, sign_mask
        xor     $r0, $r0, $r1
        return
#endif

##########################################################################
##########################################################################
## float to int & unsigned int

#ifdef L_fixsfsi
        .global __fixsfsi
__fixsfsi:      # 20 instructions
        bextu   $r1,$r0,(8<<5)|23       # e in r1
        lshr    $r2,$r0,31              # s in r2
        lpm     $r3, m_mask
        and     $r0,$r0,$r3             # m in r0
        # test nan
        cmp     $r1,0xFF
        jmpc    nz, int_not_nan
        cmp     $r0,0
        jmpc    z, int_not_nan
        ldk     $r0,0                   # NaN converts to 0
        return
int_not_nan:
        # test edges
        cmp     $r1, 127
        jmpc    gte, int_not_zero       # lower limit: |x| < 1 -> 0
        ldk     $r0,0
        return
int_not_zero:
        cmp     $r1, 158
        jmpc    lt, int_not_max         # upper limit
        lpm     $r0, nan                # 0x7FFFFFFF (INT_MAX) on positive overflow
        cmp     $r2, 0
        jmpc    z, int_positive
        xnor    $r0, $r0, 0             # ~INT_MAX = INT_MIN on negative overflow
        return
int_not_max:
        lpm     $r3, smallest_norm
        or      $r0, $r0, $r3           # set implicit bit
        sub     $r1, $r1, 150           # shift distance = e - (127 + 23)
        cmp     $r1, 0
        jmpc    s, shift_right
        ashl    $r0, $r0, $r1
        jmp     set_int_sign
shift_right:
        xnor    $r1, $r1, 0             # negate the shift distance
        add     $r1, $r1, 1
        lshr    $r0, $r0, $r1
set_int_sign:
        cmp     $r2, 0
        jmpc    z, int_positive
        xnor    $r0, $r0, 0             # two's-complement negate
        add     $r0, $r0, 1
int_positive:
        return
#endif

#ifdef L_fixunssfsi
        .global __fixunssfsi
__fixunssfsi:   # 19 instructions
        lshr    $r2, $r0, 31            # s in r2
        cmp     $r2, 0
        jmpc    z, uint_not_neg
        ldk     $r0, 0                  # negative input -> 0
        return
uint_not_neg:
        bextu   $r1, $r0, (8<<5)|23     # e in r1 (still biased)
        lpm     $r3, m_mask
        and     $r0, $r0, $r3           # m in r0
        # test nan
        # NOTE(review): upstream subtracted the bias BEFORE this compare, so
        # the 0xFF test could never match; test the biased exponent instead.
        cmp     $r1, 0xFF
        jmpc    nz, uint_not_nan
        cmp     $r0, 0
        jmpc    z, uint_not_nan
        ldk     $r0, 0                  # NaN converts to 0
        return
uint_not_nan:
        sub     $r1, $r1, 127           # unbias the exponent
        # test edges
        cmp     $r1, 0
        jmpc    ns, uint_not_zero       # lower limit: |x| < 1 -> 0
        ldk     $r0, 0
        return
uint_not_zero:
        # NOTE(review): no upper-limit clamp; e >= 55 relies on the target's
        # shift-by-large-count behaviour -- TODO confirm against FT32 ISA.
        lpm     $r3, smallest_norm
        or      $r0, $r0, $r3           # set implicit bit
        cmp     $r1, 23
        jmpc    lt, shift_uint_right
        sub     $r1, $r1, 23
        ashl    $r0, $r0, $r1
        return
shift_uint_right:
        ldk     $r3, 23
        sub     $r1, $r3, $r1
        lshr    $r0, $r0, $r1
        return
#endif

##########################################################################
##########################################################################
## int & unsigned int to float

# Convert the unsigned integer in x to float.  s1-s3 are scratch
# registers; lbl makes the internal labels unique per expansion.
.macro i2f x, s1, s2, s3, lbl
        move    \s1, \x
        nlz     \s1, \s2                # s1 = leading zero count
        cmp     \s1, 8
        jmpc    s, float_round\lbl      # < 8 leading zeros: bits will be lost, must round
        sub     \s2, \s1, 8
        ashl    \x, \x, \s2             # fits in 24 bits: exact, just normalize
        jmp     float_no_round\lbl
float_round\lbl:
        cmp     \s1, 6
        jmpc    s, float_shift_right\lbl
        sub     \s2, \s1, 6
        ashl    \x, \x, \s2
        jmp     float_round_and_pack\lbl
float_shift_right\lbl:
        ldk     \s2, 6
        sub     \s2, \s2, \s1
        xnor    \s3, \s3 ,\s3           # 0xFFFFFFFF
        ashl    \s3, \s3 ,\s2           # create inverse of mask for test of S bit value in discarded bits
        xnor    \s3, \s3 ,0             # NOT
        tst     \x, \s3                 # determine value of sticky bit
        lshr    \x, \x, \s2
        jmpc    z,float_round_and_pack\lbl
        or      \x, \x, 1               # set the sticky bit to 1
float_round_and_pack\lbl:
        bextu   \s2, \x, (1<<5)|2       # extract low bit of m
        or      \x, \x, \s2             # or p into r  (ties round to even)
        add     \x, \x, 1
        lshr    \x, \x, 2
        btst    \x, (1<<5)|24           # test for carry from round
        jmpc    z, float_no_round\lbl
        sub     \s1, \s1, 1             # inc e for carry (actually dec nlz)
        lshr    \x, \x, 1
float_no_round\lbl:
        ldk     \s2, 158
        sub     \s1, \s2, \s1           # e = 158 - nlz
        # Pack e
        ldl     \s1, \s1, (8<<5)|23
        bins    \x, \x, \s1
.endm

#ifdef L_floatsisf
        .global __floatsisf
__floatsisf:    # 32 instructions
        cmp     $r0, 0
        jmpc    nz, float_not_zero
        return                          # 0 -> +0.0
float_not_zero:
        ashr    $r1, $r0, 31            # s in r1 (all-ones when negative)
        xor     $r0, $r0, $r1           # cond neg: $r0 = |$r0|
        sub     $r0, $r0, $r1
        i2f     $r0, $r2, $r3, $r4, 1
        # Pack s
        ldl     $r1, $r1, (1<<5)|31
        bins    $r0, $r0, $r1
        return
#endif

#ifdef L_floatunsisf
        .global __floatunsisf
__floatunsisf:  # 26 instructions
        cmp     $r0, 0
        jmpc    nz, float_not_zero2
        return                          # 0 -> +0.0
float_not_zero2:
        i2f     $r0, $r1, $r2, $r3, 2
        return
#endif

##########################################################################
##########################################################################
## float compare

# File-local compare helper:
#   returns -1 when $r0 < $r1, 0 when equal, 1 when $r0 > $r1.
# Any NaN operand yields 1 (the abs value compares above +inf).
__cmpsf2_:
        # calc abs vals
        lpm     $r3, nan                # also abs mask
        and     $r2, $r0, $r3
        and     $r3, $r1, $r3
        # test if either abs is nan
        lpm     $r4, inf
        cmp     $r2, $r4
        jmpc    gt, cmp_is_gt
        cmp     $r3, $r4
        jmpc    gt, cmp_is_gt
        # test if both are 0 (+0 compares equal to -0)
        or      $r2, $r2, $r3
        cmp     $r2, 0
        jmpc    z, cmp_is_eq
        # test if eq
        cmp     $r0, $r1
        jmpc    z, cmp_is_eq
        # -- if either is pos
        and     $r2, $r0, $r1
        cmp     $r2, 0
        jmpc    s, cmp_both_neg
        cmp     $r0, $r1
        jmpc    gt, cmp_is_gt
        # r0 < r1
        lpm     $r0, high_uint          # -1
        return
cmp_both_neg:
        # both negative: larger bit pattern means smaller value
        cmp     $r0, $r1
        jmpc    lt, cmp_is_gt
        # r0 < r1
        lpm     $r0, high_uint          # -1
        return
cmp_is_gt:
        ldk     $r0, 1
        return
cmp_is_eq:
        ldk     $r0, 0
        return