1 /* Copyright (C) 2008-2024 Free Software Foundation, Inc.
2 Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
3 on behalf of Synopsys Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 #include "arc-ieee-754.h"
74 #define __adddf3 __adddf3_asm
75 #define __subdf3 __subdf3_asm
77 /* N.B. This is optimized for ARC700.
78 ARC600 has very different scheduling / instruction selection criteria. */
80 /* inputs: DBL0, DBL1 (r0-r3)
82 clobber: r2-r10, r12, flags
83 All NaN highword bits must be 1. NaN low word is random. */
88 .long 0x7ff00000 ; exponent mask
102 brhs r12,32,.Large_shift
103 brne r12,0,.Lsmall_shift
104 brge r10,0,.Ladd_same_exp ; r12 == 0
106 /* After subtracting, we need to normalize; when shifting to place the
107 leading 1 into position for the implicit 1 and adding that to DBL0H,
108 we increment the exponent. Thus, we have to subtract one more than
109 the shift count from the exponent beforehand. Iff the exponent drops thus
110 below zero (before adding in the fraction with the leading one), we have
111 generated a denormal number. Denormal handling is basically reducing the
112 shift count so that we produce a zero exponent instead; however, this way
113 the shift count can become zero (if we started out with exponent 1).
114 Therefore, a simple min operation is not good enough, since we don't
115 want to handle a zero normalizing shift in the main path.
116 On the plus side, we don't need to check for denorm input, the result
117 of subtracting these looks just the same as denormals generated during
123 sub.f DBL0L,DBL0L,DBL1L
129 b.d .Lsub_done_same_exp
134 ; If both inputs are inf, but with different signs, the result is NaN.
138 or.eq DBL0H,DBL0H,DBL1H
142 rsub.f DBL0L,DBL0L,DBL1L
144 bic_s DBL1H,DBL1H,r12
160 asl_s DBL0L,DBL0L,DBL1L
163 add_l DBL0H,DBL0H,r12
166 /* This is a special case because we can't test for need to shift
167 down by checking if bit 20 of DBL0H changes. OTOH, here we know
168 that we always need to shift down. */
169 ; The implicit 1 of DBL0 is not shifted together with the
170 ; fraction, thus effectively doubled, compensating for not setting
172 add_s r12,DBL0L,DBL1L
173 lsr.f 0,r12,2 ; round to even
174 breq r6,0,.Ldenorm_add
175 adc.f DBL0L,DBL0L,DBL1L
177 sub1 r7,r7,r9 ; boost exponent by 2/2
179 asr.f r7,r7 ; DBL1.fraction/2 - DBL0.fraction/2 ; exp++
180 add.cs.f DBL0L,DBL0L,0x80000000
181 add_l DBL0H,DBL0H,r7 ; DBL0.implicit1 not shifted for DBL1.implicit1
183 bic.f 0,r9,DBL0H ; check for overflow -> infinity.
185 and DBL0H,DBL0H,0xfff00000
190 brhs r12,55,.Lret_dbl0
191 bmsk_s DBL1H,DBL1H,19
192 brne r6,0,.Lno_denorm_large_shift
193 brhi.d r12,33,.Lfixed_denorm_large_shift
195 breq r12,31, .Lfixed_denorm_small_shift
212 breq.d r6,0,.Ldenorm_small_shift
213 bmsk_s DBL1H,DBL1H,19
214 bset_s DBL1H,DBL1H,20
215 .Lfixed_denorm_small_shift:
218 lsr_l DBL1H,DBL1H,r12
223 /* subtract, abs(DBL0) > abs(DBL1) */
224 /* DBL0H, DBL0L: original values
225 DBL1H, DBL1L: fraction with explicit leading 1, shifted into place
226 r4: orig. DBL0H & 0x7fffffff
227 r6: orig. DBL1H & 0x7ff00000
229 r10: orig. DBL0H ^ DBL1H
236 sbc.f DBL0L,DBL0L,DBL1L
240 beq_l .Large_cancel_sub
245 breq DBL1L,1,.Lsub_done_noshift
248 brlo r6,r5,.Ldenorm_sub
261 add_s DBL0H,DBL0H,r12
266 .Lno_denorm_large_shift:
267 breq.d r12,32,.Lshift32
268 bset_l DBL1H,DBL1H,20
269 .Lfixed_denorm_large_shift:
282 ; If a denorm is produced without shifting, we have an exact result -
283 ; no need for rounding.
288 brne.d DBL1L,1,.Lpast_denorm_sub
294 add.cs.f DBL0L,DBL0L,1
300 .Ldenorm_small_shift:
301 brne.d r12,1,.Lfixed_denorm_small_shift
304 .Ladd: ; bit 20 of DBL1H is clear and bit 0 of r12 does not matter
305 add.f DBL0L,DBL0L,DBL1L
306 add_s DBL1H,DBL1H,DBL0H
308 xor_l DBL0H,DBL0H,DBL1H
309 bbit0 DBL0H,20,.Lno_shiftdown
313 sbc DBL0H,DBL1H,DBL0H
317 add.cs.f DBL0L,DBL0L,1
318 bic.f 0,r9,DBL0H ; check for generating infinity with possible ...
319 jne.d [blink] ; ... non-zero fraction
324 bic_s DBL0H,DBL0H,DBL1H
330 add.cs.f DBL0L,DBL0L,1
337 xor_s DBL0H,DBL0H,DBL1H
345 ;DBL0L: mantissa DBL1H: sign & exponent
353 brhs.d DBL0H,r12,.Lpast_denorm_large_cancel
357 xor_s DBL0H,DBL0H,DBL1H
360 .Lpast_denorm_large_cancel:
363 asl_s DBL0L,DBL0L,DBL1L
372 /* r4:DBL0L:r12 : unnormalized result fraction
373 DBL1H: result sign and exponent */
374 /* When seeing large cancellation, only the topmost guard bit might be set. */
392 1: brlo DBL0H,r5,.Ldenorm_large_cancel_sub
394 .Lpast_denorm_large_cancel_sub:
398 asl_s DBL0L,DBL0L,DBL1L
399 add.ge DBL0H,DBL0H,r7
400 add_s DBL0L,DBL0L,r12
401 add.lt DBL0H,DBL0H,DBL0L
406 .Ldenorm_large_cancel_sub:
408 xor_s DBL0H,DBL0H,DBL1H
409 brgt.d r5,1,.Lpast_denorm_large_cancel_sub
411 j_l [blink] ; denorm, no shift -> no rounding needed.
413 /* r4: DBL0H & 0x7fffffff
414 r6: DBL1H & 0x7ff00000
417 r12: shift count (negative) */
420 brhs r6,r9,.Lret_dbl1 ; inf or NaN
422 brhs r8,32,.Large_shift_dbl0
424 breq.d r6,0,.Ldenorm_small_shift_dbl0
425 bmsk_s DBL0H,DBL0H,19
426 bset_s DBL0H,DBL0H,20
427 .Lfixed_denorm_small_shift_dbl0:
432 brge.d r10,0,.Ladd_dbl1_gt
434 /* subtract, abs(DBL0) < abs(DBL1) */
435 /* DBL0H, DBL0L: fraction with explicit leading 1, shifted into place
436 DBL1H, DBL1L: original values
437 r6: orig. DBL1H & 0x7ff00000
445 sbc.f DBL0L,DBL1L,DBL0L
449 beq_l .Large_cancel_sub
451 b_l .Lsub_done ; note: r6 is already set up.
458 .Ldenorm_small_shift_dbl0:
460 bne.d .Lfixed_denorm_small_shift_dbl0
463 .Ladd_dbl1_gt: ; bit 20 of DBL0H is clear and bit 0 of r12 does not matter
464 add.f DBL0L,DBL0L,DBL1L
465 add_s DBL0H,DBL0H,DBL1H
467 xor DBL1H,DBL0H,DBL1H
468 bbit0 DBL1H,20,.Lno_shiftdown_dbl1_gt
472 sbc DBL0H,DBL0H,DBL1H
476 add.cs.f DBL0L,DBL0L,1
477 bic.f 0,r9,DBL0H ; check for generating infinity with possible ...
478 jne.d [blink] ; ... non-zero fraction
483 bic_s DBL0H,DBL0H,DBL1H
484 .Lno_shiftdown_dbl1_gt:
488 add.cs.f DBL0L,DBL0L,1
494 brhs r8,55,.Lret_dbl1
495 bmsk_s DBL0H,DBL0H,19
496 brne r6,0,.Lno_denorm_large_shift_dbl0
498 brne.d r8,33,.Lfixed_denorm_large_shift_dbl0
500 bset_s DBL0H,DBL0H,20
509 .Lno_denorm_large_shift_dbl0:
510 breq.d r8,32,.Lshift32_dbl0
511 bset_l DBL0H,DBL0H,20
512 .Lfixed_denorm_large_shift_dbl0: