libgcc/config/arc/ieee-754/addsf3.S

   1 /* Copyright (C) 2008-2024 Free Software Foundation, Inc.
   2    Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
   3                 on behalf of Synopsys Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 Under Section 7 of GPL version 3, you are granted additional
  18 permissions described in the GCC Runtime Library Exception, version
  19 3.1, as published by the Free Software Foundation.
  20
  21 You should have received a copy of the GNU General Public License and
  22 a copy of the GCC Runtime Library Exception along with this program;
  23 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  24 <http://www.gnu.org/licenses/>.  */
  25
  26 #include "arc-ieee-754.h"
  27 #if 0 /* DEBUG */
  28         .global __addsf3
  29         FUNC(__addsf3)
  30         .balign 4
  31 __addsf3: ; checking wrapper: call __addsf3_c and __addsf3_asm on the same inputs, abort on mismatch
  32         push_s blink ; save the return address across the two calls
  33         push_s r1 ; save the second operand
  34         bl.d __addsf3_c ; presumably a C reference implementation; it is not defined in this file
  35         push_s r0 ; delay slot: save the first operand
  36         ld_s r1,[sp,4] ; reload the saved second operand
  37         st_s r0,[sp,4] ; keep the result of __addsf3_c in that stack slot
  38         bl.d __addsf3_asm ; the assembly routine, renamed by the defines at the end of this block
  39         pop_s r0 ; delay slot: restore the first operand
  40         pop_s r1 ; r1 = result of __addsf3_c
  41         pop_s blink
  42         cmp r0,r1 ; compare the two results bit for bit
  43         jeq_s [blink] ; identical: return the result in r0
  44         bl abort ; mismatch
  45         ENDFUNC(__addsf3)
  46         .global __subsf3
  47         FUNC(__subsf3)
  48         .balign 4
  49 __subsf3: ; checking wrapper: call __subsf3_c and __subsf3_asm on the same inputs, abort on mismatch
  50         push_s blink ; save the return address across the two calls
  51         push_s r1 ; save the second operand
  52         bl.d __subsf3_c ; presumably a C reference implementation; it is not defined in this file
  53         push_s r0 ; delay slot: save the first operand
  54         ld_s r1,[sp,4] ; reload the saved second operand
  55         st_s r0,[sp,4] ; keep the result of __subsf3_c in that stack slot
  56         bl.d __subsf3_asm ; the assembly routine, renamed by the defines at the end of this block
  57         pop_s r0 ; delay slot: restore the first operand
  58         pop_s r1 ; r1 = result of __subsf3_c
  59         pop_s blink
  60         cmp r0,r1 ; compare the two results bit for bit
  61         jeq_s [blink] ; identical: return the result in r0
  62         bl abort ; mismatch
  63         ENDFUNC(__subsf3)
  64 #define __addsf3 __addsf3_asm
  65 #define __subsf3 __subsf3_asm
  66 #endif /* DEBUG */
  67 /* N.B. This is optimized for ARC700.
  68   ARC600 has very different scheduling / instruction selection criteria.  */
  69
  70 /* inputs: r0, r1
  71    output: r0
  72    clobber: r1-r10, r12, flags  */
  73
  74         .balign 4
  75         .global __addsf3
  76         .global __subsf3
  77         FUNC(__addsf3)
  78         FUNC(__subsf3)
  79         .long 0x7f800000 ; exponent mask
  80 __subsf3: ; subtraction entry: negate DBL1 (r1), then fall into the addition code
  81         bxor_l r1,r1,31 ; toggle the sign bit of DBL1
  82 __addsf3: ; addition entry: DBL0 in r0, DBL1 in r1, result in r0
  83         ld r9,[pcl,-8] ; r9 = 0x7f800000; presumably reads the .long placed before the entry points
  84         bmsk r4,r0,30 ; r4 = DBL0 & 0x7fffffff
  85         xor r10,r0,r1 ; bit 31 of r10 = sign difference
  86         and r6,r1,r9 ; r6 = DBL1 & 0x7f800000
  87         sub.f r12,r4,r6 ; sets the flags tested by blo below
  88         asr_s r12,r12,23 ; r12 = exponent difference, used as shift count (negative on the .Ldbl1_gt path)
  89         blo .Ldbl1_gt ; r4 < r6 (unsigned): DBL1 has the larger exponent
  90         brhs r4,r9,.Linf_nan ; exponent field of DBL0 is all ones
  91         brne r12,0,.Lsmall_shift ; exponents differ and DBL0 has the larger one
  92         brge r10,0,.Ladd_same_exp ; r12 == 0 and the signs are equal
  93 /* After subtracting, we need to normalize; when shifting to place the
  94   leading 1 into position for the implicit 1 and adding that to DBL0,
  95   we increment the exponent.  Thus, we have to subtract one more than
  96   the shift count from the exponent beforehand.  Iff the exponent drops thus
  97   below zero (before adding in the fraction with the leading one), we have
  98   generated a denormal number.  Denormal handling is basically reducing the
  99   shift count so that we produce a zero exponent instead; FWIW, this way
 100   the shift count can become zero (if we started out with exponent 1).
 101   On the plus side, we don't need to check for denorm input, the result
 102   of subtracting these looks just the same as denormals generated during
 103   subtraction.  */
 104         bmsk r7,r1,30 ; r7 = DBL1 & 0x7fffffff
 105         breq    r4,r7,.Lret0 ; equal magnitudes, opposite signs: the result is zero
 106         sub.f r5,r4,r7 ; carry set iff abs(DBL1) > abs(DBL0)
 107         lsr r12,r4,23 ; r12 = exponent of DBL0
 108         neg.cs r5,r5 ; r5 = absolute value of the difference
 109         norm r3,r5 ; normalization count for the difference
 110         bmsk r2,r0,22 ; r2 = fraction bits of DBL0
 111         sub_s r3,r3,6 ; presumably shift count + 1, as described in the comment above
 112         min r12,r12,r3 ; limit by the exponent: this is the denormal handling described above
 113         bic r1,r0,r2 ; r1 = sign and exponent of DBL0
 114         sub_s r3,r12,1 ; r3 = left shift count
 115         asl_s r12,r12,23 ; (shift count + 1) in exponent-field position
 116         asl r2,r5,r3 ; shift the leading 1 of the difference into place
 117         sub_s r1,r1,r12 ; lower the exponent by shift count + 1
 118         add_s r0,r1,r2 ; adding the fraction with its leading 1 increments the exponent again
 119         j_s.d [blink]
 120         bxor.cs r0,r0,31 ; delay slot: flip the sign if DBL1 had the larger magnitude; carry presumably still from sub.f r5,r4,r7 (no .f instruction in between)
 121         .balign 4
 122 .Linf_nan: ; reached when r4 >= r9, i.e. the exponent field of DBL0 is all ones
 123         ; If both inputs are inf, but with different signs, the result is NaN.
 124         asr r12,r10,31 ; r12 = all ones if the signs differ, else 0
 125         or_s r1,r1,r12 ; r1 becomes all ones when the signs differ
 126         j_s.d [blink] ; otherwise DBL0 is returned unchanged
 127         or.eq r0,r0,r1 ; delay slot; NOTE(review): Z is presumably still the result of sub.f r12,r4,r6 at entry (set when r4 == r6) - confirm
 128         .balign 4
 129 .Ladd_same_exp:
 130         /* This is a special case because we can't test for need to shift
 131            down by checking if bit 23 of DBL0 changes.  OTOH, here we know
 132            that we always need to shift down.  */
 133         ; adding the two floating point numbers together makes the sign
 134         ; cancel out and appear as carry; the exponent is doubled, and the
 135         ; fraction also in need of shifting left by one. The two implicit
 136         ; ones of the sources make an implicit 1 of the result, again
 137         ; non-existent in a place shifted by one.
 138         add.f   r0,r0,r1 ; carry out = the common sign
 139         btst_s  r0,1 ; Z clear if bit 1 is set; bit 1 becomes the LSB after the shift down
 140         breq    r6,0,.Ldenorm_add ; exponent field is zero: handled separately
 141         add.ne  r0,r0,1 ; round to even.
 142         rrc     r0,r0 ; shift down by one; presumably rotates the carry (sign) back into bit 31
 143         bmsk    r1,r9,23 ; r1 = 0x00800000, one step of the exponent field
 144         add     r0,r0,r1 ; increment exponent
 145         bic.f   0,r9,r0; check for overflow -> infinity.
 146         jne_l   [blink] ; exponent field not all ones: done
 147         mov_s   r0,r9 ; infinity
 148         j_s.d   [blink]
 149         bset.cs r0,r0,31 ; delay slot: set the sign bit if the carry is set; carry presumably still from add.f above
 150
 151 .Ldenorm_add: ; both exponent fields are zero and the signs are equal
 152         j_s.d [blink]
 153         add r0,r4,r1 ; delay slot: r0 = (DBL0 & 0x7fffffff) + DBL1, so the sign bit is counted once
 154
 155 .Lret_dbl0: ; target of the shift count check in .Lsmall_shift
 156         j_s [blink] ; r0 (DBL0) is returned unchanged
 157
 158         .balign 4
 159 .Lsmall_shift: ; DBL0 has the larger exponent; r12 = exponent difference
 160         brhi r12,25,.Lret_dbl0 ; shift count above 25: return DBL0 unchanged
 161         breq.d r6,0,.Ldenorm_small_shift ; exponent field of DBL1 is zero
 162         bmsk_s r1,r1,22 ; delay slot: r1 = fraction bits of DBL1
 163         bset_s r1,r1,23 ; make the leading 1 explicit
 164 .Lfixed_denorm_small_shift:
 165         neg r8,r12
 166         asl r5,r1,r8 ; r5 = guard bits; presumably only the low five bits of r8 count, giving a shift by 32 - r12
 167         brge.d r10,0,.Ladd ; equal signs: add; otherwise fall through into .Lsub
 168         lsr_l r1,r1,r12 ; delay slot: DBL1 fraction shifted into place
 169 /* subtract, abs(DBL0) > abs(DBL1) */
 170 /* DBL0: original values
 171    DBL1: fraction with explicit leading 1, shifted into place
 172    r4:  orig. DBL0 & 0x7fffffff
 173    r6:  orig. DBL1 & 0x7f800000
 174    r9:  0x7f800000
 175    r10: orig. DBL0H ^ DBL1H
 176    r5 : guard bits */
 177         .balign 4
 178 .Lsub:
 179         neg.f r12,r5 ; r12 = low word of the difference (0 - guard bits); the flags feed sbc.f below
 180         bmsk r3,r0,22 ; r3 = fraction bits of DBL0
 181         bset r5,r3,23 ; r5 = DBL0 fraction with explicit leading 1
 182         sbc.f r4,r5,r1 ; r4 = high word of the difference, including the borrow from the guard bits
 183         beq.d .Large_cancel_sub ; high word is zero
 184         bic r7,r0,r3 ; delay slot: r7 = sign and exponent of DBL0
 185         norm r3,r4 ; normalization count for the high word
 186         bmsk r6,r7,30 ; r6 = exponent field of DBL0
 187 .Lsub_done: ; also entered from .Lrsub with r3, r4, r6, r7 and r12 set up the same way
 188         sub_s r3,r3,6 ; presumably left shift count + 1 (1 when no shift is needed)
 189         breq r3,1,.Lsub_done_noshift
 190         asl r5,r3,23 ; (shift count + 1) in exponent-field position
 191         sub_l r3,r3,1 ; r3 = left shift count
 192         brlo r6,r5,.Ldenorm_sub ; exponent too small for a full normalization
 193         sub r0,r7,r5 ; sign and exponent, lowered by shift count + 1
 194         neg_s r1,r3
 195         lsr.f r2,r12,r1 ; guard bits that move up into the fraction
 196         asl_s r12,r12,r3 ; guard bits that remain below the fraction
 197         btst_s  r2,0
 198         bmsk.eq.f r12,r12,30
 199         asl r5,r4,r3 ; normalized high word
 200         add_s r0,r0,r2
 201         adc.ne r0,r0,0 ; NOTE(review): with lsr.f, btst_s and bmsk.eq.f above this looks like the round-to-nearest-even step - confirm the flag usage against the ISA manual
 202         j_s.d [blink]
 203         add_l r0,r0,r5 ; delay slot: add the fraction; its leading 1 increments the exponent field
 204
 205 .Lret0: ; return zero
 206         j_s.d   [blink]
 207         mov_l   r0,0 ; delay slot: result = 0
 208
 209         .balign 4
 210 .Ldenorm_small_shift: ; from .Lsmall_shift when r6 == 0; r1 holds the DBL1 fraction without a leading 1
 211         brne.d  r12,1,.Lfixed_denorm_small_shift ; a shift remains after the adjustment below
 212         sub_s   r12,r12,1 ; delay slot: use one shift less
 213         brlt.d  r10,0,.Lsub ; signs differ: subtract
 214         mov_s   r5,r12 ; zero r5, and align following code
 215 .Ladd: ; Both bit 23 of DBL1 and bit 0 of r5 are clear.
 216         bmsk    r2,r0,22 ; r2 = fraction bits of DBL0
 217         add_s   r2,r2,r1 ; fraction sum; bit 23 set means a shift down is needed
 218         bbit0.d r2,23,.Lno_shiftdown
 219         add_s   r0,r0,r1 ; delay slot: r0 = DBL0 plus the aligned DBL1 fraction
 220         bic.f   0,r9,r0; check for overflow -> infinity; eq : infinity
 221         bmsk    r1,r2,22 ; r1 = low 23 bits of the fraction sum
 222         lsr.ne.f r2,r2,2; cc: even ; hi: might round down
 223         lsr.ne  r1,r1,1 ; r1 = half of the fraction bits (skipped for infinity)
 224         rcmp.hi r5,1; hi : round down
 225         bclr.hi r0,r0,0 ; rounding down: drop bit 0 before the subtraction below
 226         j_l.d   [blink]
 227         sub_s   r0,r0,r1 ; delay slot: for infinity this clears the fraction; otherwise presumably halves it with rounding
 228
 229 /* r4: DBL0H & 0x7fffffff
 230    r6: DBL1H & 0x7f800000
 231    r9: 0x7f800000
 232    r10: sign difference
 233    r12: shift count (negative) */
 234         .balign 4
 235 .Ldbl1_gt: ; DBL1 has the larger exponent; DBL0 is the operand to be shifted
 236         brhs r6,r9,.Lret_dbl1 ; inf or NaN
 237         neg r8,r12 ; r8 = positive shift count
 238         brhi r8,25,.Lret_dbl1 ; shift count above 25: return DBL1 unchanged
 239 .Lsmall_shift_dbl0:
             ; The denormal check has to look at DBL0, the operand that is
             ; shifted here.  r6 (the DBL1 exponent field) is never zero on
             ; this path: the only way in is the blo taken when r4 < r6, so a
             ; test of r6 against 0 would leave the denormal code unreachable
             ; and give a zero or denormal DBL0 a bogus leading 1.
             tst r4,r9 ; Z set iff the exponent field of DBL0 is zero; flags are not live here
 240         beq.d .Ldenorm_small_shift_dbl0
 241         bmsk_s r0,r0,22 ; delay slot: r0 = fraction bits of DBL0
 242         bset_s r0,r0,23 ; make the leading 1 explicit
 243 .Lfixed_denorm_small_shift_dbl0:
 244         asl r5,r0,r12 ; r5 = guard bits; presumably only the low five bits of r12 count, giving a shift by 32 - r8
 245         brge.d r10,0,.Ladd_dbl1_gt ; equal signs: add; otherwise fall through into .Lrsub
 246         lsr r0,r0,r8 ; delay slot: DBL0 fraction shifted into place
 247 /* subtract, abs(DBL0) < abs(DBL1) */
 248 /* DBL0: fraction with explicit leading 1, shifted into place
 249    DBL1: original value
 250    r6:  orig. DBL1 & 0x7f800000
 251    r9:  0x7f800000
 252    r5: guard bits */
 253         .balign 4
 254 .Lrsub:
 255         neg.f r12,r5 ; r12 = low word of the difference (0 - guard bits); the flags feed sbc.f below
 256         bmsk r5,r1,22 ; r5 = fraction bits of DBL1
 257         bic r7,r1,r5 ; r7 = sign and exponent of DBL1
 258         bset r5,r5,23 ; DBL1 fraction with explicit leading 1
 259         sbc.f r4,r5,r0 ; r4 = high word of the difference, including the borrow from the guard bits
 260         bne.d .Lsub_done ; note: r6 is already set up.
 261         norm r3,r4 ; delay slot: normalization count for the high word
 262         /* Fall through */
 263
 264 /* r4:r12 : unnormalized result fraction
 265    r7: result sign and exponent         */
 266 /* When seeing large cancellation, only the topmost guard bit might be set.  */
 267         .balign 4
 268 .Large_cancel_sub: ; both ways in arrive with Z set by sbc.f, i.e. r4 == 0
 269         breq_s  r12,0,.Lret0 ; low word zero as well: the result is zero
 270         sub     r0,r7,24<<23 ; lower the exponent field by 24
 271         xor.f   0,r0,r7 ; test if exponent is negative
 272         tst.pl  r9,r0  ; test if exponent is zero
 273         jpnz    [blink] ; return if non-denormal result
 274         bmsk    r6,r7,30 ; r6 = exponent field of r7
 275         lsr     r3,r6,23 ; r3 = exponent as a small integer
 276         xor     r0,r6,r7 ; r0 = sign bit only
 277         sub_s   r3,r3,24-22 ; bit position for the denormal result
 278         j_s.d   [blink]
 279         bset    r0,r0,r3 ; delay slot: set that single fraction bit
 280
 281         ; If a denorm is produced, we have an exact result -
 282         ; no need for rounding.
 283         .balign 4
 284 .Ldenorm_sub: ; from .Lsub_done when r6 < r5; r6 = exponent field, r7 = sign and exponent, r4:r12 = fraction
 285         sub r3,r6,1
 286         lsr.f r3,r3,23 ; r3 = exponent - 1, used as the left shift count; Z set if it is zero
 287         xor r0,r6,r7 ; r0 = sign bit only, so the exponent field of the result is zero
 288         neg_s r1,r3
 289         asl.ne r4,r4,r3 ; shift the high word up
 290         lsr_s r12,r12,r1 ; guard bits that move up into the fraction
 291         add_s r0,r0,r4
 292         j_s.d [blink]
 293         add.ne r0,r0,r12 ; delay slot: merge the guard bits only when a shift took place
 294
 295         .balign 4
 296 .Lsub_done_noshift: ; from .Lsub_done when no normalization shift is needed; r12 = guard bits
 297         add.f 0,r12,r12 ; carry = top guard bit; Z set if the remaining guard bits are zero
 298         btst.eq r4,0 ; in that case Z is replaced by: LSB of the fraction is clear
 299         bclr r4,r4,23 ; drop the explicit leading 1
 300         add r0,r7,r4 ; combine sign and exponent with the fraction
 301         j_s.d [blink]
 302         adc.ne r0,r0,0 ; delay slot: add the carry unless Z is set; presumably round to nearest even
 303
 304         .balign 4
 305 .Lno_shiftdown: ; from .Ladd when bit 23 of the fraction sum is clear; r0 = unrounded sum, r5 = guard bits
 306         add.f 0,r5,r5 ; carry = top guard bit; Z set if the remaining guard bits are zero
 307         btst.eq r0,0 ; in that case Z is replaced by: LSB of r0 is clear
 308         cmp.eq r5,r5 ; if Z is still set, this comparison of equal values presumably clears the carry
 309         j_s.d [blink]
 310         add.cs r0,r0,1 ; delay slot: round up when the carry is set
 311
 312 .Lret_dbl1: ; return DBL1 unchanged
 313         j_s.d [blink]
 314         mov_l r0,r1 ; delay slot: result = DBL1
 315         .balign 4
 316 .Ldenorm_small_shift_dbl0: ; entered from the branch in .Lsmall_shift_dbl0; r0 holds the DBL0 fraction without a leading 1
 317         sub.f r8,r8,1 ; use one shift less; Z set if no shift remains
 318         bne.d .Lfixed_denorm_small_shift_dbl0
 319         add_s r12,r12,1 ; delay slot: adjust the negative shift count to match
 320         brlt.d r10,0,.Lrsub ; signs differ: subtract
 321         mov r5,0 ; delay slot: no guard bits
 322 .Ladd_dbl1_gt: ; both bit 23 of DBL0 and bit 0 of r5 are clear.
 323         bmsk    r2,r1,22 ; r2 = fraction bits of DBL1
 324         add_s   r2,r2,r0 ; fraction sum; bit 23 set means a shift down is needed
 325         bbit0.d r2,23,.Lno_shiftdown_dbl1_gt
 326         add_s   r0,r1,r0 ; delay slot: r0 = DBL1 plus the aligned DBL0 fraction
 327         bic.f   0,r9,r0; check for overflow -> infinity; eq : infinity
 328         bmsk    r1,r2,22 ; r1 = low 23 bits of the fraction sum
 329         lsr.ne.f r2,r2,2; cc: even ; hi: might round down
 330         lsr.ne  r1,r1,1 ; r1 = half of the fraction bits (skipped for infinity)
 331         rcmp.hi r5,1; hi : round down
 332         bclr.hi r0,r0,0 ; rounding down: drop bit 0 before the subtraction below
 333         j_l.d   [blink]
 334         sub_s   r0,r0,r1 ; delay slot: for infinity this clears the fraction; otherwise presumably halves it with rounding
 335
 336         .balign 4
 337 .Lno_shiftdown_dbl1_gt: ; from .Ladd_dbl1_gt when bit 23 of the fraction sum is clear; r0 = unrounded sum, r5 = guard bits
 338         add.f   0,r5,r5 ; carry = top guard bit; Z set if the remaining guard bits are zero
 339         btst.eq r0,0 ; in that case Z is replaced by: LSB of r0 is clear
 340         cmp.eq  r5,r5 ; if Z is still set, this comparison of equal values presumably clears the carry
 341         j_s.d   [blink]
 342         add.cs  r0,r0,1 ; delay slot: round up when the carry is set
 343         ENDFUNC(__addsf3)
 344         ENDFUNC(__subsf3)