libgcc/config/arc/ieee-754/arc600-dsp/muldf3.S

   1 /* Copyright (C) 2008-2024 Free Software Foundation, Inc.
   2    Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
   3                 on behalf of Synopsys Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 Under Section 7 of GPL version 3, you are granted additional
  18 permissions described in the GCC Runtime Library Exception, version
  19 3.1, as published by the Free Software Foundation.
  20
  21 You should have received a copy of the GNU General Public License and
  22 a copy of the GCC Runtime Library Exception along with this program;
  23 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  24 <http://www.gnu.org/licenses/>.  */
  25
  26 #include "../arc-ieee-754.h"
  27
  28 #if 0 /* DEBUG */
     /* Disabled self-check harness.  When enabled, this __muldf3 calls
        __muldf3_c and then __muldf3_asm (the name the real implementation
        gets from the #define at the end of this block) with the same
        r0..r3 inputs, and branches to abort unless both calls leave the
        same values in r0 and r1.
        NOTE(review): __muldf3_c is not defined in this file; presumably a
        C reference implementation -- confirm.  */
  29         .global __muldf3
  30         .balign 4
  31 __muldf3:
             /* Save the return address and all four input words.  After the
                five pushes the stack holds, from sp upwards:
                r1, r0, r3, r2, blink.  */
  32         push_s blink
  33         push_s r2
  34         push_s r3
  35         push_s r0
  36         bl.d __muldf3_c
             /* Delay slot of the call above.  */
  37         push_s r1
             /* Reload the saved r2/r3 inputs, then reuse their stack slots
                to hold the r0/r1 result of the first call.  */
  38         ld_s r2,[sp,12]
  39         ld_s r3,[sp,8]
  40         st_s r0,[sp,12]
  41         st_s r1,[sp,8]
             /* Restore the r0/r1 inputs (r0 in the delay slot) and run the
                second implementation.  */
  42         pop_s r1
  43         bl.d __muldf3_asm
  44         pop_s r0
             /* r3/r2 now receive the r1/r0 result of the first call.  */
  45         pop_s r3
  46         pop_s r2
  47         pop_s blink
             /* Compare both result words; r1 is only compared when r0
                already matched.  Return if equal, otherwise abort.  */
  48         cmp r0,r2
  49         cmp.eq r1,r3
  50         jeq_s [blink]
  51         b abort
  52 #define __muldf3 __muldf3_asm
  53 #endif /* DEBUG */
  54
  55 __muldf3_support: /* This label makes debugger output saner.  */
  56         .balign 4
  57         FUNC(__muldf3)
     /* .Ldenorm_2: second-stage normalisation of a denormal operand.
        Reached from .Ldenorm_dbl1 when DBL1H, with bit 31 masked off, is
        zero, so any remaining fraction bits are in DBL1L.  The fraction is
        moved up across the word boundary, the shift count (scaled by
        1 << 20) is subtracted from DBL0H to compensate, and control falls
        through into __muldf3 to redo the multiplication.
        DBL0L/DBL0H/DBL1L/DBL1H are defined in arc-ieee-754.h, which is not
        shown here; presumably the low/high words of the two double
        operands -- confirm.  */
  58 .Ldenorm_2:
  59         breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output
             /* Delay slot: r12 = norm (DBL1L); the flags set here pick the
                shift count in the next two instructions.  */
  60         norm.f r12,DBL1L
             /* Shift count: 21 in the MI case, otherwise norm result + 22.  */
  61         mov.mi r12,21
  62         add.pl r12,r12,22
             /* r11 = negated count, used below as right-shift / rotate
                amount; r12 = count moved up to bit 20, the position of the
                exponent field selected by the 0x7ff00000 mask.  */
  63         neg r11,r12
  64         asl_s r12,r12,20
             /* DBL1H = part of DBL1L that belongs in the high word; Z is set
                when that part is zero.  */
  65         lsr.f DBL1H,DBL1L,r11
  66         ror DBL1L,DBL1L,r11
             /* Take the shift off the other operand.  */
  67         sub_s DBL0H,DBL0H,r12
             /* If the lsr result was zero, the rotated value is the high
                word.  Either way, whatever now sits in DBL1H is removed from
                the low word.
                NOTE(review): this relies on the Z flag of lsr.f surviving
                ror and sub_s -- confirm.  */
  68         mov.eq DBL1H,DBL1L
  69         sub_l DBL1L,DBL1L,DBL1H
  70         /* Fall through.  */
  71         .global __muldf3
  72         .balign 4
     /* __muldf3: main path of the double multiply.
        The fraction product is built with the mululw / machulw / maclw
        accumulator instructions.  Interleaved with them, the exponent
        fields of both operands are extracted and the special cases are
        dispatched: exponent field zero goes to .Ldenorm_dbl0 or
        .Ldenorm_dbl1, exponent field equal to the 0x7ff00000 mask goes to
        .Linf_nan.  The result is left in DBL0H:DBL0L and control returns
        through blink.
        NOTE(review): the precise accumulator behaviour of mululw, machulw,
        maclw and acc2 is not described in this file; the remarks below lean
        on the existing "fraction product in r7:acc2:r4:r8" comment.  */
  73 __muldf3:
  74         mululw 0,DBL0L,DBL1L
  75         machulw r4,DBL0L,DBL1L
             /* r9 = 0x7ff00000, the exponent field mask.
                NOTE(review): the pc-relative offset 0x67 is a literal number
                rather than an expression in .L7ff00000, so presumably it
                must be re-derived whenever the code between this load and
                the constant changes size -- confirm.  */
  76         ld.as r9,[pcl,0x67] ; ((.L7ff00000-.+2)/4)]
             /* r6 = bits 0..19 of DBL0H with bit 20 set.  */
  77         bmsk r6,DBL0H,19
  78         bset r6,r6,20
             /* r8 = lowest word of the fraction product.  */
  79         mov r8,acc2
             /* NOTE(review): multiplying r4 by 1 presumably reloads the
                accumulator from r4 -- confirm.  */
  80         mululw 0,r4,1
             /* r11 / r12 = exponent fields of operand 0 / operand 1; the
                second "and" and the following maclw sit in branch delay
                slots.  */
  81         and r11,DBL0H,r9
  82         breq.d r11,0,.Ldenorm_dbl0
  83         and r12,DBL1H,r9
  84         breq.d r12,0,.Ldenorm_dbl1
  85         maclw 0,r6,DBL1L
  86         machulw 0,r6,DBL1L
             /* Exponent field equal to the mask: infinity or NaN.  The two
                delay slots build r10 = bits 0..19 of DBL1H with bit 20
                set.  */
  87         breq.d r11,r9,.Linf_nan
  88         bmsk r10,DBL1H,19
  89         breq.d r12,r9,.Linf_nan
  90         bset r10,r10,20
  91         maclw 0,r10,DBL0L
  92         machulw r5,r10,DBL0L
  93         add_s r12,r12,r11 ; add exponents
  94         mov r4,acc2
  95         mululw 0,r5,1
  96         maclw 0,r6,r10
  97         machulw r7,r6,r10 ; fraction product in r7:acc2:r4:r8
             /* Fold a non-zero lowest product word into bit 0 of r4; the
                bset.ne below consumes the flags set by this tst.  */
  98         tst r8,r8
  99         bclr r8,r9,30 ; 0x3ff00000
 100         bset.ne r4,r4,0 ; put least significant word into sticky bit
 101         bclr r6,r9,20 ; 0x7fe00000
             /* r10 = r7 >> 9.  When that is zero, r8 becomes r9 - r8
                (0x40000000); otherwise r8 stays 0x3ff00000.  */
 102         lsr.f r10,r7,9
 103         rsub.eq r8,r8,r9 ; 0x40000000
 104         sub r12,r12,r8 ; subtract bias + implicit 1
             /* Unsigned r12 >= 0x7fe00000: exponent out of range, handled in
                .Linf_denorm.  Delay slot: r10 = 12 - r10, the left-shift
                count used by .Lshift_frac.  */
 105         brhs.d r12,r6,.Linf_denorm
 106         rsub r10,r10,12
     /* .Lshift_frac: shift the product left by r10 into result position,
        round, merge in the exponent from r12 and the sign, and return.
        Also entered from .Linf_denorm / .Lshift32_frac with an adjusted
        r10 and r12 = 0.  */
 107 .Lshift_frac:
             /* r8 = -r10, used as the complementary right-shift amount
                (NOTE(review): assumes shift counts are taken modulo 32 --
                confirm).  r6 = r4 << r10, DBL0L = r4 >> r8.  */
 108         neg r8,r10
 109         asl r6,r4,r10
 110         lsr DBL0L,r4,r8
             /* Doubling r6 moves its top bit into carry and sets Z when
                nothing remains below it.  In that case bit 0 of DBL0L is
                tested, and if it is clear the cmp resets the carry.  */
 111         add.f 0,r6,r6
 112         btst.eq DBL0L,0
 113         cmp.eq r4,r4 ; round to nearest / round to even
 114         asl r4,acc2,r10
 115         lsr r5,acc2,r8
             /* Low result word, including the rounding carry.  */
 116         adc.f DBL0L,DBL0L,r4
             /* Flags now reflect DBL0H ^ DBL1H; MI is used by the final
                bset for the sign.  NOTE(review): the adc below needs the
                carry produced by adc.f above, so this relies on xor.f
                leaving carry untouched -- confirm.  */
 117         xor.f 0,DBL0H,DBL1H
 118         asl r7,r7,r10
 119         add_s r12,r12,r5
             /* High result word = exponent part + high fraction + carry.  */
 120         adc DBL0H,r12,r7
 121         j_s.d [blink]
             /* Delay slot: set bit 31 of the result in the MI case.  */
 122         bset.mi DBL0H,DBL0H,31
 123
 124 /* N.B. This is optimized for ARC700.
 125   ARC600 has very different scheduling / instruction selection criteria.  */
 126
 127 /* If one number is denormal, subtract some from the exponent of the other
 128    one (if the other exponent is too small, return 0), and normalize the
 129    denormal.  Then re-run the computation.  */
 /* .Lret0_2: return zero from .Ldenorm_2 (taken when DBL1L is 0).
    Only bit 31 of DBL0H is kept (shift right then left by 31) and DBL0L
    is cleared in the delay slot of the return.  */
 130 .Lret0_2:
 131         lsr_s DBL0H,DBL0H,31
 132         asl_s DBL0H,DBL0H,31
 133         j_s.d [blink]
 134         mov_s DBL0L,0
 135         .balign 4
     /* .Ldenorm_dbl0: operand 0 has a zero exponent field.  Swap the two
        operands word by word (r12 is the scratch register) so that the
        operand to normalise is always in DBL1, recompute r11 for the new
        DBL0H, and fall through into .Ldenorm_dbl1.  */
 136 .Ldenorm_dbl0:
 137         mov_s r12,DBL0L
 138         mov_s DBL0L,DBL1L
 139         mov_s DBL1L,r12
 140         mov_s r12,DBL0H
 141         mov_s DBL0H,DBL1H
 142         mov_s DBL1H,r12
 143         and r11,DBL0H,r9
     /* .Ldenorm_dbl1: DBL1 has a zero exponent field; r11 holds the
        exponent field of DBL0 and r9 the 0x7ff00000 mask.  */
 144 .Ldenorm_dbl1:
             /* Other operand is infinity or NaN.  */
 145         brhs r11,r9,.Linf_nan
             /* Unsigned 0x3ca00001 >= r11: the other exponent is too small,
                return zero.  */
 146         brhs 0x3ca00001,r11,.Lret0
             /* Subtract DBL1H from DBL0H, clear bit 31 of DBL1H, add it
                back.  Net effect: DBL0H is reduced by exactly the bit 31
                that was removed from DBL1H.  Z is set when nothing but
                that bit was present in DBL1H.  */
 147         sub_s DBL0H,DBL0H,DBL1H
 148         bmsk.f DBL1H,DBL1H,30
 149         add_s DBL0H,DBL0H,DBL1H
             /* No fraction bits in the high word: continue in .Ldenorm_2.
                Delay slot: r12 = norm (DBL1H).  */
 150         beq.d .Ldenorm_2
 151         norm r12,DBL1H
             /* r12 = left-shift count for the DBL1H:DBL1L pair.  */
 152         sub_s r12,r12,10
             /* r5 = count moved up to bit 20; it is subtracted from DBL0H
                to compensate for shifting DBL1.  */
 153         asl r5,r12,20
 154         asl_s DBL1H,DBL1H,r12
 155         sub DBL0H,DBL0H,r5
             /* r6 = bits of DBL1L that move into the high word
                (right shift by -r12; NOTE(review): assumes the shift count
                is taken modulo 32 -- confirm).  */
 156         neg r5,r12
 157         lsr r6,DBL1L,r5
 158         asl_s DBL1L,DBL1L,r12
             /* Re-run the multiplication; the delay slot merges r6 into
                DBL1H.  */
 159         b.d __muldf3
 160         add_s DBL1H,DBL1H,r6
 161
 /* .Lret0: return zero.  DBL0H ^ DBL1H is computed, a copy with bit 31
    cleared is xor-ed back in, which leaves only bit 31 of
    DBL0H ^ DBL1H in DBL0H.  DBL0L is cleared in the delay slot of the
    return.  DBL1H is clobbered.  */
 162 .Lret0: xor_s DBL0H,DBL0H,DBL1H
 163         bclr DBL1H,DBL0H,31
 164         xor_s DBL0H,DBL0H,DBL1H
 165         j_s.d [blink]
 166         mov_s DBL0L,0
 167
 168         .balign 4
     /* .Linf_nan: at least one operand has an exponent field that is not
        below the 0x7ff00000 mask (infinity or NaN).
        The result high word is the larger of the two high words with bit 31
        cleared, unless one operand is zero, in which case it is all ones.
        The low word is always 0, and bit 31 is set when the xor of the two
        input high words is negative.
        NOTE(review): because DBL0L is cleared unconditionally, a NaN whose
        only fraction bits are in its low word would presumably come back
        as infinity -- confirm whether that is intended.  */
 169 .Linf_nan:
             /* r12 = DBL1H without bit 31; DBL1H = xor of both high words
                (bit 31 is tested by tst_s at the end); DBL0H loses
                bit 31.  */
 170         bclr r12,DBL1H,31
 171         xor_s DBL1H,DBL1H,DBL0H
 172         bclr_s DBL0H,DBL0H,31
 173         max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
             /* Z if operand 0 is zero (bit 31 ignored).  */
 174         or.f 0,DBL0H,DBL0L
 175         mov_s DBL0L,0
             /* Only when operand 0 was non-zero: Z if operand 1 is zero.  */
 176         or.ne.f DBL1L,DBL1L,r12
 177         not_s DBL0H,DBL0L ; inf * 0 -> NaN
             /* Neither operand zero: use the max computed above.  */
 178         mov.ne DBL0H,r8
 179         tst_s DBL1H,DBL1H
 180         j_s.d [blink]
             /* Delay slot: set bit 31 of the result in the MI case.  */
 181         bset.mi DBL0H,DBL0H,31
 182
 183 /* We have checked for infinity / NaN input before, and transformed
 184    denormalized inputs into normalized inputs.  Thus, the worst case
 185    exponent overflows are:
 186        1 +     1 - 0x400 == 0xc02 : maximum underflow
 187    0x7fe + 0x7fe - 0x3ff == 0xbfd : maximum overflow
 188    N.B. 0x7e and 0x7f are also values for overflow.
 189
 190    If (r12 <= -54), we have an underflow to zero.  */
 191         .balign 4
     /* .Linf_denorm: the combined exponent in r12 failed the unsigned
        range check in the main path.  Top nibble of r12 below 0xc is
        treated as overflow (.Linf); anything else is an exponent that is
        too small, so the left-shift count in r10 is reduced by the (signed)
        exponent and the exponent part of the result is forced to 0.  */
 192 .Linf_denorm:
 193         lsr r6,r12,28
 194         brlo.d r6,0xc,.Linf
             /* Delay slot: r6 = r12 arithmetically shifted down by 20.  */
 195         asr r6,r12,20
             /* Adjust the shift count by r6.  */
 196         add.f r10,r10,r6
             /* Count still positive: ordinary shift, with r12 = 0 set in
                the delay slot.  */
 197         brgt.d r10,0,.Lshift_frac
 198         mov_s r12,0
             /* NOTE(review): this beq presumably still sees the Z flag of
                add.f above (count exactly 0) -- confirm that mov_s and
                brgt.d leave the flags alone.  Delay slot: count += 32.  */
 199         beq.d .Lround_frac
 200         add r10,r10,32
     /* .Lshift32_frac: move the product down by one whole word.  A non-zero
        r4 is remembered by setting bit 1 of the new r4, r4 takes the old
        acc2 and r7 is cleared.
        NOTE(review): mululw 0,r7,1 presumably loads the old r7 into the
        accumulator -- confirm.  */
 201 .Lshift32_frac:
 202         tst r4,r4
 203         mov r4,acc2
 204         bset.ne r4,r4,1
 205         mululw 0,r7,1
             /* Count >= 1: finish in .Lshift_frac (r7 = 0 in the delay
                slot).  */
 206         brge.d r10,1,.Lshift_frac
 207         mov r7,0
             /* Count == 0: only rounding is left.  The delay slot adds
                another 32 to the count either way.  */
 208         breq.d r10,0,.Lround_frac
 209         add r10,r10,32
             /* Count above 21 after the addition: shift by another word,
                otherwise the result is zero.  */
 210         brgt r10,21,.Lshift32_frac
 211         b_s .Lret0
 212
 /* .Lround_frac: the fraction needs no further shifting.  The result is
    taken from r7:acc2 and r4 holds the bits below it.
    Doubling r4 puts its top bit into carry and sets Z when nothing
    remains below that bit; in the Z case bit 0 of acc2 is tested as well.
    NOTE(review): as written, both adc instructions are conditional on Z,
    so the carry is only added when Z is still set after the btst.  That
    differs from the cmp.eq / unconditional adc.f sequence used in
    .Lshift_frac -- confirm this matches the intended round to nearest /
    round to even behaviour.  */
 213 .Lround_frac:
 214         add.f 0,r4,r4
 215         btst.eq acc2,0
 216         mov_s DBL0L,acc2
 217         mov_s DBL0H,r7
 218         adc.eq.f DBL0L,DBL0L,0
 219         j_s.d [blink]
             /* Delay slot: conditional on the Z flag written by adc.eq.f
                above when that instruction executed.  */
 220         adc.eq DBL0H,DBL0H,0
 221
 /* .Linf: overflow result.  Low word 0, high word r9 (0x7ff00000), with
    bit 31 set when DBL1H ^ DBL0H is negative.  DBL1H is clobbered.  */
 222 .Linf:  mov_s DBL0L,0
 223         xor.f DBL1H,DBL1H,DBL0H
 224         mov_s DBL0H,r9
 225         j_s.d [blink]
             /* Delay slot: apply the sign.  */
 226         bset.mi DBL0H,DBL0H,31
 227         ENDFUNC(__muldf3)
 228
 229         .balign 4
     /* Literal 0x7ff00000, used as the exponent field mask in r9.  The
        comment on the ld.as at the top of __muldf3 names this label as the
        target of its pc-relative load.
        NOTE(review): that load uses a literal offset, so presumably this
        word must not move relative to it -- confirm.  */
 230 .L7ff00000:
 231         .long 0x7ff00000