]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgcc/config/arc/ieee-754/muldf3.S
Update copyright years.
[thirdparty/gcc.git] / libgcc / config / arc / ieee-754 / muldf3.S
1 /* Copyright (C) 2008-2019 Free Software Foundation, Inc.
2 Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
3 on behalf of Synopsys Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* XMAC schedule: directly back-to-back multiplies stall; the third
27 instruction after a multiply stalls unless it is also a multiply. */
28 #include "arc-ieee-754.h"
29
; Debug-only self-check (disabled by the if-0 guard below).  When enabled,
; __muldf3 calls __muldf3_c and then __muldf3_asm with the same r0-r3
; arguments and branches to abort unless both return the same r0/r1 pair.
30 #if 0 /* DEBUG */
31 .global __muldf3
32 .balign 4
33 __muldf3:
34 push_s blink ; keep the return address across the two calls
35 push_s r2 ; save the incoming argument registers
36 push_s r3
37 push_s r0
38 bl.d __muldf3_c ; first call
39 push_s r1 ; (delay slot of bl.d) last argument register saved
40 ld_s r2,[sp,12] ; reload the saved r2 ...
41 ld_s r3,[sp,8] ; ... and the saved r3
42 st_s r0,[sp,12] ; park the __muldf3_c result (r0, r1) in the two slots just read
43 st_s r1,[sp,8]
44 pop_s r1 ; restore the saved r1
45 bl.d __muldf3_asm ; second call, same arguments
46 pop_s r0 ; (delay slot of bl.d) restore the saved r0
47 pop_s r3 ; r3 = r1 as returned by __muldf3_c
48 pop_s r2 ; r2 = r0 as returned by __muldf3_c
49 pop_s blink
50 cmp r0,r2 ; compare both result words ...
51 cmp.eq r1,r3
52 jeq_s [blink] ; ... and return when they agree
53 b abort ; results differ
54 #define __muldf3 __muldf3_asm
55 #endif /* DEBUG */
56 /* N.B. This is optimized for ARC700.
57 ARC600 has very different scheduling / instruction selection criteria. */
58 /* For the standard multiplier, instead of mpyu rx,DBL0L,DBL1L; tst rx,rx ,
59 we can do:
60 sub rx,DBL0L,1; bic rx,DBL0L,rx; lsr rx,rx; norm rx,rx; asl.f 0,DBL1L,rx */
61
; Slow path for an operand whose exponent field is zero (denormal or zero).
; Reached only through the breq branches in __muldf3 further down, so r9
; already holds the exponent mask (0x7ff00000).  DBL0H:DBL0L and DBL1H:DBL1L
; are the register names of the two operands; they are defined outside this
; file (presumably in arc-ieee-754.h).
62 __muldf3_support: /* This label makes debugger output saner. */
63 /* If one number is denormal, subtract some from the exponent of the other
64 one (if the other exponent is too small, return 0), and normalize the
65 denormal. Then re-run the computation. */
66 .balign 4
67 FUNC(__muldf3)
68 .Ldenorm_dbl0: ; operand 0 has a zero exponent field: swap the operands
69 mov_s r12,DBL0L
70 mov_s DBL0L,DBL1L
71 mov_s DBL1L,r12
72 mov_s r12,DBL0H
73 mov_s DBL0H,DBL1H
74 mov_s DBL1H,r12
75 and r11,DBL0H,r9 ; r11 = exponent field of the new operand 0
76 .Ldenorm_dbl1: ; operand 1 has a zero exponent field; r11 = exponent field of operand 0
77 brhs r11,r9,.Linf_nan ; operand 0 is inf or NaN
78 brhs 0x3ca00001,r11,.Lret0 ; exponent of operand 0 too small: return 0
79 sub_s DBL0H,DBL0H,DBL1H ; these three instructions clear bit 31 of DBL1H and
80 bmsk_s DBL1H,DBL1H,30 ; subtract it from DBL0H, so DBL0H bit 31 becomes the
81 add_s DBL0H,DBL0H,DBL1H ; product sign and DBL1H is left non-negative
82 breq_s DBL1H,0,.Ldenorm_2 ; no fraction bits in the high word
83 norm r12,DBL1H ; presumably the count of redundant sign bits - confirm against the ISA manual
84
85 sub_s r12,r12,10 ; r12 = left shift that puts the leading one at bit 20
86 asl r5,r12,20 ; same amount, positioned at the exponent field
87 asl_s DBL1H,DBL1H,r12
88 sub DBL0H,DBL0H,r5 ; take the shift out of the exponent of operand 0
89 neg r5,r12
90 lsr r6,DBL1L,r5 ; low-word bits that cross into the high word (count presumably taken modulo 32)
91 asl_s DBL1L,DBL1L,r12
92 b.d __muldf3 ; re-run the multiplication with the normalized operand
93 add_s DBL1H,DBL1H,r6 ; (delay slot) merge the crossed-over bits
94
; At least one operand has an all-ones exponent field (inf or NaN).
; Returns NaN (high word all ones) when the other operand is zero, otherwise
; the larger of the two sign-stripped high words; the low result word is
; always 0 and the sign bit is the xor of the operand signs.
; NOTE(review): only the high words are compared and the low word is forced
; to 0, so a NaN whose payload lies entirely in the low word appears to come
; out as infinity - confirm whether that matters for callers.
95 .balign 4
96 .Linf_nan:
97 bclr r12,DBL1H,31 ; r12 = high word of operand 1 without sign
98 xor_s DBL1H,DBL1H,DBL0H ; DBL1H bit 31 = product sign
99 bclr_s DBL0H,DBL0H,31 ; DBL0H = high word of operand 0 without sign
100 max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
101 or.f 0,DBL0H,DBL0L ; Z set when operand 0 is zero
102 mov_s DBL0L,0
103 or.ne.f DBL1L,DBL1L,r12 ; operand 0 nonzero: Z set when operand 1 is zero
104 not_s DBL0H,DBL0L ; inf * 0 -> NaN
105 mov.ne DBL0H,r8 ; neither operand is zero: use the max computed above
106 tst_s DBL1H,DBL1H ; N = product sign
107 j_s.d [blink]
108 bset.mi DBL0H,DBL0H,31 ; (delay slot) negative product: set the sign bit
109
; Return a zero whose sign bit is the xor of bit 31 of DBL0H and DBL1H.
110 .Lret0: xor_s DBL0H,DBL0H,DBL1H ; DBL0H = DBL0H ^ DBL1H
111 bclr DBL1H,DBL0H,31 ; DBL1H = the same value with bit 31 cleared
112 xor_s DBL0H,DBL0H,DBL1H ; only bit 31 (the sign) survives
113 j_s.d [blink]
114 mov_l DBL0L,0 ; (delay slot) low word of the zero result
115
; Denormal operand 1 whose high word holds no fraction bits (DBL1H == 0 after
; the sign was cleared): normalize from the low word alone, take the shift
; out of the exponent of operand 0, then fall through into __muldf3.
116 .balign 4
117 .Ldenorm_2:
118 breq_s DBL1L,0,.Lret0 ; 0 input -> 0 output
119 norm.f r12,DBL1L ; flags also record whether bit 31 of DBL1L is set
120
121 mov.mi r12,21 ; bit 31 set: shift by 21
122 add.pl r12,r12,22 ; otherwise shift by the norm result plus 22
123 neg r11,r12 ; negated count for the right shift / rotate below
124 asl_s r12,r12,20 ; shift amount positioned at the exponent field
125 lsr.f DBL1H,DBL1L,r11 ; bits that land in the high word; Z set when there are none
126 ror DBL1L,DBL1L,r11
127 sub_s DBL0H,DBL0H,r12 ; take the shift out of the exponent of operand 0
128 mov.eq DBL1H,DBL1L ; lsr gave zero: the rotated word is the high word (presumably the shift-by-more-than-32 case - TODO confirm)
129 sub_s DBL1L,DBL1L,DBL1H ; drop from the low word the bits now held in the high word
130 /* Fall through. */
; __muldf3: multiply the double in DBL0H:DBL0L by the double in DBL1H:DBL1L
; and return the product in DBL0H:DBL0L.
; Register roles in the fast path:
;   r9  = 0x7ff00000, the exponent field mask
;   r6  = high fraction word of operand 0 with the implicit one at bit 20
;   r10 = the same for operand 1; later the shift count for .Lshift_frac
;   r11 / r12 = exponent fields of operand 0 / operand 1; r12 later holds the
;               exponent of the result
;   r7:r5:r4 = upper words of the fraction product; the lowest product word
;              is only folded into bit 0 of r4 as a sticky bit
; Zero exponent fields go to .Ldenorm_dbl0 / .Ldenorm_dbl1, all-ones exponent
; fields to .Linf_nan, exponent overflow / underflow to .Linf_denorm.
131 .global __muldf3
132 .balign 4
133 __muldf3:
; The literal offset 0x4b is hard-coded, so it has to be re-checked whenever
; the amount of code between here and .L7ff00000 changes.
134 ld.as r9,[pcl,0x4b] ; ((.L7ff00000-.+2)/4)]
135 MPYHU r4,DBL0L,DBL1L ; MPYHU is defined outside this file; presumably the high word of the unsigned product
136 bmsk r6,DBL0H,19 ; r6 = fraction bits 0..19 of the high word of operand 0
137 bset r6,r6,20 ; add the implicit leading one
138 mpyu r7,r6,DBL1L
139 and r11,DBL0H,r9 ; r11 = exponent field of operand 0
140 breq r11,0,.Ldenorm_dbl0
141 MPYHU r8,r6,DBL1L
142 bmsk r10,DBL1H,19 ; r10 = fraction bits 0..19 of the high word of operand 1
143 bset r10,r10,20 ; add the implicit leading one
144 MPYHU r5,r10,DBL0L
145 add.f r4,r4,r7
146 and r12,DBL1H,r9 ; r12 = exponent field of operand 1
147 MPYHU r7,r6,r10
148 breq r12,0,.Ldenorm_dbl1
149 adc.f r5,r5,r8
150 mpyu r8,r10,DBL0L
151 breq r11,r9,.Linf_nan ; operand 0 is inf or NaN
152 breq r12,r9,.Linf_nan ; operand 1 is inf or NaN
153 mpyu r6,r6,r10
154 add.cs r7,r7,1 ; propagate the carry of the adc.f above into the top word
155 add.f r4,r4,r8
156 mpyu r10,DBL1L,DBL0L ; lowest product word, only used for the sticky bit
157 bclr r8,r9,30 ; 0x3ff00000
158 adc.f r5,r5,r6
159 ; XMAC write-back stall / std. mult stall is one cycle later
160 bclr r6,r9,20 ; 0x7fe00000
161 add.cs r7,r7,1 ; fraction product in r7:r5:r4
162 tst r10,r10
163 bset.ne r4,r4,0 ; put least significant word into sticky bit
164 lsr.f r10,r7,9 ; r10 = r7 >> 9; Z selects the bias adjustment below
165 add_l r12,r12,r11 ; add exponents
166 rsub.eq r8,r8,r9 ; 0x40000000
167 sub r12,r12,r8 ; subtract bias + implicit 1
168 brhs.d r12,r6,.Linf_denorm ; exponent out of range (unsigned compare against 0x7fe00000)
169 rsub r10,r10,12 ; (delay slot) r10 = 12 - r10, the left shift used by .Lshift_frac
; .Lshift_frac: shift the product left by r10 (right by the negated count in
; r8), round to nearest / even, add the exponent in r12 and apply the sign.
; Also entered from .Linf_denorm and .Lshift32_frac with r12 = 0.
170 .Lshift_frac:
171 neg r8,r10 ; count for the matching right shifts (presumably taken modulo 32)
172 asl r6,r4,r10 ; r6 = the bits of r4 that fall below the result
173 lsr DBL0L,r4,r8 ; bits of r4 that stay in the low result word
174 add.f 0,r6,r6 ; C = top dropped bit, Z = nothing below it
175 btst.eq DBL0L,0 ; exact half (or nothing dropped): Z = result lsb is even
176 cmp.eq r4,r4 ; round to nearest / round to even
177 asl r4,r5,r10
178 lsr r5,r5,r8
179 adc.f DBL0L,DBL0L,r4 ; low result word plus rounding increment
180 xor.f 0,DBL0H,DBL1H ; N = product sign; the adc below still consumes the carry of the adc.f above
181 asl r7,r7,r10
182 add_s r12,r12,r5
183 adc DBL0H,r12,r7 ; exponent + high fraction bits + carry from the low word
184 j_s.d [blink]
185 bset.mi DBL0H,DBL0H,31 ; (delay slot) negative product: set the sign bit
186
187 /* We have checked for infinity / NaN input before, and transformed
188 denormalized inputs into normalized inputs. Thus, the worst case
189 exponent overflows are:
190 1 + 1 - 0x400 == 0xc02 : maximum underflow
191 0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
192 N.B. 0x7fe and 0x7ff are also values for overflow.
193
194 If (r12 <= -54), we have an underflow to zero. */
; Exponent out of range (r12 >= 0x7fe00000 unsigned on entry from __muldf3).
; Values below 0xc0000000 are overflows and go to .Linf; anything else is
; treated as a negative exponent: the result is denormal or zero, so the
; exponent is folded into the shift count r10 and r12 is cleared.
195 .balign 4
196 .Linf_denorm:
197 brlo r12,0xc0000000,.Linf
198 asr r6,r12,20 ; r6 = signed exponent value
199 mov_s r12,0 ; the result carries a zero exponent field
200 add.f r10,r10,r6 ; reduce the left shift by the exponent deficit
201 brgt r10,0,.Lshift_frac ; still a positive left shift
202 beq_s .Lround_frac ; shift count exactly zero (flags from the add.f above)
203 add.f r10,r10,32 ; negative: move down by whole words instead
; .Lshift32_frac: shift r7:r5:r4 down by one 32-bit word, folding the word
; that drops out into bit 1 of the new r4 as a sticky bit.
204 .Lshift32_frac:
205 tst r4,r4
206 mov r4,r5
207 bset.ne r4,r4,1 ; dropped word was nonzero: remember it as sticky
208 mov r5,r7
209 mov r7,0
210 brge r10,1,.Lshift_frac
211 breq r10,0,.Lround_frac
212 add.f r10,r10,32 ; still negative: account for another whole word
213 brgt r10,21,.Lshift32_frac
214 b_s .Lret0 ; too small: return a signed zero
215
; .Lround_frac: denormal result that needs no bit shift (shift count zero).
;   In:  r7:r5 = high:low result word, r4 = the bits below the result lsb,
;        bit 31 of DBL0H / DBL1H = operand signs.
;   Out: DBL0H:DBL0L = product, rounded to nearest / even, with sign.
; Rounding mirrors .Lshift_frac: the carry is the top dropped bit unless the
; dropped bits are exactly one half and the result lsb is even.  Previously
; the increment was applied only in that tie-to-even case (adc.eq), and the
; sign of the product was never applied on this path.
; .Linf: exponent overflow, returns infinity with the product sign.
; Both paths share the sign-applying return at .Lset_sign.
; Layout: this block is sized to end on a 4-byte boundary so that .L7ff00000
; keeps the offset hard-coded in the ld.as at __muldf3; re-check that offset
; when changing any instruction here.
216 .Lround_frac:
217 add.f 0,r4,r4 ; C = top dropped bit, Z = nothing below it
218 btst.eq r5,0 ; exact half (or nothing dropped): Z = result lsb is even
219 cmp.eq r4,r4 ; tie with even lsb: clear C so there is no round-up
220 adc.f DBL0L,r5,0 ; low word plus rounding increment
221 xor_s DBL1H,DBL1H,DBL0H ; DBL1H bit 31 = product sign (short form, flags untouched)
222 adc DBL0H,r7,0 ; high word plus carry out of the low word
223 b_s .Lset_sign
224
225 .Linf: xor_s DBL1H,DBL1H,DBL0H ; DBL1H bit 31 = product sign
226 mov_s DBL0L,0
227 mov_s DBL0H,r9 ; 0x7ff00000: infinity
228 .Lset_sign: tst_s DBL1H,DBL1H ; N = product sign
229 j_s.d [blink]
230 bset.mi DBL0H,DBL0H,31 ; (delay slot) negative product: set the sign bit
231 ENDFUNC(__muldf3)
232
; Literal for the pc-relative ld.as at __muldf3: the exponent field mask of
; the high word of a double, which is also the high word of +infinity.
233 .balign 4
234 .L7ff00000:
235 .long 0x7ff00000