]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgcc/config/arc/ieee-754/addsf3.S
Update copyright years.
[thirdparty/gcc.git] / libgcc / config / arc / ieee-754 / addsf3.S
1 /* Copyright (C) 2008-2024 Free Software Foundation, Inc.
2 Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
3 on behalf of Synopsys Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 #include "arc-ieee-754.h"
27 #if 0 /* DEBUG */
/* Self-checking wrappers; the #if 0 above keeps them out of the build.
   Each wrapper runs the routine with the _c suffix and then the routine
   with the _asm suffix on the same r0/r1 inputs, and calls abort unless
   both leave the same value in r0.  The _c routines are not defined in
   this file (presumably C reference implementations - confirm).  */
28 .global __addsf3
29 FUNC(__addsf3)
30 .balign 4
31 __addsf3:
32 push_s blink /* keep the return address across the two calls */
33 push_s r1 /* save the second input */
34 bl.d __addsf3_c
35 push_s r0 /* delay slot: save the first input */
36 ld_s r1,[sp,4] /* reload the saved second input for the next call */
37 st_s r0,[sp,4] /* park the __addsf3_c result in that stack slot */
38 bl.d __addsf3_asm
39 pop_s r0 /* delay slot: restore the first input */
40 pop_s r1 /* r1 = parked __addsf3_c result */
41 pop_s blink
42 cmp r0,r1 /* compare the two results bit for bit */
43 jeq_s [blink] /* equal: return the result */
44 bl abort /* the two implementations disagree */
45 ENDFUNC(__addsf3)
/* Same check for subtraction, using __subsf3_c and __subsf3_asm.  */
46 .global __subsf3
47 FUNC(__subsf3)
48 .balign 4
49 __subsf3:
50 push_s blink /* keep the return address across the two calls */
51 push_s r1 /* save the second input */
52 bl.d __subsf3_c
53 push_s r0 /* delay slot: save the first input */
54 ld_s r1,[sp,4] /* reload the saved second input for the next call */
55 st_s r0,[sp,4] /* park the __subsf3_c result in that stack slot */
56 bl.d __subsf3_asm
57 pop_s r0 /* delay slot: restore the first input */
58 pop_s r1 /* r1 = parked __subsf3_c result */
59 pop_s blink
60 cmp r0,r1 /* compare the two results bit for bit */
61 jeq_s [blink] /* equal: return the result */
62 bl abort /* the two implementations disagree */
63 ENDFUNC(__subsf3)
/* From here on the names __addsf3 / __subsf3 expand to the _asm names, so
   the implementation below is what the wrappers above call.  */
64 #define __addsf3 __addsf3_asm
65 #define __subsf3 __subsf3_asm
66 #endif /* DEBUG */
67 /* N.B. This is optimized for ARC700.
68 ARC600 has very different scheduling / instruction selection criteria. */
69
70 /* inputs: r0, r1
71 output: r0
72 clobber: r1-r10, r12, flags */
73
/* Entry points.  DBL0 / DBL1 in the comments of this routine are the first
   and second input, in r0 and r1.  __subsf3 flips the sign bit of DBL1 and
   falls through into __addsf3.  The entry code sets up
     r4  = DBL0 & 0x7fffffff
     r6  = DBL1 & 0x7f800000
     r9  = 0x7f800000
     r10 = DBL0 ^ DBL1 (negative iff the signs differ)
     r12 = difference of the exponent fields
   and dispatches on them.  */
74 .balign 4
75 .global __addsf3
76 .global __subsf3
77 FUNC(__addsf3)
78 FUNC(__subsf3)
79 .long 0x7f800000 ; exponent mask
80 __subsf3:
81 bxor_l r1,r1,31 /* toggle the sign of DBL1: a - b is computed as a + (-b) */
82 __addsf3:
/* Load the exponent mask.  The -8 offset presumably reaches the .long above
   because the constant and the bxor_l take 4 bytes each (which would be why
   the long form bxor_l is used) - keep that layout intact.  */
83 ld r9,[pcl,-8]
84 bmsk r4,r0,30 /* r4 = DBL0 with the sign bit cleared */
85 xor r10,r0,r1 /* bit 31 of r10 = sign difference */
86 and r6,r1,r9 /* r6 = exponent field of DBL1 */
87 sub.f r12,r4,r6 /* flags are used by the blo below and again in .Linf_nan */
88 asr_s r12,r12,23 /* r12 = exponent difference, later used as shift count */
89 blo .Ldbl1_gt /* r4 < r6: DBL1 has the larger exponent */
90 brhs r4,r9,.Linf_nan /* exponent field of DBL0 is all ones: inf or NaN */
91 brne r12,0,.Lsmall_shift /* exponents differ: DBL1 has to be shifted */
/* Equal exponents: add if the signs agree, otherwise fall through to the
   subtraction below.  */
92 brge r10,0,.Ladd_same_exp ; r12 == 0
93 /* After subtracting, we need to normalize; when shifting to place the
94 leading 1 into position for the implicit 1 and adding that to DBL0,
95 we increment the exponent. Thus, we have to subtract one more than
96 the shift count from the exponent beforehand. Iff the exponent drops thus
97 below zero (before adding in the fraction with the leading one), we have
98 generated a denormal number. Denormal handling is basically reducing the
99 shift count so that we produce a zero exponent instead; FWIW, this way
100 the shift count can become zero (if we started out with exponent 1).
101 On the plus side, we don't need to check for denorm input, the result
102 of subtracting these looks just the same as denormals generated during
103 subtraction. */
/* Same exponent, opposite signs (r12 == 0, r10 < 0): subtract the
   magnitudes.  The result takes the sign of DBL0 and is flipped at the end
   if abs(DBL0) < abs(DBL1).

   Denormal inputs: with an exponent field of 0 the min below yields 0, so
   r12 - 1 is -1.  This file relies on shifts using only the low bits of
   the count (see the negated counts used to form the guard bits), so -1
   would act as a shift by 31 and destroy the difference.  The max clamps
   the count to 0; for every nonzero exponent it changes nothing.  The
   exponent adjustment r12 << 23 is already 0 in that case, so the
   difference of two denormals is returned unshifted.  */
104 bmsk r7,r1,30 /* r7 = DBL1 with the sign bit cleared */
105 breq r4,r7,.Lret0 /* equal magnitudes cancel to zero */
106 sub.f r5,r4,r7 /* carry set iff abs(DBL0) < abs(DBL1) */
107 lsr r12,r4,23 /* r12 = exponent of DBL0 */
108 neg.cs r5,r5 /* r5 = abs(difference) */
109 norm r3,r5
110 bmsk r2,r0,22 /* r2 = fraction bits of DBL0 */
111 sub_s r3,r3,6 /* r3 = normalizing shift count + 1 */
112 min r12,r12,r3 /* do not take more off the exponent than it holds */
113 bic r1,r0,r2 /* r1 = sign and exponent of DBL0 */
114 sub_s r3,r12,1 /* r3 = shift count */
max r3,r3,0 /* exponent 0 (denormal inputs): shift by 0, not by -1 */
115 asl_s r12,r12,23
116 asl r2,r5,r3 /* r2 = shifted difference, leading 1 included when normal */
117 sub_s r1,r1,r12 /* lower the exponent by one more than the shift count */
118 add_s r0,r1,r2 /* adding the leading 1 raises the exponent by one again */
119 j_s.d [blink]
/* Delay slot.  The carry is still the one from the sub.f above, assuming
   none of the instructions in between updates the flags.  */
120 bxor.cs r0,r0,31
/* Reached from the entry code when DBL0 & 0x7fffffff >= 0x7f800000, i.e.
   DBL0 is inf or NaN.  DBL0 is returned, with DBL1 or-ed in under the
   condition described at the delay slot below.  */
121 .balign 4
122 .Linf_nan:
123 ; If both inputs are inf, but with different signs, the result is NaN.
124 asr r12,r10,31 /* r12 = all ones if the signs differ, else 0 */
125 or_s r1,r1,r12 /* a sign mismatch turns r1 into all ones, a NaN pattern */
126 j_s.d [blink]
/* Delay slot.  eq presumably still reflects the sub.f of the entry code
   (r4 == r6), which on this path can only hold when both exponent fields
   are all ones and the fraction of DBL0 is zero - confirm that nothing on
   the way here updates the flags.  */
127 or.eq r0,r0,r1
/* Reached from the entry code with equal exponent fields (r12 == 0) and
   equal signs (r10 >= 0).  */
128 .balign 4
129 .Ladd_same_exp:
130 /* This is a special case because we can't test for need to shift
131 down by checking if bit 23 of DBL0 changes. OTOH, here we know
132 that we always need to shift down. */
133 ; adding the two floating point numbers together makes the sign
134 ; cancel out and appear as carry; the exponent is doubled, and the
135 ; fraction also in need of shifting left by one. The two implicit
136 ; ones of the sources make an implicit 1 of the result, again
137 ; non-existent in a place shifted by one.
138 add.f r0,r0,r1 /* integer sum of both inputs; the carry out holds the sign */
139 btst_s r0,1 /* Z = bit 1 clear; consumed by the add.ne below */
140 breq r6,0,.Ldenorm_add /* exponent fields are zero: no implicit ones */
141 add.ne r0,r0,1 ; round to even.
142 rrc r0,r0 /* shift down by one, rotating the carry (sign) into bit 31 */
143 bmsk r1,r9,23 /* r1 = 0x00800000, one step of the exponent field */
144 add r0,r0,r1 ; increment exponent
145 bic.f 0,r9,r0; check for overflow -> infinity.
146 jne_l [blink] /* exponent field not all ones: done */
147 mov_s r0,r9 /* overflow: the result is infinity ... */
148 j_s.d [blink]
/* ... with the sign bit taken from the carry flag (delay slot).  This
   assumes the carry is still the one produced by the add.f above.  */
149 bset.cs r0,r0,31
150
/* Both exponent fields are zero.  r0 = (DBL0 & 0x7fffffff) + DBL1, computed
   in the delay slot of the return; the sign bit comes from DBL1, which has
   the same sign as DBL0 on this path.  */
151 .Ldenorm_add:
152 j_s.d [blink]
153 add r0,r4,r1
154
/* Return DBL0 unchanged (r0 is not touched).  */
155 .Lret_dbl0:
156 j_s [blink]
157
/* Reached from the entry code when DBL0 has the larger exponent;
   r12 = exponent difference (nonzero).  Aligns the fraction of DBL1 with
   DBL0, keeps the bits shifted out as guard bits in r5, then adds (.Ladd)
   or subtracts (.Lsub) depending on the sign difference in r10.  */
158 .balign 4
159 .Lsmall_shift:
160 brhi r12,25,.Lret_dbl0 /* DBL1 is too small to matter: return DBL0 */
161 breq.d r6,0,.Ldenorm_small_shift /* exponent field of DBL1 is zero */
162 bmsk_s r1,r1,22 /* delay slot: r1 = fraction bits of DBL1 */
163 bset_s r1,r1,23 /* make the leading 1 explicit */
164 .Lfixed_denorm_small_shift:
165 neg r8,r12
/* r5 = guard bits, i.e. the part of r1 that the right shift below drops.
   This relies on the shift using only the low bits of the negated count -
   TODO confirm against the ISA manual.  */
166 asl r5,r1,r8
167 brge.d r10,0,.Ladd /* same signs: add; otherwise fall through to .Lsub */
168 lsr_l r1,r1,r12 /* delay slot: shift the fraction of DBL1 into place */
169 /* subtract, abs(DBL0) > abs(DBL1) */
170 /* DBL0: original values
171 DBL1: fraction with explicit leading 1, shifted into place
172 r4: orig. DBL0 & 0x7fffffff
173 r6: orig. DBL1 & 0x7f800000
174 r9: 0x7f800000
175 r10: orig. DBL0H ^ DBL1H
176 r5 : guard bits */
/* .Lsub forms the two-word difference r4:r12 of the DBL0 fraction (with its
   leading 1) and the aligned DBL1 fraction r1 with guard bits r5.
   .Lsub_done, which is also entered from .Lrsub, normalizes and rounds it.
   On entry to .Lsub_done: r3 = norm of r4, r4:r12 = difference,
   r6 = exponent field and r7 = sign and exponent of the larger input.  */
177 .balign 4
178 .Lsub:
179 neg.f r12,r5 /* low word: 0 - guard bits; the borrow feeds the sbc.f */
180 bmsk r3,r0,22 /* r3 = fraction bits of DBL0 */
181 bset r5,r3,23 /* r5 = fraction of DBL0 with explicit leading 1 */
182 sbc.f r4,r5,r1 /* high word of the difference, minus the borrow */
183 beq.d .Large_cancel_sub /* high word is zero */
184 bic r7,r0,r3 /* delay slot: r7 = sign and exponent of DBL0 */
185 norm r3,r4
186 bmsk r6,r7,30 /* r6 = exponent field of DBL0 (r7 without the sign) */
187 .Lsub_done:
/* r3 - 6 is the number of exponent steps taken off below (r3 << 23 is
   subtracted from r7); one less than that is the left shift count.  */
188 sub_s r3,r3,6
189 breq r3,1,.Lsub_done_noshift /* the shift count would be zero */
190 asl r5,r3,23 /* r5 = amount to subtract from the exponent field */
191 sub_l r3,r3,1 /* r3 = left shift count */
192 brlo r6,r5,.Ldenorm_sub /* exponent field too small: denormal result */
193 sub r0,r7,r5 /* r0 = sign and lowered exponent */
194 neg_s r1,r3
/* r2 = the bits of r12 that the left shift moves up into the high word
   (right shift by the negated count).  The carry presumably ends up as the
   top bit of what remains in r12 - confirm against the ISA manual.  */
195 lsr.f r2,r12,r1
196 asl_s r12,r12,r3 /* r12 = guard bits that stay below the result */
/* Rounding.  NOTE(review): this looks like round to nearest with ties to
   even: Z is cleared when the low result bit (bit 0 of r2) is set or when
   guard bits below the top one remain, and the adc.ne then adds the carry.
   It depends on the short-form and non-.f instructions in between leaving
   the flags alone.  */
197 btst_s r2,0
198 bmsk.eq.f r12,r12,30
199 asl r5,r4,r3 /* r5 = normalized high word, leading 1 at bit 23 */
200 add_s r0,r0,r2
201 adc.ne r0,r0,0
202 j_s.d [blink]
203 add_l r0,r0,r5 /* delay slot: add the fraction together with its leading 1 */
204
/* Return 0 (all bits clear); r0 is set in the delay slot of the return.  */
205 .Lret0:
206 j_s.d [blink]
207 mov_l r0,0
208
/* Reached from .Lsmall_shift when the exponent field of DBL1 (r6) is zero.
   r1 already holds the fraction bits of DBL1; no leading 1 is added (the
   bset_s after the branch is skipped) and the shift count is reduced by
   one.  */
209 .balign 4
210 .Ldenorm_small_shift:
211 brne.d r12,1,.Lfixed_denorm_small_shift /* count stays nonzero: shift as usual */
212 sub_s r12,r12,1 /* delay slot: shift count - 1 */
213 brlt.d r10,0,.Lsub /* signs differ: subtract */
214 mov_s r5,r12 ; zero r5, and align following code
/* Add, DBL0 has the larger exponent.  r1 = aligned fraction of DBL1,
   r5 = guard bits.  */
215 .Ladd: ; Both bit 23 of DBL1 and bit 0 of r5 are clear.
216 bmsk r2,r0,22 /* r2 = fraction bits of DBL0 */
217 add_s r2,r2,r1 /* fraction sum; bit 23 set = carry out of the fraction */
218 bbit0.d r2,23,.Lno_shiftdown
219 add_s r0,r0,r1 /* delay slot: r0 = DBL0 + aligned DBL1 */
/* The carry out of the fraction has raised the exponent field of r0 by one,
   so the fraction has to be shifted down.  This is done by subtracting r1
   (the low 23 bits of the sum, halved unless the result is infinity) from
   r0 in the final delay slot.  */
220 bic.f 0,r9,r0; check for overflow -> infinity; eq : infinity
221 bmsk r1,r2,22 /* r1 = low 23 bits of the fraction sum */
222 lsr.ne.f r2,r2,2; cc: even ; hi: might round down
223 lsr.ne r1,r1,1
224 rcmp.hi r5,1; hi : round down
225 bclr.hi r0,r0,0
226 j_l.d [blink]
227 sub_s r0,r0,r1
228
229 /* r4: DBL0H & 0x7fffffff
230 r6: DBL1H & 0x7f800000
231 r9: 0x7f800000
232 r10: sign difference
233 r12: shift count (negative) */
/* Reached from the entry code when DBL1 has the larger exponent (r4 < r6).
   Mirror image of .Lsmall_shift: here DBL0 is the input that gets shifted.

   The denormal test has to look at the exponent field of DBL0.  r6 is the
   exponent field of DBL1 and can never be zero on this path, because
   r4 < r6; testing r6 therefore sent a zero or denormal DBL0 down the
   normal path, where it received an implicit leading 1 and a shift count
   that is one too large (for example 0 + 0x00800000 gave 0x00c00000).
   The flags are free at this point: every path from here sets them anew
   before reading them.

   NOTE(review): the extra tst adds 4 bytes in front of several short-range
   branches.  By hand count the tightest one, the breq to
   .Lsub_done_noshift in .Lsub_done, still fits - confirm with the
   assembler.  */
234 .balign 4
235 .Ldbl1_gt:
236 brhs r6,r9,.Lret_dbl1 ; inf or NaN
237 neg r8,r12 /* r8 = positive shift count */
238 brhi r8,25,.Lret_dbl1 /* DBL0 is too small to matter: return DBL1 */
239 .Lsmall_shift_dbl0:
tst r0,r9 /* Z set iff the exponent field of DBL0 is zero */
240 beq.d .Ldenorm_small_shift_dbl0
241 bmsk_s r0,r0,22 /* delay slot: r0 = fraction bits of DBL0 */
242 bset_s r0,r0,23 /* make the leading 1 explicit */
243 .Lfixed_denorm_small_shift_dbl0:
/* r5 = guard bits, i.e. the part of r0 that the right shift below drops;
   relies on the shift using only the low bits of the negative count.  */
244 asl r5,r0,r12
245 brge.d r10,0,.Ladd_dbl1_gt /* same signs: add; else fall through to .Lrsub */
246 lsr r0,r0,r8 /* delay slot: shift the fraction of DBL0 into place */
247 /* subtract, abs(DBL0) < abs(DBL1) */
248 /* DBL0: fraction with explicit leading 1, shifted into place
249 DBL1: original value
250 r6: orig. DBL1 & 0x7f800000
251 r9: 0x7f800000
252 r5: guard bits */
253 .balign 4
254 .Lrsub:
255 neg.f r12,r5 /* low word: 0 - guard bits; the borrow feeds the sbc.f */
256 bmsk r5,r1,22 /* r5 = fraction bits of DBL1 */
257 bic r7,r1,r5 /* r7 = sign and exponent of DBL1 */
258 bset r5,r5,23 /* explicit leading 1 */
259 sbc.f r4,r5,r0 /* high word of the difference, minus the borrow */
260 bne.d .Lsub_done ; note: r6 is already set up.
261 norm r3,r4
262 /* Fall through */
263
264 /* r4:r12 : unnormalized result fraction
265 r7: result sign and exponent */
266 /* When seeing large cancellation, only the topmost guard bit might be set. */
267 .balign 4
268 .Large_cancel_sub:
/* Long form instead of breq_s: with the extra tst above, .Lret0 would sit
   at the very edge of the short branch range.  */
269 breq r12,0,.Lret0
270 sub r0,r7,24<<23
271 xor.f 0,r0,r7 ; test if exponent is negative
272 tst.pl r9,r0 ; test if exponent is zero
273 jpnz [blink] ; return if non-denormal result
/* Denormal result: keep only the sign of r7 and set the single fraction bit
   at position exponent - 2.  */
274 bmsk r6,r7,30 /* r6 = exponent field of r7 */
275 lsr r3,r6,23 /* r3 = exponent */
276 xor r0,r6,r7 /* r0 = sign bit only */
277 sub_s r3,r3,24-22
278 j_s.d [blink]
279 bset r0,r0,r3
280
281 ; If a denorm is produced, we have an exact result -
282 ; no need for rounding.
/* Reached from .Lsub_done when the exponent field r6 is smaller than the
   amount that normalization would take off.  The fraction r4:r12 is shifted
   left by exponent - 1 only, and the exponent field of the result is
   zero.  */
283 .balign 4
284 .Ldenorm_sub:
285 sub r3,r6,1
286 lsr.f r3,r3,23 /* r3 = exponent - 1 = left shift count; Z set if zero */
287 xor r0,r6,r7 /* r0 = sign bit only (r6 is r7 without the sign) */
288 neg_s r1,r3
289 asl.ne r4,r4,r3 /* shift the high word, unless the count is zero */
290 lsr_s r12,r12,r1 /* guard bits that move up into the result */
291 add_s r0,r0,r4
292 j_s.d [blink]
/* Delay slot: the guard bits only contribute when the count is nonzero
   (ne presumably still from the lsr.f above).  */
293 add.ne r0,r0,r12
294
/* Reached from .Lsub_done when no normalizing shift is needed.  r4 = result
   fraction including the leading 1 at bit 23, r12 = guard bits, r7 = sign
   and exponent.  Only rounding is left; it looks like round to nearest
   with ties to even, assuming btst leaves the carry alone.  */
295 .balign 4
296 .Lsub_done_noshift:
297 add.f 0,r12,r12 /* carry = top guard bit; Z = no guard bits below it */
298 btst.eq r4,0 /* tie or no guard bits: Z = low result bit is clear */
299 bclr r4,r4,23 /* drop the leading 1; the exponent field in r7 implies it */
300 add r0,r7,r4
301 j_s.d [blink]
302 adc.ne r0,r0,0 /* delay slot: add the carry (round up) unless Z is set */
303
/* Reached from .Ladd when the fraction sum did not carry into bit 23.
   r0 = DBL0 + aligned DBL1, r5 = guard bits.  Only rounding is left; it
   looks like round to nearest with ties to even.  */
304 .balign 4
305 .Lno_shiftdown:
306 add.f 0,r5,r5 /* carry = top guard bit; Z = no guard bits below it */
307 btst.eq r0,0 /* tie or no guard bits: Z = low result bit is clear */
308 cmp.eq r5,r5 /* still Z: comparing r5 with itself clears the carry */
309 j_s.d [blink]
310 add.cs r0,r0,1 /* delay slot: round up if the carry survived */
311
/* Return DBL1: r0 = r1 in the delay slot of the return.  */
312 .Lret_dbl1:
313 j_s.d [blink]
314 mov_l r0,r1
/* Branched to from .Lsmall_shift_dbl0, with the fraction bits of DBL0 in r0
   and no leading 1 added (the bset_s after that branch is skipped).  The
   shift count is reduced by one: r8 holds it as a positive number, r12 as
   a negative one.  */
315 .balign 4
316 .Ldenorm_small_shift_dbl0:
317 sub.f r8,r8,1
318 bne.d .Lfixed_denorm_small_shift_dbl0 /* count stays nonzero: shift as usual */
319 add_s r12,r12,1 /* delay slot */
320 brlt.d r10,0,.Lrsub /* signs differ: subtract */
321 mov r5,0 /* delay slot: nothing is shifted out, so no guard bits */
/* Add, DBL1 has the larger exponent.  r0 = aligned fraction of DBL0,
   r5 = guard bits.  Same steps as .Ladd with the two inputs swapped.  */
322 .Ladd_dbl1_gt: ; both bit 23 of DBL0 and bit 0 of r5 are clear.
323 bmsk r2,r1,22 /* r2 = fraction bits of DBL1 */
324 add_s r2,r2,r0 /* fraction sum; bit 23 set = carry out of the fraction */
325 bbit0.d r2,23,.Lno_shiftdown_dbl1_gt
326 add_s r0,r1,r0 /* delay slot: r0 = DBL1 + aligned DBL0 */
/* The carry out of the fraction has raised the exponent field of r0 by one,
   so the fraction has to be shifted down.  This is done by subtracting r1
   (the low 23 bits of the sum, halved unless the result is infinity) from
   r0 in the final delay slot.  */
327 bic.f 0,r9,r0; check for overflow -> infinity; eq : infinity
328 bmsk r1,r2,22 /* r1 = low 23 bits of the fraction sum */
329 lsr.ne.f r2,r2,2; cc: even ; hi: might round down
330 lsr.ne r1,r1,1
331 rcmp.hi r5,1; hi : round down
332 bclr.hi r0,r0,0
333 j_l.d [blink]
334 sub_s r0,r0,r1
335
/* Reached from .Ladd_dbl1_gt when the fraction sum did not carry into
   bit 23.  r0 = DBL1 + aligned DBL0, r5 = guard bits.  Same instruction
   sequence as .Lno_shiftdown (presumably duplicated to stay within reach of
   the bbit0.d that branches here).  */
336 .balign 4
337 .Lno_shiftdown_dbl1_gt:
338 add.f 0,r5,r5 /* carry = top guard bit; Z = no guard bits below it */
339 btst.eq r0,0 /* tie or no guard bits: Z = low result bit is clear */
340 cmp.eq r5,r5 /* still Z: comparing r5 with itself clears the carry */
341 j_s.d [blink]
342 add.cs r0,r0,1 /* delay slot: round up if the carry survived */
343 ENDFUNC(__addsf3)
344 ENDFUNC(__subsf3)