]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgcc/config/arc/ieee-754/adddf3.S
Update copyright years.
[thirdparty/gcc.git] / libgcc / config / arc / ieee-754 / adddf3.S
CommitLineData
a945c346 1/* Copyright (C) 2008-2024 Free Software Foundation, Inc.
d38a64b4
JR
2 Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
3 on behalf of Synopsys Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17Under Section 7 of GPL version 3, you are granted additional
18permissions described in the GCC Runtime Library Exception, version
193.1, as published by the Free Software Foundation.
20
21You should have received a copy of the GNU General Public License and
22a copy of the GCC Runtime Library Exception along with this program;
23see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24<http://www.gnu.org/licenses/>. */
25
26#include "arc-ieee-754.h"
27#if 0 /* DEBUG */
28 .global __adddf3
29 .balign 4
30__adddf3:
31 push_s blink
32 push_s r2
33 push_s r3
34 push_s r0
35 bl.d __adddf3_c
36 push_s r1
37 ld_s r2,[sp,12]
38 ld_s r3,[sp,8]
39 st_s r0,[sp,12]
40 st_s r1,[sp,8]
41 pop_s r1
42 bl.d __adddf3_asm
43 pop_s r0
44 pop_s r3
45 pop_s r2
46 pop_s blink
47 cmp r0,r2
48 cmp.eq r1,r3
49 jeq_s [blink]
50 bl abort
51 .global __subdf3
52 .balign 4
53__subdf3:
54 push_s blink
55 push_s r2
56 push_s r3
57 push_s r0
58 bl.d __subdf3_c
59 push_s r1
60 ld_s r2,[sp,12]
61 ld_s r3,[sp,8]
62 st_s r0,[sp,12]
63 st_s r1,[sp,8]
64 pop_s r1
65 bl.d __subdf3_asm
66 pop_s r0
67 pop_s r3
68 pop_s r2
69 pop_s blink
70 cmp r0,r2
71 cmp.eq r1,r3
72 jeq_s [blink]
73 bl abort
74#define __adddf3 __adddf3_asm
75#define __subdf3 __subdf3_asm
76#endif /* DEBUG */
77/* N.B. This is optimized for ARC700.
78 ARC600 has very different scheduling / instruction selection criteria. */
79
80/* inputs: DBL0, DBL1 (r0-r3)
81 output: DBL0 (r0, r1)
82 clobber: r2-r10, r12, flags
83 All NaN highword bits must be 1. NaN low word is random. */
84
85 .balign 4
86 .global __adddf3
87 .global __subdf3
88 .long 0x7ff00000 ; exponent mask
89 FUNC(__adddf3)
90 FUNC(__subdf3)
91__subdf3:
92 bxor_l DBL1H,DBL1H,31
93__adddf3:
94 ld r9,[pcl,-8]
95 bmsk r4,DBL0H,30
96 xor r10,DBL0H,DBL1H
97 and r6,DBL1H,r9
98 sub.f r12,r4,r6
99 asr_s r12,r12,20
100 blo .Ldbl1_gt
101 brhs r4,r9,.Linf_nan
102 brhs r12,32,.Large_shift
103 brne r12,0,.Lsmall_shift
104 brge r10,0,.Ladd_same_exp ; r12 == 0
105
106/* After subtracting, we need to normalize; when shifting to place the
107 leading 1 into position for the implicit 1 and adding that to DBL0H,
108 we increment the exponent. Thus, we have to subtract one more than
109 the shift count from the exponent beforehand. Iff the exponent drops thus
110 below zero (before adding in the fraction with the leading one), we have
111 generated a denormal number. Denormal handling is basicallly reducing the
112 shift count so that we produce a zero exponent instead; however, this way
113 the shift count can become zero (if we started out with exponent 1).
114 Therefore, a simple min operation is not good enough, since we don't
115 want to handle a zero normalizing shift in the main path.
116 On the plus side, we don't need to check for denorm input, the result
117 of subtracing these looks just the same as denormals generated during
118 subtraction. */
119 bmsk r7,DBL1H,30
120 cmp r4,r7
121 cmp.eq DBL0L,DBL1L
122 blo .L_rsub_same_exp
123 sub.f DBL0L,DBL0L,DBL1L
124 bmsk r12,DBL0H,19
125 bic DBL1H,DBL0H,r12
126 sbc.f r4,r4,r7
127 beq_l .Large_cancel
128 norm DBL1L,r4
129 b.d .Lsub_done_same_exp
130 sub r12,DBL1L,9
131
132 .balign 4
133.Linf_nan:
134 ; If both inputs are inf, but with different signs, the result is NaN.
135 asr r12,r10,31
136 or_s DBL1H,DBL1H,r12
137 j_s.d [blink]
138 or.eq DBL0H,DBL0H,DBL1H
139
140 .balign 4
141.L_rsub_same_exp:
142 rsub.f DBL0L,DBL0L,DBL1L
143 bmsk r12,DBL1H,19
144 bic_s DBL1H,DBL1H,r12
145 sbc.f r4,r7,r4
146 beq_l .Large_cancel
147 norm DBL1L,r4
148
149 sub r12,DBL1L,9
150.Lsub_done_same_exp:
151 asl_s r12,r12,20
152 sub_s DBL1L,DBL1L,10
153 sub DBL0H,DBL1H,r12
154 xor.f 0,DBL0H,DBL1H
155 bmi .Ldenorm
156.Lpast_denorm:
157 neg_s r12,DBL1L
158 lsr r7,DBL0L,r12
159 asl r12,r4,DBL1L
160 asl_s DBL0L,DBL0L,DBL1L
161 add_s r12,r12,r7
162 j_s.d [blink]
163 add_l DBL0H,DBL0H,r12
164 .balign 4
165.Ladd_same_exp:
166 /* This is a special case because we can't test for need to shift
167 down by checking if bit 20 of DBL0H changes. OTOH, here we know
168 that we always need to shift down. */
169 ; The implicit 1 of DBL0 is not shifted together with the
170 ; fraction, thus effectively doubled, compensating for not setting
171 ; implicit1 for DBL1
172 add_s r12,DBL0L,DBL1L
173 lsr.f 0,r12,2 ; round to even
174 breq r6,0,.Ldenorm_add
175 adc.f DBL0L,DBL0L,DBL1L
176 sub r7,DBL1H,DBL0H
177 sub1 r7,r7,r9 ; boost exponent by 2/2
178 rrc DBL0L,DBL0L
179 asr.f r7,r7 ; DBL1.fraction/2 - DBL0.fraction/2 ; exp++
180 add.cs.f DBL0L,DBL0L,0x80000000
181 add_l DBL0H,DBL0H,r7 ; DBL0.implicit1 not shifted for DBL1.implicit1
182 add.cs DBL0H,DBL0H,1
183 bic.f 0,r9,DBL0H ; check for overflow -> infinity.
184 jne_l [blink]
185 and DBL0H,DBL0H,0xfff00000
186 j_s.d [blink]
187 mov_s DBL0L,0
188 .balign 4
189.Large_shift:
190 brhs r12,55,.Lret_dbl0
191 bmsk_s DBL1H,DBL1H,19
192 brne r6,0,.Lno_denorm_large_shift
193 brhi.d r12,33,.Lfixed_denorm_large_shift
194 sub_s r12,r12,1
195 breq r12,31, .Lfixed_denorm_small_shift
196.Lshift32:
197 mov_s r12,DBL1L
198 mov_s DBL1L,DBL1H
199 brlt.d r10,0,.Lsub
200 mov_s DBL1H,0
201 b_s .Ladd
202.Ldenorm_add:
203 cmp_s r12,DBL1L
204 mov_s DBL0L,r12
205 j_s.d [blink]
206 adc DBL0H,r4,DBL1H
207
208.Lret_dbl0:
209 j_s [blink]
210 .balign 4
211.Lsmall_shift:
212 breq.d r6,0,.Ldenorm_small_shift
213 bmsk_s DBL1H,DBL1H,19
214 bset_s DBL1H,DBL1H,20
215.Lfixed_denorm_small_shift:
216 neg r8,r12
217 asl r4,DBL1H,r8
218 lsr_l DBL1H,DBL1H,r12
219 lsr r5,DBL1L,r12
220 asl r12,DBL1L,r8
221 brge.d r10,0,.Ladd
222 or DBL1L,r4,r5
223/* subtract, abs(DBL0) > abs(DBL1) */
224/* DBL0H, DBL0L: original values
225 DBL1H, DBL1L: fraction with explicit leading 1, shifted into place
226 r4: orig. DBL0H & 0x7fffffff
227 r6: orig. DBL1H & 0x7ff00000
228 r9: 0x7ff00000
229 r10: orig. DBL0H ^ DBL1H
230 r12: guard bits */
231 .balign 4
232.Lsub:
233 neg.f r12,r12
234 mov_s r7,DBL1H
235 bmsk r5,DBL0H,19
236 sbc.f DBL0L,DBL0L,DBL1L
237 bic DBL1H,DBL0H,r5
238 bset r5,r5,20
239 sbc.f r4,r5,r7
240 beq_l .Large_cancel_sub
241 norm DBL1L,r4
242 bmsk r6,DBL1H,30
243.Lsub_done:
244 sub_s DBL1L,DBL1L,9
245 breq DBL1L,1,.Lsub_done_noshift
246 asl r5,DBL1L,20
247 sub_s DBL1L,DBL1L,1
248 brlo r6,r5,.Ldenorm_sub
249 sub DBL0H,DBL1H,r5
250.Lpast_denorm_sub:
251 neg_s DBL1H,DBL1L
252 lsr r6,r12,DBL1H
253 asl_s r12,r12,DBL1L
254 and r8,r6,1
255 add1.f 0,r8,r12
256 add.ne.f r12,r12,r12
257 asl r8,DBL0L,DBL1L
258 lsr r12,DBL0L,DBL1H
259 adc.f DBL0L,r8,r6
260 asl r5,r4,DBL1L
261 add_s DBL0H,DBL0H,r12
262 j_s.d [blink]
263 adc DBL0H,DBL0H,r5
264
265 .balign 4
266.Lno_denorm_large_shift:
267 breq.d r12,32,.Lshift32
268 bset_l DBL1H,DBL1H,20
269.Lfixed_denorm_large_shift:
270 neg r8,r12
271 asl r4,DBL1H,r8
272 lsr r5,DBL1L,r12
273 asl.f 0,DBL1L,r8
274 lsr DBL1L,DBL1H,r12
275 or r12,r4,r5
276 tst.eq r12,1
277 or.ne r12,r12,2
278 brlt.d r10,0,.Lsub
279 mov_s DBL1H,0
280 b_l .Ladd
281
282 ; If a denorm is produced without shifting, we have an exact result -
283 ; no need for rounding.
284 .balign 4
285.Ldenorm_sub:
286 lsr DBL1L,r6,20
287 xor DBL0H,r6,DBL1H
288 brne.d DBL1L,1,.Lpast_denorm_sub
289 sub_s DBL1L,DBL1L,1
290.Lsub_done_noshift:
291 add.f 0,r12,r12
292 btst.eq DBL0L,0
293 cmp.eq r12,r12
294 add.cs.f DBL0L,DBL0L,1
295 bclr r4,r4,20
296 j_s.d [blink]
297 adc DBL0H,DBL1H,r4
298
299 .balign 4
300.Ldenorm_small_shift:
301 brne.d r12,1,.Lfixed_denorm_small_shift
302 sub_l r12,r12,1
303 brlt r10,0,.Lsub
304.Ladd: ; bit 20 of DBL1H is clear and bit 0 of r12 does not matter
305 add.f DBL0L,DBL0L,DBL1L
306 add_s DBL1H,DBL1H,DBL0H
307 add.cs DBL1H,DBL1H,1
308 xor_l DBL0H,DBL0H,DBL1H
309 bbit0 DBL0H,20,.Lno_shiftdown
310 lsr.f DBL0H,DBL1H
311 and r4,DBL0L,2
312 bmsk DBL0H,DBL0H,18
313 sbc DBL0H,DBL1H,DBL0H
314 rrc.f DBL0L,DBL0L
315 or.f r12,r12,r4
316 cmp.eq r12,r12
317 add.cs.f DBL0L,DBL0L,1
318 bic.f 0,r9,DBL0H ; check for generating infinity with possible ...
319 jne.d [blink] ; ... non-zero fraction
320 add.cs DBL0H,DBL0H,1
321 mov_s DBL0L,0
322 bmsk DBL1H,DBL0H,19
323 j_s.d [blink]
324 bic_s DBL0H,DBL0H,DBL1H
325.Lno_shiftdown:
326 mov_s DBL0H,DBL1H
327 add.f 0,r12,r12
328 btst.eq DBL0L,0
329 cmp.eq r12,r12
330 add.cs.f DBL0L,DBL0L,1
331 j_s.d [blink]
332 add.cs DBL0H,DBL0H,1
333 .balign 4
334.Ldenorm:
335 bmsk DBL0H,DBL1H,30
336 lsr r12,DBL0H,20
337 xor_s DBL0H,DBL0H,DBL1H
338 sub_l DBL1L,r12,1
339 bgt .Lpast_denorm
340 j_s.d [blink]
341 add_l DBL0H,DBL0H,r4
342
343 .balign 4
344.Large_cancel:
345 ;DBL0L: mantissa DBL1H: sign & exponent
346 norm.f DBL1L,DBL0L
347 bmsk DBL0H,DBL1H,30
348 add_s DBL1L,DBL1L,22
349 mov.mi DBL1L,21
350 add_s r12,DBL1L,1
351 asl_s r12,r12,20
352 beq_s .Lret0
353 brhs.d DBL0H,r12,.Lpast_denorm_large_cancel
354 sub DBL0H,DBL1H,r12
355 bmsk DBL0H,DBL1H,30
356 lsr r12,DBL0H,20
357 xor_s DBL0H,DBL0H,DBL1H
358 sub.f DBL1L,r12,1
359 jle [blink]
360.Lpast_denorm_large_cancel:
361 rsub.f r7,DBL1L,32
362 lsr r7,DBL0L,r7
363 asl_s DBL0L,DBL0L,DBL1L
364 mov.ls r7,DBL0L
365 add_s DBL0H,DBL0H,r7
366 j_s.d [blink]
367 mov.ls DBL0L,0
368.Lret0:
369 j_s.d [blink]
370 mov_l DBL0H,0
371
372/* r4:DBL0L:r12 : unnormalized result fraction
373 DBL1H: result sign and exponent */
374/* When seeing large cancellation, only the topmost guard bit might be set. */
375 .balign 4
376.Large_cancel_sub:
377 norm.f DBL1L,DBL0L
378 bpnz.d 0f
379 bmsk DBL0H,DBL1H,30
380 mov r5,22<<20
381 bne.d 1f
382 mov_s DBL1L,21
383 bset r5,r5,5+20
384 add_s DBL1L,DBL1L,32
385 brne r12,0,1f
386 j_s.d [blink]
387 mov_l DBL0H,0
388 .balign 4
3890: add r5,DBL1L,23
390 asl r5,r5,20
391 add_s DBL1L,DBL1L,22
3921: brlo DBL0H,r5,.Ldenorm_large_cancel_sub
393 sub DBL0H,DBL1H,r5
394.Lpast_denorm_large_cancel_sub:
395 rsub.f r7,DBL1L,32
396 lsr r12,r12,r7
397 lsr r7,DBL0L,r7
398 asl_s DBL0L,DBL0L,DBL1L
399 add.ge DBL0H,DBL0H,r7
400 add_s DBL0L,DBL0L,r12
401 add.lt DBL0H,DBL0H,DBL0L
402 mov.eq DBL0L,r12
403 j_s.d [blink]
404 mov.lt DBL0L,0
405 .balign 4
406.Ldenorm_large_cancel_sub:
407 lsr r5,DBL0H,20
408 xor_s DBL0H,DBL0H,DBL1H
409 brgt.d r5,1,.Lpast_denorm_large_cancel_sub
410 sub DBL1L,r5,1
411 j_l [blink] ; denorm, no shift -> no rounding needed.
412
413/* r4: DBL0H & 0x7fffffff
414 r6: DBL1H & 0x7ff00000
415 r9: 0x7ff00000
416 r10: sign difference
417 r12: shift count (negative) */
418 .balign 4
419.Ldbl1_gt:
420 brhs r6,r9,.Lret_dbl1 ; inf or NaN
421 neg r8,r12
422 brhs r8,32,.Large_shift_dbl0
423.Lsmall_shift_dbl0:
424 breq.d r6,0,.Ldenorm_small_shift_dbl0
425 bmsk_s DBL0H,DBL0H,19
426 bset_s DBL0H,DBL0H,20
427.Lfixed_denorm_small_shift_dbl0:
428 asl r4,DBL0H,r12
429 lsr DBL0H,DBL0H,r8
430 lsr r5,DBL0L,r8
431 asl r12,DBL0L,r12
432 brge.d r10,0,.Ladd_dbl1_gt
433 or DBL0L,r4,r5
434/* subtract, abs(DBL0) < abs(DBL1) */
435/* DBL0H, DBL0L: fraction with explicit leading 1, shifted into place
436 DBL1H, DBL1L: original values
437 r6: orig. DBL1H & 0x7ff00000
438 r9: 0x7ff00000
439 r12: guard bits */
440 .balign 4
441.Lrsub:
442 neg.f r12,r12
443 bmsk r7,DBL1H,19
444 mov_s r5,DBL0H
445 sbc.f DBL0L,DBL1L,DBL0L
446 bic DBL1H,DBL1H,r7
447 bset r7,r7,20
448 sbc.f r4,r7,r5
449 beq_l .Large_cancel_sub
450 norm DBL1L,r4
451 b_l .Lsub_done ; note: r6 is already set up.
452
453.Lret_dbl1:
454 mov_s DBL0H,DBL1H
455 j_s.d [blink]
456 mov_l DBL0L,DBL1L
457 .balign 4
458.Ldenorm_small_shift_dbl0:
459 sub.f r8,r8,1
460 bne.d .Lfixed_denorm_small_shift_dbl0
461 add_s r12,r12,1
462 brlt r10,0,.Lrsub
463.Ladd_dbl1_gt: ; bit 20 of DBL0H is clear and bit 0 of r12 does not matter
464 add.f DBL0L,DBL0L,DBL1L
465 add_s DBL0H,DBL0H,DBL1H
466 add.cs DBL0H,DBL0H,1
467 xor DBL1H,DBL0H,DBL1H
468 bbit0 DBL1H,20,.Lno_shiftdown_dbl1_gt
469 lsr.f DBL1H,DBL0H
470 and r4,DBL0L,2
471 bmsk DBL1H,DBL1H,18
472 sbc DBL0H,DBL0H,DBL1H
473 rrc.f DBL0L,DBL0L
474 or.f r12,r12,r4
475 cmp.eq r12,r12
476 add.cs.f DBL0L,DBL0L,1
477 bic.f 0,r9,DBL0H ; check for generating infinity with possible ...
478 jne.d [blink] ; ... non-zero fraction
479 add.cs DBL0H,DBL0H,1
480 mov_s DBL0L,0
481 bmsk DBL1H,DBL0H,19
482 j_s.d [blink]
483 bic_s DBL0H,DBL0H,DBL1H
484.Lno_shiftdown_dbl1_gt:
485 add.f 0,r12,r12
486 btst.eq DBL0L,0
487 cmp.eq r12,r12
488 add.cs.f DBL0L,DBL0L,1
489 j_s.d [blink]
490 add.cs DBL0H,DBL0H,1
491
492 .balign 4
493.Large_shift_dbl0:
494 brhs r8,55,.Lret_dbl1
495 bmsk_s DBL0H,DBL0H,19
496 brne r6,0,.Lno_denorm_large_shift_dbl0
497 add_s r12,r12,1
498 brne.d r8,33,.Lfixed_denorm_large_shift_dbl0
499 sub r8,r8,1
500 bset_s DBL0H,DBL0H,20
501.Lshift32_dbl0:
502 mov_s r12,DBL0L
503 mov_s DBL0L,DBL0H
504 brlt.d r10,0,.Lrsub
505 mov_s DBL0H,0
506 b_s .Ladd_dbl1_gt
507
508 .balign 4
509.Lno_denorm_large_shift_dbl0:
510 breq.d r8,32,.Lshift32_dbl0
511 bset_l DBL0H,DBL0H,20
512.Lfixed_denorm_large_shift_dbl0:
513 asl r4,DBL0H,r12
514 lsr r5,DBL0L,r8
515 asl.f 0,DBL0L,r12
516 lsr DBL0L,DBL0H,r8
517 or r12,r4,r5
518 tst.eq r12,1
519 or.ne r12,r12,2
520 brlt.d r10,0,.Lrsub
521 mov_s DBL0H,0
522 b_l .Ladd_dbl1_gt
523 ENDFUNC(__adddf3)
524 ENDFUNC(__subdf3)