]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S
Update copyright years.
[thirdparty/gcc.git] / libgcc / config / arc / ieee-754 / arc600-mul64 / divsf3.S
1 /* Copyright (C) 2008-2021 Free Software Foundation, Inc.
2 Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
3 on behalf of Synopsys Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /*
27 - calculate 15..18 bit inverse using a table of approximating polynomials.
28 precision is higher for polynomials used to evaluate input with larger
29 value.
30 - do one Newton-Raphson iteration step to double the precision,
31 then multiply this with the divisor
32 -> more time to decide if dividend is subnormal
33 - the worst error propagation is on the side of the value range
34 with the least initial defect, thus giving us about 30 bits precision.
35 */
36 #include "../arc-ieee-754.h"
37
/* Debug-only harness, compiled out by "#if 0".  It calls __divsf3_c and
   then __divsf3_asm with the same two operands and branches to abort
   when the two results differ.  The #define at the end renames the real
   implementation below to __divsf3_asm while the harness is enabled.  */
38 #if 0 /* DEBUG */
39 .global __divsf3
40 FUNC(__divsf3)
41 .balign 4
42 __divsf3:
43 push_s blink ; keep the return address across both calls
44 push_s r1 ; save the second operand
45 bl.d __divsf3_c ; first call: result is left in r0
46 push_s r0 ; delay slot: save the first operand
47 ld_s r1,[sp,4] ; reload the second operand for the second call
48 st_s r0,[sp,4] ; park the __divsf3_c result in that stack slot
49 bl.d __divsf3_asm ; second call: result is left in r0
50 pop_s r0 ; delay slot: restore the first operand
51 pop_s r1 ; r1 = parked __divsf3_c result
52 pop_s blink
53 cmp r0,r1 ; compare the two results bit for bit
54 #if 1
55 bne abort
56 jeq_s [blink]
57 b abort
58 #else
59 bne abort
60 j_s [blink]
61 #endif
62 ENDFUNC(__divsf3)
63 #define __divsf3 __divsf3_asm
64 #endif /* DEBUG */
65
66 FUNC(__divsf3)
67 .balign 4
/* Lookup table for the initial reciprocal estimate: 64 words, selected
   by bits 17..22 of the divisor (lsr by 17, bmsk_s to 6 bits, ld.as at
   __divsf3).  The selected word is multiplied with the shifted divisor
   fraction, and the high product word is subtracted from the word
   shifted left by 13.
   The table is addressed relative to pcl, so its distance to the code
   at __divsf3 must stay as the sub3 there expects.  */
68 .Ldivtab:
69 .long 0xfc0ffff0
70 .long 0xf46ffefd
71 .long 0xed1ffd2a
72 .long 0xe627fa8e
73 .long 0xdf7ff73b
74 .long 0xd917f33b
75 .long 0xd2f7eea3
76 .long 0xcd1fe986
77 .long 0xc77fe3e7
78 .long 0xc21fdddb
79 .long 0xbcefd760
80 .long 0xb7f7d08c
81 .long 0xb32fc960
82 .long 0xae97c1ea
83 .long 0xaa27ba26
84 .long 0xa5e7b22e
85 .long 0xa1cfa9fe
86 .long 0x9ddfa1a0
87 .long 0x9a0f990c
88 .long 0x9667905d
89 .long 0x92df878a
90 .long 0x8f6f7e84
91 .long 0x8c27757e
92 .long 0x88f76c54
93 .long 0x85df630c
94 .long 0x82e759c5
95 .long 0x8007506d
96 .long 0x7d3f470a
97 .long 0x7a8f3da2
98 .long 0x77ef341e
99 .long 0x756f2abe
100 .long 0x72f7212d
101 .long 0x709717ad
102 .long 0x6e4f0e44
103 .long 0x6c1704d6
104 .long 0x69e6fb44
105 .long 0x67cef1d7
106 .long 0x65c6e872
107 .long 0x63cedf18
108 .long 0x61e6d5cd
109 .long 0x6006cc6d
110 .long 0x5e36c323
111 .long 0x5c76b9f3
112 .long 0x5abeb0b7
113 .long 0x5916a79b
114 .long 0x57769e77
115 .long 0x55de954d
116 .long 0x54568c4e
117 .long 0x52d6834d
118 .long 0x51667a7f
119 .long 0x4ffe71b5
120 .long 0x4e9e68f1
121 .long 0x4d466035
122 .long 0x4bf65784
123 .long 0x4aae4ede
124 .long 0x496e4646
125 .long 0x48363dbd
126 .long 0x47063547
127 .long 0x45de2ce5
128 .long 0x44be2498
129 .long 0x43a61c64
130 .long 0x4296144a
131 .long 0x41860c0e
132 .long 0x407e03ee
/* Exponent-field mask; __divsf3 loads it pcl-relative into r9.  */
133 .L7f800000:
134 .long 0x7f800000
/* Out-of-line exits used by the denormal-operand code further down.
   Both are entered with r0 = dividend, r1 = divisor and
   r9 = 0x7f800000, and both return through blink with the result in r0.
     .Linf_NaN: signed infinity, or a NaN when the dividend is +-0.
     .Lret0:    signed zero.
   This code lies between .Ldivtab and the pcl-relative accesses in
   __divsf3, so changing its size changes the offsets used there.
   NOTE(review): a NaN dividend with a zero or denormal divisor also
   seems to arrive at .Linf_NaN (its exponent bits satisfy the
   large-number test at .Ldenorm_fp1) and would then be returned as an
   infinity rather than a NaN - confirm.  */
135 .balign 4
136 .global __divsf3_support
137 __divsf3_support:
138 .Linf_NaN:
139 bclr.f 0,r0,31 ; Z set if the dividend is +-0: 0/0 -> NaN
140 xor_s r0,r0,r1 ; bit 31 = sign of the result
141 bmsk r1,r0,30 ; r1 = all bits below the sign bit
142 bic_s r0,r0,r1 ; r0 = sign bit only
143 sub.eq r0,r0,1 ; 0/0 only: sets every bit below bit 31
144 j_s.d [blink]
145 or r0,r0,r9 ; delay slot: set all exponent bits
146 .Lret0:
147 xor_s r0,r0,r1 ; bit 31 = sign of the result
148 bmsk r1,r0,30 ; r1 = all bits below the sign bit
149 j_s.d [blink]
150 bic_s r0,r0,r1 ; delay slot: return the sign bit only, i.e. +-0
151 /* N.B. the spacing between divtab and the sub3 to get its address must
152 be a multiple of 8. */
/* Main entry.  r0 = dividend, r1 = divisor (compare the x/0 and x/inf
   notes in the special-case handlers); the result is returned in r0.
   Register use on the fast path:
     r9  = 0x7f800000, the exponent-field mask
     r11 = exponent field of the divisor
     r2  = exponent field of the dividend, later the result exponent
     r6  = divisor << 8 with bit 31 set
     r12 = dividend << 8, r3 = the same with bit 31 set
     r5  = table entry, r7 = reciprocal estimate
   Operands with an all-zero or all-ones exponent field branch out to
   the .Ldenorm_fp* / .Linf_nan_fp* handlers.  */
153 __divsf3:
154 lsr r2,r1,17
155 sub3 r3,pcl,37 ; (.-.Ldivtab) >> 3
156 bmsk_s r2,r2,5 ; r2 = bits 17..22 of the divisor: table index
157 ld.as r5,[r3,r2] ; r5 = table entry
158 asl r4,r1,9 ; r4 = divisor with sign and exponent shifted out
159 ld.as r9,[pcl,-13]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
160 mulu64 r5,r4 ; table entry * fraction, picked up from mhi below
161 and.f r11,r1,r9 ; r11 = divisor exponent field, Z if it is zero
162 asl r6,r1,8
163 bset r6,r6,31 ; r6 = divisor mantissa with the leading one at bit 31
164 beq.d .Ldenorm_fp1 ; divisor is zero or denormal
165 asl r5,r5,13 ; delay slot: table entry << 13
166 breq.d r11,r9,.Linf_nan_fp1 ; divisor is Inf or NaN
167 and.f r2,r0,r9 ; delay slot: r2 = dividend exponent field, Z if zero
168 sub r7,r5,mhi ; first estimate: (entry << 13) - high word of entry * fraction
169 mulu64 r7,r6 ; estimate * divisor mantissa
170 beq.d .Ldenorm_fp0 ; dividend is zero or denormal (Z from the and.f above)
171 asl r12,r0,8 ; delay slot: r12 = dividend << 8
172 breq.d r2,r9,.Linf_nan_fp0 ; dividend is Inf or NaN
173 mulu64 mhi,r7 ; delay slot: high word of that product * estimate
174 .Lpast_denorm_fp1:
175 bset r3,r12,31 ; r3 = dividend mantissa with the leading one at bit 31
176 .Lpast_denorm_fp0:
177 cmp_s r3,r6
178 lsr.cc r3,r3,1 ; dividend mantissa >= divisor mantissa: halve it
179 add_s r2,r2, /* wait for immediate */ \
180 0x3f000000 ; add the exponent bias, 126 << 23
181 sub r7,r7,mhi ; u1.31 inverse, about 30 bit
182 mulu64 r3,r7 ; quotient mantissa = dividend mantissa * inverse
183 sbc r2,r2,r11 ; subtract divisor exponent and carry (presumably still the one from cmp_s)
184 xor.f 0,r0,r1 ; N set if the operand signs differ
185 and r0,r2,r9 ; r0 = exponent field of the result
186 bclr r3,r9,23 ; 0x7f000000
187 brhs.d r2,r3,.Linf_denorm ; exponent outside the normal range
188 bxor.mi r0,r0,31 ; delay slot: signs differ, set the result sign
/* Rounding and packing, followed by the result special cases.
   On entry from the fast path: r0 = sign and exponent field of the
   result, mhi = quotient mantissa (product of the last mulu64),
   r6 = divisor mantissa, r12 = dividend << 8, r9 = 0x7f800000.
   Denormal results join at .Lpast_denorm with r3 already holding the
   shifted mantissa plus the rounding constant and r9 holding the
   shift count, so the fast path needs no extra instruction.  */
189 add r3,mhi,0x22 ; round to nearest or higher
190 .Lpast_denorm:
191 tst r3,0x3c ; check if rounding was unsafe
192 lsr r3,r3,6 ; drop the guard bits
193 jne.d [blink] ; return if rounding was safe.
194 add_s r0,r0,r3 ; delay slot: merge the mantissa into the result
195 /* work out exact rounding if we fall through here. */
196 /* We know that the exact result cannot be represented in single
197 precision. Find the mid-point between the two nearest
198 representable values, multiply with the divisor, and check if
199 the result is larger than the dividend. */
200 add_s r3,r3,r3
201 sub_s r3,r3,1 ; r3 = 2 * mantissa - 1, the mid-point
202 mulu64 r3,r6 ; mid-point * divisor mantissa
203 asr.f 0,r0,1 ; for round-to-even in case this is a denorm
204 rsub r2,r9,25 ; r2 = 25 - r9
205 asl_s r12,r12,r2 ; scale the dividend to match the product
206 sub.f 0,r12,mlo ; N set if dividend < mid-point * divisor
207 j_s.d [blink]
208 sub.mi r0,r0,1 ; delay slot: rounded too high, step down by one
/* Divisor is Inf or NaN.  r2 = exponent field of the dividend.  */
209 .Linf_nan_fp1:
210 lsr_s r0,r0,31
211 bmsk.f 0,r1,22 ; Z set if the divisor fraction is zero (Inf)
212 asl_s r0,r0,31 ; r0 = sign of the dividend only
213 bne_s 0f ; x/nan -> nan
214 brne r2,r9,.Lsigned0 ; x/inf -> 0, but inf/inf and nan/inf -> nan
215 0: j_s.d [blink]
216 mov r0,-1 ; delay slot: all ones, a NaN
/* .Lsigned0: r0 = dividend sign only, giving +-0.
   .Linf_nan_fp0: r0 = the Inf or NaN dividend itself.
   Either way the sign bit is flipped when the divisor is negative.  */
217 .Lsigned0:
218 .Linf_nan_fp0:
219 tst_s r1,r1
220 j_s.d [blink]
221 bxor.mi r0,r0,31 ; delay slot: divisor negative, flip the sign
222 .balign 4
223 .global __divsf3
224 /* For denormal results, it is possible that an exact result needs
225 rounding, and thus the round-to-even rule has to come into play. */
/* Reached when the result exponent in r2 is >= 0x7f000000 (unsigned):
   below 0xc0000000 is overflow, anything else a negative exponent.  */
226 .Linf_denorm:
227 brlo r2,0xc0000000,.Linf
228 .Ldenorm:
229 asr_s r2,r2,23 ; r2 = negative exponent value
230 bic r0,r0,r9 ; clear the exponent field, keep the sign
231 neg r9,r2 ; r9 = denormalization shift count
/* The shifted quotient has to reach the rounding code in r3.  Shift the
   high product word here and add the rounding constant in the delay
   slot; the join point .Lpast_denorm lies after the fast-path add that
   loads r3 from mhi, so the shift is not overwritten.  */
lsr r3,mhi,r9 ; denormalize the quotient mantissa
232 brlo.d r9,25,.Lpast_denorm ; some mantissa bits survive the shift
233 add r3,r3,0x22 ; delay slot: round to nearest or higher
234 /* Fall through: return +- 0 */
235 j_s [blink]
236 .Linf:
237 j_s.d [blink]
238 or r0,r0,r9 ; delay slot: set all exponent bits, giving +-Inf
/* Operand normalization for exponent fields of zero.
   .Ldenorm_fp1: the divisor is zero or denormal.  Entered with
   r3 = address of .Ldivtab, r6 = divisor << 8 with bit 31 set and
   r9 = 0x7f800000.  It redoes the table lookup and first estimate on
   the normalized mantissa and rejoins at .Lpast_denorm_fp1.
   .Ldenorm_fp0: the dividend is zero or denormal.  Entered with
   r12 = dividend << 8 and r11 = divisor exponent field; rejoins at
   .Lpast_denorm_fp0.  */
239 .balign 4
240 .Ldenorm_fp1:
241 bclr r6,r6,31 ; remove the leading one set on the fast path
242 norm.f r12,r6 ; flag for x/0 -> Inf check
243 add r6,r6,r6
244 rsub r5,r12,16 ; r5 = 16 - r12
245 ror r5,r1,r5 ; rotate the divisor so the index bits end up lowest
246 asl r6,r6,r12 ; normalize the divisor mantissa
247 bmsk r5,r5,5 ; 6-bit table index
248 ld.as r5,[r3,r5] ; r5 = table entry
249 add r4,r6,r6 ; r4 = normalized mantissa << 1
250 ; load latency
251 mulu64 r5,r4 ; table entry * fraction, as on the fast path
252 bic.ne.f 0, \
253 0x60000000,r0 ; large number / denorm -> Inf
254 asl r5,r5,13 ; table entry << 13
255 sub r7,r5,mhi ; first reciprocal estimate
256 beq.d .Linf_NaN ; x/0, or dividend with exponent bits 29 and 30 set
257 mulu64 r7,r6 ; delay slot: estimate * divisor mantissa
258 asl_s r12,r12,23 ; move the normalization count to the exponent position
259 and.f r2,r0,r9 ; r2 = dividend exponent field, Z if zero
260 add_s r2,r2,r12 ; add the normalization count to the dividend exponent
261 asl r12,r0,8 ; r12 = dividend << 8
262 bne.d .Lpast_denorm_fp1 ; dividend exponent field is non-zero
/* The next instruction is both the delay slot of the bne.d above and
   the entry point .Ldenorm_fp0.  */
263 .Ldenorm_fp0: mulu64 mhi,r7
264 bclr r12,r12,31
265 norm.f r3,r12 ; flag for 0/x -> 0 check
266 bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
267 beq_s .Lret0
268 asl_s r12,r12,r3 ; normalize the dividend mantissa
269 asl_s r3,r3,23 ; move the normalization count to the exponent position
270 add_s r12,r12,r12
271 add r11,r11,r3 ; add the count to r11, which is subtracted from the exponent later
272 b.d .Lpast_denorm_fp0
273 mov_s r3,r12 ; delay slot: r3 = normalized dividend mantissa
274 ENDFUNC(__divsf3)