]>
Commit | Line | Data |
---|---|---|
8d9254fc | 1 | /* Copyright (C) 2008-2020 Free Software Foundation, Inc. |
d38a64b4 JR |
2 | Contributor: Joern Rennecke <joern.rennecke@embecosm.com> |
3 | on behalf of Synopsys Inc. | |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 3, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
26 | #include "../arc-ieee-754.h" | |
27 | ||
28 | #if 0 /* DEBUG: self-checking wrapper, disabled by default.  Calls __muldf3_c and __muldf3_asm with the same r0-r3 and branches to abort unless the results agree. */ | |
29 | .global __muldf3 | |
30 | .balign 4 | |
31 | __muldf3: | |
32 | push_s blink /* keep the return address across the two calls */ | |
33 | push_s r2 /* save r2 and r3 */ | |
34 | push_s r3 | |
35 | push_s r0 /* save r0 here and r1 in the delay slot of the call */ | |
36 | bl.d __muldf3_c /* first implementation, defined outside this file */ | |
37 | push_s r1 | |
38 | ld_s r2,[sp,12] /* reload the saved r2 and r3 ... */ | |
39 | ld_s r3,[sp,8] | |
40 | st_s r0,[sp,12] /* ... and park the first result (r0, r1) in those two stack slots */ | |
41 | st_s r1,[sp,8] | |
42 | pop_s r1 /* restore r1 here and r0 in the delay slot of the call */ | |
43 | bl.d __muldf3_asm /* the implementation below, renamed by the #define at the end of this block */ | |
44 | pop_s r0 | |
45 | pop_s r3 /* the parked first result is popped into r3 and r2 */ | |
46 | pop_s r2 | |
47 | pop_s blink | |
48 | cmp r0,r2 /* compare both result words */ | |
49 | cmp.eq r1,r3 | |
50 | jeq_s [blink] /* identical results: return */ | |
51 | and r12,DBL0H,DBL1H | |
52 | bic.f 0,0x7ff80000,r12 ; both NaN -> OK | |
53 | jeq_s [blink] | |
54 | b abort /* results differ */ | |
55 | #define __muldf3 __muldf3_asm /* from here on the name __muldf3 expands to __muldf3_asm */ | |
56 | #endif /* DEBUG */ | |
57 | ||
58 | __muldf3_support: /* This label makes debugger output saner. */ | |
59 | .balign 4 | |
60 | FUNC(__muldf3) | |
61 | .Ldenorm_2: /* Reached from .Ldenorm_dbl1 when DBL1H, with its sign bit cleared, is zero: every significant bit of the denormal operand is in DBL1L.  Normalizes that operand, compensates in the exponent field of DBL0H and falls through into __muldf3. */ | |
62 | breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output | |
63 | norm.f r12,DBL1L /* delay slot, executed whether or not the branch is taken */ | |
64 | mov.mi r12,21 /* flags come from norm.f - presumably MI means bit 31 of DBL1L is set, confirm against the ARC ISA manual */ | |
65 | add.pl r12,r12,22 /* otherwise r12 = norm result + 22 */ | |
66 | neg r11,r12 /* r11 = -r12, used as the shift and rotate count below */ | |
67 | asl_s r12,r12,20 /* move the count up to bit 20, where the exponent field (mask 0x7ff00000) starts */ | |
68 | lsr.f DBL1H,DBL1L,r11 | |
69 | ror DBL1L,DBL1L,r11 | |
70 | sub_s DBL0H,DBL0H,r12 /* lower the exponent field of the other operand by the shift count */ | |
71 | mov.eq DBL1H,DBL1L /* condition presumably still the Z flag of lsr.f above - confirm */ | |
72 | sub_l DBL1L,DBL1L,DBL1H | |
73 | /* Fall through. */ | |
74 | .global __muldf3 | |
75 | .balign 4 | |
76 | __muldf3: /* Double precision multiply.  Operands are in DBL0H:DBL0L and DBL1H:DBL1L (register macros, presumably from arc-ieee-754.h), the result is left in DBL0H:DBL0L and control returns through blink. */ | |
77 | mulu64 DBL0L,DBL1L /* low word times low word, result read from mlo and mhi below */ | |
78 | ld.as r9,[pcl,0x68] ; ((.L7ff00000-.+2)/4)] -- r9 = 0x7ff00000, the exponent field mask.  NOTE(review): the literal 0x68 presumably has to track the distance to .L7ff00000, confirm whenever code in between changes size | |
79 | bmsk r6,DBL0H,19 /* r6 = low 20 bits of DBL0H (high part of the fraction) */ | |
80 | bset r6,r6,20 /* add the implicit leading one */ | |
81 | and r11,DBL0H,r9 /* r11 = exponent field of operand 0 */ | |
82 | breq.d r11,0,.Ldenorm_dbl0 | |
83 | and r12,DBL1H,r9 /* delay slot: r12 = exponent field of operand 1 */ | |
84 | breq.d r12,0,.Ldenorm_dbl1 | |
85 | mov r8,mlo /* delay slot: r8 = lowest word of the product */ | |
86 | mov r4,mhi | |
87 | mulu64 r6,DBL1L /* high fraction of operand 0 times low word of operand 1 */ | |
88 | breq.d r11,r9,.Linf_nan /* exponent field of operand 0 is all ones */ | |
89 | bmsk r10,DBL1H,19 /* delay slot: r10 = low 20 bits of DBL1H */ | |
90 | breq.d r12,r9,.Linf_nan /* exponent field of operand 1 is all ones */ | |
91 | bset r10,r10,20 /* delay slot: add the implicit leading one */ | |
92 | add.f r4,r4,mlo /* accumulate the first cross product into r5:r4 */ | |
93 | adc r5,mhi,0 | |
94 | mulu64 r10,DBL0L /* high fraction of operand 1 times low word of operand 0 */ | |
95 | add_s r12,r12,r11 ; add exponents | |
96 | add.f r4,r4,mlo /* accumulate the second cross product */ | |
97 | adc r5,r5,mhi | |
98 | mulu64 r6,r10 /* high fraction times high fraction */ | |
99 | tst r8,r8 /* is the lowest product word nonzero?  Consumed by bset.ne below */ | |
100 | bclr r8,r9,30 ; 0x3ff00000 | |
101 | bset.ne r4,r4,0 ; put least significant word into sticky bit | |
102 | bclr r6,r9,20 ; 0x7fe00000 | |
103 | add.f r5,r5,mlo | |
104 | adc r7,mhi,0 ; fraction product in r7:r5:r4 | |
105 | lsr.f r10,r7,9 /* r10 = r7 >> 9, Z set when that is zero - presumably distinguishes the two possible positions of the leading product bit, confirm */ | |
106 | rsub.eq r8,r8,r9 ; 0x40000000 | |
107 | sub r12,r12,r8 ; subtract bias + implicit 1 | |
108 | brhs.d r12,r6,.Linf_denorm /* unsigned r12 >= 0x7fe00000: overflow or underflow handling */ | |
109 | rsub r10,r10,12 /* delay slot: r10 = 12 - r10, the left shift count used at .Lshift_frac */ | |
110 | .Lshift_frac: /* Entry: fraction in r7:r5:r4, left shift count in r10, exponent field in r12.  Also entered from .Linf_denorm and .Lshift32_frac. */ | |
111 | neg r8,r10 /* r8 = -r10, the complementary right shift count (presumably taken modulo 32 by the shifter - confirm) */ | |
112 | asl r6,r4,r10 /* r6 = bits of r4 that end up below the result */ | |
113 | lsr DBL0L,r4,r8 /* bits of r4 that end up in the low result word */ | |
114 | add.f 0,r6,r6 /* carry = top bit of r6, Z = all other bits of r6 are zero */ | |
115 | btst.eq DBL0L,0 /* only when Z was set: test bit 0 of the low result word */ | |
116 | cmp.eq r4,r4 ; round to nearest / round to even | |
117 | asl r4,r5,r10 | |
118 | lsr r5,r5,r8 | |
119 | adc.f DBL0L,DBL0L,r4 /* merge in the bits from r5 plus the rounding carry */ | |
120 | xor.f 0,DBL0H,DBL1H /* N = sign of the product.  NOTE(review): the adc below relies on the carry of the adc.f above surviving this xor.f - confirm against the ARC ISA manual */ | |
121 | asl r7,r7,r10 | |
122 | add_s r12,r12,r5 | |
123 | adc DBL0H,r12,r7 /* exponent field + high fraction bits + carry from the low word */ | |
124 | j_s.d [blink] | |
125 | bset.mi DBL0H,DBL0H,31 /* delay slot: set the sign bit when the xor.f result was negative */ | |
126 | ||
127 | /* N.B. This is optimized for ARC700. | |
128 | ARC600 has very different scheduling / instruction selection criteria. */ | |
129 | ||
130 | /* If one number is denormal, subtract some from the exponent of the other | |
131 | one (if the other exponent is too small, return 0), and normalize the | |
132 | denormal. Then re-run the computation. */ | |
133 | .Lret0_2: /* Zero result for the .Ldenorm_2 path.  By then .Ldenorm_dbl1 has already folded the sign of DBL1H into bit 31 of DBL0H, so only that bit is kept. */ | |
134 | lsr_s DBL0H,DBL0H,31 /* shift right then left by 31: clears bits 30..0 */ | |
135 | asl_s DBL0H,DBL0H,31 | |
136 | j_s.d [blink] | |
137 | mov_s DBL0L,0 /* delay slot: low word of the result is zero */ | |
138 | .balign 4 | |
139 | .Ldenorm_dbl0: /* Operand 0 has a zero exponent field: swap the operands so that this operand ends up in DBL1, then share the code below. */ | |
140 | mov_s r12,DBL0L /* swap the low words through r12 */ | |
141 | mov_s DBL0L,DBL1L | |
142 | mov_s DBL1L,r12 | |
143 | mov_s r12,DBL0H /* swap the high words through r12 */ | |
144 | mov_s DBL0H,DBL1H | |
145 | mov_s DBL1H,r12 | |
146 | and r11,DBL0H,r9 /* r11 = exponent field of the new operand 0 */ | |
147 | .Ldenorm_dbl1: /* DBL1 has a zero exponent field (zero or denormal), r11 is the exponent field of DBL0. */ | |
148 | brhs r11,r9,.Linf_nan /* the other operand is infinity or NaN */ | |
149 | brhs 0x3ca00001,r11,.Lret0 /* exponent field of the other operand is 0x3ca00000 or less: return zero */ | |
150 | sub_s DBL0H,DBL0H,DBL1H /* this sub, the bmsk and the add below together subtract only bit 31 of DBL1H from DBL0H, which flips the sign bit of DBL0H when DBL1 is negative */ | |
151 | bmsk.f DBL1H,DBL1H,30 /* clear the sign bit of DBL1H, Z set when nothing else is left in DBL1H */ | |
152 | add_s DBL0H,DBL0H,DBL1H | |
153 | beq.d .Ldenorm_2 /* condition presumably still the Z flag of bmsk.f above - confirm */ | |
154 | norm r12,DBL1H /* delay slot */ | |
155 | sub_s r12,r12,10 /* r12 = left shift count - presumably the one that moves the leading set bit of DBL1H to bit 20, confirm against the norm definition */ | |
156 | asl r5,r12,20 /* the same count positioned at bit 20, where the exponent field starts */ | |
157 | asl_s DBL1H,DBL1H,r12 | |
158 | sub DBL0H,DBL0H,r5 /* lower the exponent field of the other operand by the shift count */ | |
159 | neg r5,r12 /* r5 = -r12, complementary right shift count */ | |
160 | lsr r6,DBL1L,r5 /* bits that move from the low word into the high word */ | |
161 | asl_s DBL1L,DBL1L,r12 | |
162 | b.d __muldf3 /* re-run the multiplication with the adjusted operands */ | |
163 | add_s DBL1H,DBL1H,r6 /* delay slot: merge the bits shifted up from DBL1L */ | |
164 | ||
165 | .Lret0: xor_s DBL0H,DBL0H,DBL1H /* Return a zero whose sign bit is the XOR of both input sign bits.  After this xor bit 31 of DBL0H holds that sign. */ | |
166 | bclr DBL1H,DBL0H,31 /* DBL1H = the same value with bit 31 cleared */ | |
167 | xor_s DBL0H,DBL0H,DBL1H /* cancels every bit except bit 31 */ | |
168 | j_s.d [blink] | |
169 | mov_s DBL0L,0 /* delay slot: low word of the result is zero */ | |
170 | ||
171 | .balign 4 | |
172 | .Linf_nan: /* Entered when the exponent field of at least one operand is all ones (infinity or NaN).  The result high word is chosen as described by the comments below, the low word is zero and the sign is the XOR of the input signs. */ | |
173 | bclr r12,DBL1H,31 /* r12 = DBL1H without its sign bit */ | |
174 | xor_s DBL1H,DBL1H,DBL0H /* bit 31 of DBL1H = sign of the product */ | |
175 | bclr_s DBL0H,DBL0H,31 /* DBL0H without its sign bit */ | |
176 | max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf | |
177 | or.f 0,DBL0H,DBL0L /* Z set when operand 0 is zero */ | |
178 | mov_s DBL0L,0 /* the low result word is always zero on this path.  NOTE(review): a NaN input whose fraction bits are all in the low word would then presumably come out as infinity - confirm */ | |
179 | or.ne.f DBL1L,DBL1L,r12 /* only when operand 0 is nonzero: Z set when operand 1 is zero */ | |
180 | not_s DBL0H,DBL0L ; inf * 0 -> NaN | |
181 | mov.ne DBL0H,r8 /* neither operand is zero: use the larger high word computed by max */ | |
182 | tst_s DBL1H,DBL1H /* N = sign of the product */ | |
183 | j_s.d [blink] | |
184 | bset.mi DBL0H,DBL0H,31 /* delay slot: set the sign bit for a negative product */ | |
185 | ||
a07c5b47 | 186 | /* We have checked for infinity / NaN input before, and transformed |
d38a64b4 JR |
187 | denormalized inputs into normalized inputs. Thus, the worst case |
188 | exponent overflows are: | |
189 | 1 + 1 - 0x400 == 0xc02 : maximum underflow | |
190 | 0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow | |
191 | N.B. 0x7fe and 0x7ff are also values for overflow. | |
192 | ||
193 | If (r12 <= -54), we have an underflow to zero. */ | |
194 | .balign 4 | |
195 | .Linf_denorm: /* Entered from __muldf3 when the exponent field in r12 is, as an unsigned value, 0x7fe00000 or more.  Separates overflow from underflow and, for underflow, shifts the fraction in r7:r5:r4 right to form a denormal or zero result. */ | |
196 | lsr r6,r12,28 /* r6 = top four bits of r12 */ | |
197 | brlo.d r6,0xc,.Linf /* top nibble below 0xc: overflow, return infinity */ | |
198 | asr r6,r12,20 /* delay slot: r6 = exponent as a signed number */ | |
199 | add.f r10,r10,r6 /* reduce the left shift count in r10 by the amount of underflow */ | |
200 | brgt.d r10,0,.Lshift_frac /* still a positive left shift: finish in .Lshift_frac */ | |
201 | mov_s r12,0 /* delay slot: the result exponent field is zero */ | |
202 | beq.d .Lround_frac /* presumably still tests the Z flag of add.f above, that is r10 == 0 - confirm */ | |
203 | add r10,r10,32 /* delay slot: one full word of right shift is done below */ | |
204 | .Lshift32_frac: /* Shift the fraction r7:r5:r4 right by one whole word. */ | |
205 | tst r4,r4 /* is anything about to be shifted out of r4?  Consumed by bset.ne below */ | |
206 | mov r4,r5 | |
207 | bset.ne r4,r4,1 /* keep the shifted-out bits as a sticky bit in bit 1 of the new r4 */ | |
208 | mov r5,r7 | |
209 | brge.d r10,1,.Lshift_frac /* remaining count is a positive left shift: finish in .Lshift_frac */ | |
210 | mov r7,0 /* delay slot */ | |
211 | breq.d r10,0,.Lround_frac /* no bit shift left to do: only round */ | |
212 | add r10,r10,32 /* delay slot: account for another whole word */ | |
213 | brgt r10,21,.Lshift32_frac /* repeat while the adjusted count is above 21 */ | |
214 | b_s .Lret0 /* otherwise return a signed zero */ | |
215 | ||
216 | .Lround_frac: /* Final step when no further bit shift is needed: r7:r5 becomes the result and r4 holds the bits below it. */ | |
217 | add.f 0,r4,r4 /* carry = top bit of r4, Z = all lower bits of r4 are zero */ | |
218 | btst.eq r5,0 /* only when Z was set: test bit 0 of r5 */ | |
219 | mov_s DBL0L,r5 | |
220 | mov_s DBL0H,r7 | |
221 | adc.eq.f DBL0L,DBL0L,0 /* NOTE(review): .Lshift_frac performs the same rounding step with an unconditional adc after cmp.eq, whereas here the adc itself is predicated on EQ, and no sign bit is merged into DBL0H in this block - verify rounding and sign on this path */ | |
222 | j_s.d [blink] | |
223 | adc.eq DBL0H,DBL0H,0 /* delay slot: propagate the carry into the high word */ | |
224 | ||
225 | .Linf: mov_s DBL0L,0 /* Overflow: return infinity with the sign of the product.  Low word of the result is zero. */ | |
226 | xor.f DBL1H,DBL1H,DBL0H /* N = XOR of the two input sign bits */ | |
227 | mov_s DBL0H,r9 /* r9 still holds 0x7ff00000, loaded at __muldf3 */ | |
228 | j_s.d [blink] | |
229 | bset.mi DBL0H,DBL0H,31 /* delay slot: set the sign bit for a negative product */ | |
230 | ENDFUNC(__muldf3) | |
231 | ||
232 | .balign 4 | |
233 | .L7ff00000: /* Literal read by the pc-relative ld.as at the start of __muldf3. */ | |
234 | .long 0x7ff00000 /* mask of the exponent field in the high word of a double */ |