]>
Commit | Line | Data |
---|---|---|
a5544970 | 1 | /* Copyright (C) 2008-2019 Free Software Foundation, Inc. |
d38a64b4 JR |
2 | Contributor: Joern Rennecke <joern.rennecke@embecosm.com> |
3 | on behalf of Synopsys Inc. | |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 3, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
26 | #include "../arc-ieee-754.h" | |
27 | ||
28 | #if 0 /* DEBUG */ /* Disabled self-check wrapper: calls __muldf3_c and __muldf3_asm with the same arguments and aborts if r0/r1 differ. */ |
29 | .global __muldf3 |
30 | .balign 4 |
31 | __muldf3: |
32 | push_s blink /* save return address; after the five pushes the stack holds r1,r0,r3,r2,blink from sp upwards */ |
33 | push_s r2 |
34 | push_s r3 |
35 | push_s r0 |
36 | bl.d __muldf3_c /* first call; __muldf3_c is not defined in the visible source */ |
37 | push_s r1 /* delay slot of bl.d */ |
38 | ld_s r2,[sp,12] /* reload the saved r2 */ |
39 | ld_s r3,[sp,8] /* reload the saved r3 */ |
40 | st_s r0,[sp,12] /* park the first result word r0 in the slot that held r2 */ |
41 | st_s r1,[sp,8] /* park the first result word r1 in the slot that held r3 */ |
42 | pop_s r1 /* restore the original r1 */ |
43 | bl.d __muldf3_asm /* second call with the same arguments */ |
44 | pop_s r0 /* delay slot: restore the original r0 */ |
45 | pop_s r3 /* r3 = r1 returned by the first call */ |
46 | pop_s r2 /* r2 = r0 returned by the first call */ |
47 | pop_s blink |
48 | cmp r0,r2 |
49 | cmp.eq r1,r3 /* compare the second word only if the first matched */ |
50 | jeq_s [blink] /* both words equal: return */ |
51 | b abort /* results differ */ |
52 | #define __muldf3 __muldf3_asm /* the implementation below is then assembled under the name __muldf3_asm */ |
53 | #endif /* DEBUG */ |
54 | ||
55 | __muldf3_support: /* This label makes debugger output saner. */ |
56 | .balign 4 |
57 | FUNC(__muldf3) |
58 | .Ldenorm_2: /* Reached from .Ldenorm_dbl1 when DBL1H with its sign bit masked off is zero, i.e. only DBL1L can hold fraction bits. Normalizes operand 1 and falls through into __muldf3. */ |
59 | breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output |
60 | norm.f r12,DBL1L /* delay slot; presumably r12 = normalization count of DBL1L, with N reflecting its top bit */ |
61 | mov.mi r12,21 /* N set: use a shift count of 21 */ |
62 | add.pl r12,r12,22 /* otherwise: count = norm result + 22 */ |
63 | neg r11,r12 /* negated count, used as the complementary shift amount below */ |
64 | asl_s r12,r12,20 /* move the count to bit 20, where the exponent field starts (cf. mask 0x7ff00000) */ |
65 | lsr.f DBL1H,DBL1L,r11 /* new high word: bits of DBL1L that cross into the upper word */ |
66 | ror DBL1L,DBL1L,r11 |
67 | sub_s DBL0H,DBL0H,r12 /* compensate by lowering the other operand's exponent */ |
68 | mov.eq DBL1H,DBL1L /* Z from lsr.f above: high word takes the rotated value instead */ |
69 | sub_l DBL1L,DBL1L,DBL1H /* remove from the low word the bits now held in the high word */ |
70 | /* Fall through. */ |
71 | .global __muldf3 /* Entry point. Operands are accessed through the DBL0L/DBL0H and DBL1L/DBL1H macros (presumably defined in arc-ieee-754.h); the result is left in DBL0H and DBL0L. NOTE: the ld.as below reaches .L7ff00000 through a hard-coded pcl-relative offset, so the size and order of the code up to that literal must not change. */ |
72 | .balign 4 |
73 | __muldf3: |
74 | mululw 0,DBL0L,DBL1L /* start of the low x low partial product; exact mululw/machulw semantics are not shown here - TODO confirm against the ISA manual */ |
75 | machulw r4,DBL0L,DBL1L |
76 | ld.as r9,[pcl,0x67] ; ((.L7ff00000-.+2)/4)] |
77 | bmsk r6,DBL0H,19 /* r6 = low 20 bits of DBL0H */ |
78 | bset r6,r6,20 /* set bit 20: the implicit leading one of a normalized fraction */ |
79 | mov r8,acc2 /* lowest word of the product (see r7:acc2:r4:r8 below) */ |
80 | mululw 0,r4,1 /* presumably re-seeds the accumulator with r4 (multiply by 1) */ |
81 | and r11,DBL0H,r9 /* r11 = exponent field of operand 0 */ |
82 | breq.d r11,0,.Ldenorm_dbl0 /* exponent field zero: operand 0 is zero or denormal */ |
83 | and r12,DBL1H,r9 /* delay slot: r12 = exponent field of operand 1 */ |
84 | breq.d r12,0,.Ldenorm_dbl1 /* exponent field zero: operand 1 is zero or denormal */ |
85 | maclw 0,r6,DBL1L /* delay slot: accumulate the (high fraction 0) x (low word 1) cross product */ |
86 | machulw 0,r6,DBL1L |
87 | breq.d r11,r9,.Linf_nan /* exponent field all ones: operand 0 is inf or NaN */ |
88 | bmsk r10,DBL1H,19 /* delay slot: r10 = low 20 bits of DBL1H */ |
89 | breq.d r12,r9,.Linf_nan /* exponent field all ones: operand 1 is inf or NaN */ |
90 | bset r10,r10,20 /* delay slot: implicit leading one for operand 1 */ |
91 | maclw 0,r10,DBL0L /* accumulate the other cross product */ |
92 | machulw r5,r10,DBL0L |
93 | add_s r12,r12,r11 ; add exponents |
94 | mov r4,acc2 |
95 | mululw 0,r5,1 /* presumably re-seeds the accumulator with r5 */ |
96 | maclw 0,r6,r10 /* high x high product */ |
97 | machulw r7,r6,r10 ; fraction product in r7:acc2:r4:r8 |
98 | tst r8,r8 /* are there any bits in the lowest product word? */ |
99 | bclr r8,r9,30 ; 0x3ff00000 |
100 | bset.ne r4,r4,0 ; put least significant word into sticky bit |
101 | bclr r6,r9,20 ; 0x7fe00000 |
102 | lsr.f r10,r7,9 /* r10 = r7 >> 9; Z when no bit at or above bit 9 of r7 is set */ |
103 | rsub.eq r8,r8,r9 ; 0x40000000 |
104 | sub r12,r12,r8 ; subtract bias + implicit 1 |
105 | brhs.d r12,r6,.Linf_denorm /* unsigned r12 >= 0x7fe00000: exponent outside the normal range */ |
106 | rsub r10,r10,12 /* delay slot: r10 = 12 - r10, the left-shift count for .Lshift_frac */ |
107 | .Lshift_frac: /* Shift the product left by r10, round, and combine with the exponent in r12. Also entered from .Linf_denorm and .Lshift32_frac. */ |
108 | neg r8,r10 /* negated count, used as the complementary right-shift amount */ |
109 | asl r6,r4,r10 /* r6 = bits of r4 that fall below the result */ |
110 | lsr DBL0L,r4,r8 /* bits of r4 that belong in the low result word */ |
111 | add.f 0,r6,r6 /* carry = top discarded bit; Z when all lower discarded bits are zero */ |
112 | btst.eq DBL0L,0 /* only on an exact tie: test the lowest kept bit */ |
113 | cmp.eq r4,r4 ; round to nearest / round to even |
114 | asl r4,acc2,r10 |
115 | lsr r5,acc2,r8 /* bits of acc2 that carry into the high result word */ |
116 | adc.f DBL0L,DBL0L,r4 /* low result word plus rounding carry */ |
117 | xor.f 0,DBL0H,DBL1H /* N flag = XOR of the operands' sign bits */ |
118 | asl r7,r7,r10 |
119 | add_s r12,r12,r5 |
120 | adc DBL0H,r12,r7 /* high result word: exponent + fraction + carry from the low word */ |
121 | j_s.d [blink] |
122 | bset.mi DBL0H,DBL0H,31 /* delay slot: set the sign bit when N is set */ |
123 | ||
124 | /* N.B. This is optimized for ARC700. | |
125 | ARC600 has very different scheduling / instruction selection criteria. */ | |
126 | ||
127 | /* If one number is denormal, subtract its normalization shift from the exponent of the other | |
128 | one (if the other exponent is too small, return 0), and normalize the | |
129 | denormal. Then re-run the computation. */ | |
130 | .Lret0_2: /* Return zero, keeping only bit 31 of DBL0H as the sign. */ |
131 | lsr_s DBL0H,DBL0H,31 |
132 | asl_s DBL0H,DBL0H,31 /* shifting right then left by 31 clears every bit except bit 31 */ |
133 | j_s.d [blink] |
134 | mov_s DBL0L,0 /* delay slot: low word = 0 */ |
135 | .balign 4 |
136 | .Ldenorm_dbl0: /* Operand 0 has a zero exponent field: swap the two operands (r12 is the temporary) so that the zero/denormal one becomes operand 1. */ |
137 | mov_s r12,DBL0L |
138 | mov_s DBL0L,DBL1L |
139 | mov_s DBL1L,r12 |
140 | mov_s r12,DBL0H |
141 | mov_s DBL0H,DBL1H |
142 | mov_s DBL1H,r12 |
143 | and r11,DBL0H,r9 /* recompute r11 = exponent field of the new operand 0 */ |
144 | .Ldenorm_dbl1: /* Operand 1 has a zero exponent field; r11 holds operand 0's exponent field. */ |
145 | brhs r11,r9,.Linf_nan /* operand 0's exponent field is all ones: inf or NaN */ |
146 | brhs 0x3ca00001,r11,.Lret0 /* unsigned r11 <= 0x3ca00001: other exponent too small, return zero */ |
147 | sub_s DBL0H,DBL0H,DBL1H |
148 | bmsk.f DBL1H,DBL1H,30 /* clear operand 1's sign bit; Z when the rest of the high word is zero */ |
149 | add_s DBL0H,DBL0H,DBL1H /* net effect of the sub/bmsk/add sequence: only operand 1's bit 31 is subtracted from DBL0H, folding its sign into DBL0H */ |
150 | beq.d .Ldenorm_2 /* Z from bmsk.f: no fraction bits in the high word, normalize from the low word */ |
151 | norm r12,DBL1H /* delay slot */ |
152 | sub_s r12,r12,10 /* r12 = left-shift count used to normalize operand 1 (presumably brings its top fraction bit to bit 20) */ |
153 | asl r5,r12,20 /* shift count moved to the exponent field position */ |
154 | asl_s DBL1H,DBL1H,r12 |
155 | sub DBL0H,DBL0H,r5 /* lower operand 0's exponent by the shift count */ |
156 | neg r5,r12 /* negated count, used as the complementary right-shift amount */ |
157 | lsr r6,DBL1L,r5 /* bits of the low word that move up into the high word */ |
158 | asl_s DBL1L,DBL1L,r12 |
159 | b.d __muldf3 /* re-run the multiplication with the normalized operand */ |
160 | add_s DBL1H,DBL1H,r6 /* delay slot: merge the carried-up bits into the high word */ |
161 | ||
162 | .Lret0: xor_s DBL0H,DBL0H,DBL1H /* Return zero whose sign bit is the XOR of bit 31 of DBL0H and DBL1H. */ |
163 | bclr DBL1H,DBL0H,31 /* DBL1H = the XOR value without bit 31 */ |
164 | xor_s DBL0H,DBL0H,DBL1H /* cancels every bit except bit 31 */ |
165 | j_s.d [blink] |
166 | mov_s DBL0L,0 /* delay slot: low word = 0 */ |
167 | ||
168 | .balign 4 |
169 | .Linf_nan: /* Reached when an operand's exponent field equals r9 (all ones): build the inf/NaN result. */ |
170 | bclr r12,DBL1H,31 /* r12 = operand 1 high word without its sign bit */ |
171 | xor_s DBL1H,DBL1H,DBL0H /* bit 31 of DBL1H = sign of the result */ |
172 | bclr_s DBL0H,DBL0H,31 /* operand 0 high word without its sign bit */ |
173 | max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf |
174 | or.f 0,DBL0H,DBL0L /* Z when operand 0 is zero */ |
175 | mov_s DBL0L,0 |
176 | or.ne.f DBL1L,DBL1L,r12 /* if operand 0 was non-zero: Z when operand 1 is zero */ |
177 | not_s DBL0H,DBL0L ; inf * 0 -> NaN |
178 | mov.ne DBL0H,r8 /* neither operand is zero: use the max computed above */ |
179 | tst_s DBL1H,DBL1H /* N = result sign */ |
180 | j_s.d [blink] |
181 | bset.mi DBL0H,DBL0H,31 /* delay slot: set the sign bit when N is set */ |
182 | ||
a07c5b47 | 183 | /* We have checked for infinity / NaN input before, and transformed |
d38a64b4 JR |
184 | denormalized inputs into normalized inputs. Thus, the worst case |
185 | exponent overflows are: | |
186 | 1 + 1 - 0x400 == 0xc02 : maximum underflow | |
187 | 0x7fe + 0x7fe - 0x3ff == 0xbfd : maximum overflow | |
188 | N.B. 0x7e and 0x7f are also values for overflow. | |
189 | ||
190 | If (r12 <= -54), we have an underflow to zero. */ | |
191 | .balign 4 |
192 | .Linf_denorm: /* The exponent in r12 is outside the normal range: separate overflow from underflow. */ |
193 | lsr r6,r12,28 /* r6 = top four bits of r12 */ |
194 | brlo.d r6,0xc,.Linf /* top nibble below 0xc: overflow, return infinity */ |
195 | asr r6,r12,20 /* delay slot: r6 = r12 arithmetically shifted down to a signed exponent value */ |
196 | add.f r10,r10,r6 /* adjust the shift count by the exponent */ |
197 | brgt.d r10,0,.Lshift_frac /* count still positive: shift and pack as usual */ |
198 | mov_s r12,0 /* delay slot: exponent field of the result is zero from here on */ |
199 | beq.d .Lround_frac /* presumably tests Z from the add.f above: count exactly zero */ |
200 | add r10,r10,32 /* delay slot */ |
201 | .Lshift32_frac: /* Move the product down by one 32-bit word; a discarded non-zero word is recorded as a sticky bit. */ |
202 | tst r4,r4 /* is the word about to be discarded non-zero? */ |
203 | mov r4,acc2 |
204 | bset.ne r4,r4,1 /* if so, set bit 1 of the new r4 */ |
205 | mululw 0,r7,1 /* presumably moves r7 into the accumulator (multiply by 1) */ |
206 | brge.d r10,1,.Lshift_frac /* count positive again: finish in .Lshift_frac */ |
207 | mov r7,0 /* delay slot: top word is now empty */ |
208 | breq.d r10,0,.Lround_frac /* count exactly zero: round without shifting */ |
209 | add r10,r10,32 /* delay slot */ |
210 | brgt r10,21,.Lshift32_frac /* count still large enough: drop another word */ |
211 | b_s .Lret0 /* otherwise underflow to zero */ |
212 | ||
213 | .Lround_frac: /* Round when no further shift is needed: result is taken from r7 and acc2, with r4 holding the bits below. NOTE(review): unlike .Lshift_frac, the carry add here is conditional on Z and no sign bit is applied in this block - confirm this is intended. */ |
214 | add.f 0,r4,r4 /* carry = top bit of r4; Z when the remaining bits of r4 are zero */ |
215 | btst.eq acc2,0 /* only when Z: test the lowest bit of acc2 */ |
216 | mov_s DBL0L,acc2 |
217 | mov_s DBL0H,r7 |
218 | adc.eq.f DBL0L,DBL0L,0 /* only when Z: add the carry into the low word */ |
219 | j_s.d [blink] |
220 | adc.eq DBL0H,DBL0H,0 /* delay slot, only when Z: propagate the carry into the high word */ |
221 | ||
222 | .Linf: mov_s DBL0L,0 /* Overflow: return infinity with the sign of the product. */ |
223 | xor.f DBL1H,DBL1H,DBL0H /* N flag = XOR of the operands' sign bits */ |
224 | mov_s DBL0H,r9 /* r9 = 0x7ff00000: exponent field all ones, fraction zero */ |
225 | j_s.d [blink] |
226 | bset.mi DBL0H,DBL0H,31 /* delay slot: set the sign bit when N is set */ |
227 | ENDFUNC(__muldf3) |
228 | ||
229 | .balign 4 |
230 | .L7ff00000: /* Literal 0x7ff00000, used as the exponent-field mask; the ld.as in __muldf3 loads it into r9 through a hard-coded pcl-relative offset, so its distance from that load must stay fixed. */ |
231 | .long 0x7ff00000 |