]>
Commit | Line | Data |
---|---|---|
0b85d816 HPN |
1 | /* Signed and unsigned multiplication and division and modulus for CRIS. |
2 | Contributed by Axis Communications. | |
3 | Written by Hans-Peter Nilsson <hp@axis.se>, c:a 1992. | |
4 | ||
83ffe9cd | 5 | Copyright (C) 1998-2023 Free Software Foundation, Inc. |
0b85d816 HPN |
6 | |
7 | This file is part of GCC. | |
8 | ||
9 | GCC is free software; you can redistribute it and/or modify it | |
10 | under the terms of the GNU General Public License as published by the | |
748086b7 | 11 | Free Software Foundation; either version 3, or (at your option) any |
0b85d816 HPN |
12 | later version. |
13 | ||
0b85d816 HPN |
14 | This file is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | General Public License for more details. | |
18 | ||
748086b7 JJ |
19 | Under Section 7 of GPL version 3, you are granted additional |
20 | permissions described in the GCC Runtime Library Exception, version | |
21 | 3.1, as published by the Free Software Foundation. | |
22 | ||
23 | You should have received a copy of the GNU General Public License and | |
24 | a copy of the GCC Runtime Library Exception along with this program; | |
25 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
26 | <http://www.gnu.org/licenses/>. */ | |
0b85d816 HPN |
27 | |
28 | ||
29 | /* Note that we provide prototypes for all "const" functions, to attach | |
30 | the const attribute. This is necessary in 2.7.2 - adding the | |
31 | attribute to the function *definition* is a syntax error. | |
32 | This did not work with e.g. 2.1; back then, the return type had to | |
33 | be "const". */ | |
34 | ||
35 | #include "config.h" | |
36 | ||
/* LZ (v) expands to __builtin_clz (v).  It is defined only when
   __CRIS_arch_version is defined and is at least 3; the division helpers
   further down test "#ifdef LZ" and use shift/scan loops when it is not
   defined.
   NOTE(review): GCC documents __builtin_clz as taking unsigned int and as
   undefined for a zero argument.  The uses visible in this file are all
   preceded by a b == 0 check; confirm that long and int have the same
   width on the target.  */
37 | #if defined (__CRIS_arch_version) && __CRIS_arch_version >= 3 | |
e636e508 | 38 | #define LZ(v) __builtin_clz (v) |
0b85d816 HPN |
39 | #endif |
40 | ||
0e499e75 HPN |
41 | /* In (at least) the 4.7 series, GCC doesn't automatically choose the |
42 | most optimal strategy, possibly related to insufficient modelling of | |
43 | delay-slot costs. */ | |
/* SIGNMULT (s, a) applies the sign S to the value A.  The two variants
   below only agree when S is +1 or -1: the first multiplies, the second
   merely tests whether S is negative.  Every use in this file passes a
   sign variable that holds 1 or -1.  Each argument is expanded at most
   once in the result expression.  */
44 | #if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10 | |
45 | #define SIGNMULT(s, a) ((s) * (a)) /* Cheap multiplication, better than branch. */ | |
46 | #else | |
47 | #define SIGNMULT(s, a) ((s) < 0 ? -(a) : (a)) /* Branches are still better. */ | |
48 | #endif | |
0b85d816 HPN |
49 | |
50 | #if defined (L_udivsi3) || defined (L_divsi3) || defined (L_umodsi3) \ | |
51 | || defined (L_modsi3) | |
52 | /* Result type of divmod worker function. */ | |
/* Member order matters: do_31div builds one of its results with the
   positional compound literal "(struct quot_rem) { 0, a }", which relies
   on QUOT being the first member and REM the second.  */
53 | struct quot_rem | |
54 | { | |
/* Quotient; do_31div stores 0xffffffff here when the divisor is zero.  */
55 | long quot; | |
/* Remainder; do_31div stores 0xffffffff here when the divisor is zero.  */
56 | long rem; | |
57 | }; | |
58 | ||
/* do_31div (a, b): shared unsigned divide/modulus worker.

   Returns a struct quot_rem holding the quotient and remainder of A
   divided by B.  Special cases handled before the main algorithm:
     - B == 0: both members are set to 0xffffffff.
     - A < B:  the result is quotient 0, remainder A.
   Otherwise B is shifted left until it exceeds A, the shift count is kept
   in quot_digits, and that many "dstep" instructions are executed; the
   low quot_digits bits of A then hold the quotient and the bits above
   them hold the remainder.  The prototype exists only to attach the
   __const__ and __always_inline__ attributes.  */
59 | /* This is the worker function for div and mod. It is inlined into the | |
01ad6816 HPN |
60 | respective library function. Parameter A must have bit 31 == 0. */ |
61 | ||
0b85d816 | 62 | static __inline__ struct quot_rem |
807b20b0 HPN |
63 | do_31div (unsigned long a, unsigned long b) |
64 | __attribute__ ((__const__, __always_inline__)); | |
0b85d816 HPN |
65 | |
66 | static __inline__ struct quot_rem | |
67 | do_31div (unsigned long a, unsigned long b) | |
68 | { | |
69 | /* Adjust operands and result if a is 31 bits. */ | |
70 | long extra = 0; | |
71 | int quot_digits = 0; | |
72 | ||
/* Division by zero does not trap: both result members become 0xffffffff.  */
73 | if (b == 0) | |
74 | { | |
75 | struct quot_rem ret; | |
76 | ret.quot = 0xffffffff; | |
77 | ret.rem = 0xffffffff; | |
78 | return ret; | |
79 | } | |
80 | ||
/* Dividend smaller than divisor: quotient 0, remainder A (positional
   initializer, so it depends on the member order of struct quot_rem).  */
81 | if (a < b) | |
82 | return (struct quot_rem) { 0, a }; | |
83 | ||
/* Align B with A: after this step B has been shifted left quot_digits
   times and is strictly greater than A.  With LZ the shift count is the
   difference in leading-zero counts, plus one more when A is still not
   below the shifted B; without LZ a one-bit-at-a-time loop is used.
   NOTE(review): because of the "a < b" early return just above, the
   "b <= a" test in the LZ variant looks always true at this point.  */
84 | #ifdef LZ | |
85 | if (b <= a) | |
86 | { | |
87 | quot_digits = LZ (b) - LZ (a); | |
88 | quot_digits += (a >= (b << quot_digits)); | |
89 | b <<= quot_digits; | |
90 | } | |
91 | #else | |
92 | while (b <= a) | |
93 | { | |
94 | b <<= 1; | |
95 | quot_digits++; | |
96 | } | |
97 | #endif | |
98 | ||
99 | /* Is a 31 bits? Note that bit 31 is handled by the caller. */ | |
100 | if (a & 0x40000000) | |
101 | { | |
102 | /* Then make b:s highest bit max 0x40000000, because it must have | |
103 | been 0x80000000 to be 1 bit higher than a. */ | |
104 | b >>= 1; | |
105 | ||
106 | /* Adjust a to be maximum 0x3fffffff, i.e. two upper bits zero. */ | |
107 | if (a >= b) | |
108 | { | |
109 | a -= b; | |
110 | extra = 1 << (quot_digits - 1); | |
111 | } | |
/* NOTE(review): after the alignment step the halved B appears to be
   always <= A, which would make this else branch unreachable -- confirm
   before relying on it.  */
112 | else | |
113 | { | |
114 | a -= b >> 1; | |
115 | ||
116 | /* Remember that we adjusted a by subtracting b * 2 ** Something. */ | |
117 | extra = 1 << quot_digits; | |
118 | } | |
119 | ||
120 | /* The number of quotient digits will be one less, because | |
121 | we just adjusted b. */ | |
122 | quot_digits--; | |
123 | } | |
124 | ||
125 | /* Now do the division part. */ | |
126 | ||
127 | /* Subtract b and add ones to the right when a >= b | |
128 | i.e. "a - (b - 1) == (a - b) + 1". */ | |
129 | b--; | |
130 | ||
/* DS emits a single "dstep" instruction with A as a tied input/output
   register operand and B as a register input, followed by a fallthrough
   attribute.  The switch enters at "case quot_digits" and falls through
   to case 0, so exactly quot_digits dstep instructions are executed.
   The exact semantics of dstep are not visible in this file; presumably
   it is a shift-and-conditional-subtract divide step -- confirm against
   the CRIS instruction set documentation.  Note that DS is never
   #undef'd, so it stays defined for the rest of the translation unit.  */
463f6499 HPN |
131 | #define DS __asm__ ("dstep %2,%0" : "=r" (a) : "0" (a), "r" (b)); \ |
132 | __attribute__ ((__fallthrough__)) | |
0b85d816 HPN |
133 | |
134 | switch (quot_digits) | |
135 | { | |
136 | case 32: DS; case 31: DS; case 30: DS; case 29: DS; | |
137 | case 28: DS; case 27: DS; case 26: DS; case 25: DS; | |
138 | case 24: DS; case 23: DS; case 22: DS; case 21: DS; | |
139 | case 20: DS; case 19: DS; case 18: DS; case 17: DS; | |
140 | case 16: DS; case 15: DS; case 14: DS; case 13: DS; | |
141 | case 12: DS; case 11: DS; case 10: DS; case 9: DS; | |
142 | case 8: DS; case 7: DS; case 6: DS; case 5: DS; | |
143 | case 4: DS; case 3: DS; case 2: DS; case 1: DS; | |
144 | case 0:; | |
145 | } | |
146 | ||
/* Split A into its two halves: the low quot_digits bits are the quotient
   bits produced by the steps (plus the pre-adjustment recorded in
   EXTRA), and everything above them is the remainder.  */
147 | { | |
148 | struct quot_rem ret; | |
149 | ret.quot = (a & ((1 << quot_digits) - 1)) + extra; | |
150 | ret.rem = a >> quot_digits; | |
151 | return ret; | |
152 | } | |
153 | } | |
154 | ||
#ifdef L_udivsi3
/* The prototype exists only to attach the const attribute.  */
unsigned long
__Udiv (unsigned long a, unsigned long b) __attribute__ ((__const__));

/* Unsigned division: return A / B.

   do_31div requires bit 31 of its dividend to be clear, so operands with
   bit 31 set are reduced here first:
     - B has bit 31 set: the quotient can only be 0 or 1, i.e. (A >= B).
     - A has bit 31 set: the largest shifted multiple of B that fits is
       subtracted from A and the matching power of two is remembered in
       EXTRA, which is added back to the quotient of the reduced dividend.
   For B == 0 the result is 0xffffffff: explicitly when A has bit 31 set,
   and through do_31div's zero-divisor result otherwise.

   EXTRA and the shifted constants are unsigned: the shift count TMP can
   be 31 (B == 1), and "1 << 31" on a signed int is undefined in ISO C.  */
unsigned long
__Udiv (unsigned long a, unsigned long b)
{
  unsigned long extra = 0;

  /* Adjust operands and result, if a and/or b is 32 bits.  */
  /* Effectively: b & 0x80000000.  */
  if ((long) b < 0)
    return a >= b;

  /* Effectively: a & 0x80000000.  */
  if ((long) a < 0)
    {
      int tmp = 0;

      if (b == 0)
        return 0xffffffff;
#ifdef LZ
      tmp = LZ (b);
#else
      /* Find the highest set bit of B, then turn its index into the
         number of leading zero bits.  B is nonzero here, so the scan
         terminates.  */
      for (tmp = 31; (b & (1UL << tmp)) == 0; tmp--)
        ;

      tmp = 31 - tmp;
#endif

      /* B << TMP has bit 31 set.  If it is too large to subtract, use the
         next smaller multiple; either way A ends up with bit 31 clear.  */
      if ((b << tmp) > a)
        {
          extra = 1UL << (tmp - 1);
          a -= b << (tmp - 1);
        }
      else
        {
          extra = 1UL << tmp;
          a -= b << tmp;
        }
    }

  return do_31div (a, b).quot + extra;
}
#endif /* L_udivsi3 */
0b85d816 HPN |
200 | |
#ifdef L_divsi3
/* The prototype exists only to attach the const attribute.  */
long
__Div (long a, long b) __attribute__ ((__const__));

/* Signed division: return A / B, computed as the quotient of the
   magnitudes with the sign applied afterwards (so the result is
   truncated toward zero).

   The magnitude of B is formed in unsigned arithmetic rather than with
   __builtin_labs, because labs is undefined when B is the most negative
   long; the unsigned form yields 0x80000000 for that input, which
   do_31div accepts as a divisor.  */
long
__Div (long a, long b)
{
  long extra = 0;
  long sign = (b < 0) ? -1 : 1;
  long res;
  unsigned long abs_b = (b < 0) ? -(unsigned long) b : (unsigned long) b;

  /* We need to handle a == -2147483648 as expected and must while
     doing that avoid producing a sequence like "abs (a) < 0" as GCC
     may optimize out the test.  Only the (presumably rarer) negative
     dividend pays for the 0x80000000 special case.  */
  if (a < 0)
    {
      sign = -sign;

      if ((a & 0x7fffffff) == 0)
        {
          /* We're at 0x80000000.  Tread carefully: move A one divisor
             magnitude toward zero so that it can be negated, and
             remember the quotient step that was taken in EXTRA.  */
          a -= SIGNMULT (sign, b);
          extra = sign;
        }
      a = -a;
    }

  res = do_31div (a, abs_b).quot;
  return SIGNMULT (sign, res) + extra;
}
#endif /* L_divsi3 */
0b85d816 HPN |
236 | |
237 | ||
#ifdef L_umodsi3
/* The prototype exists only to attach the const attribute.  */
unsigned long
__Umod (unsigned long a, unsigned long b) __attribute__ ((__const__));

/* Unsigned modulus: return A % B.

   do_31div requires bit 31 of its dividend to be clear, so operands with
   bit 31 set are reduced here first:
     - B has bit 31 set: the quotient is 0 or 1, so the remainder is
       either A or A - B.
     - A has bit 31 set: the largest shifted multiple of B that fits is
       subtracted from A, which leaves the remainder unchanged.
   For B == 0, A is returned when A has bit 31 set; otherwise the value
   comes from do_31div's zero-divisor result.

   The bit scan uses an unsigned constant: it starts at bit 31, and
   "1 << 31" on a signed int is undefined in ISO C.  */
unsigned long
__Umod (unsigned long a, unsigned long b)
{
  /* Adjust operands and result if a and/or b is 32 bits.  */
  if ((long) b < 0)
    return a >= b ? a - b : a;

  if ((long) a < 0)
    {
      int tmp = 0;

      if (b == 0)
        return a;
#ifdef LZ
      tmp = LZ (b);
#else
      /* Find the highest set bit of B, then turn its index into the
         number of leading zero bits.  B is nonzero here, so the scan
         terminates.  */
      for (tmp = 31; (b & (1UL << tmp)) == 0; tmp--)
        ;
      tmp = 31 - tmp;
#endif

      /* B << TMP has bit 31 set.  If it is too large to subtract, use the
         next smaller multiple; either way A ends up with bit 31 clear.  */
      if ((b << tmp) > a)
        a -= b << (tmp - 1);
      else
        a -= b << tmp;
    }

  return do_31div (a, b).rem;
}
#endif /* L_umodsi3 */
0b85d816 HPN |
276 | |
#ifdef L_modsi3
/* The prototype exists only to attach the const attribute.  */
long
__Mod (long a, long b) __attribute__ ((__const__));

/* Signed modulus: return A % B, computed as the remainder of the
   magnitudes with the sign of A applied afterwards.

   The magnitude of B is never formed as a signed value, because
   __builtin_labs is undefined when B is the most negative long.  The
   divisor handed to do_31div is computed in unsigned arithmetic, and the
   0x80000000 adjustment of A subtracts B or -B directly.  */
long
__Mod (long a, long b)
{
  long sign = 1;
  long res;
  unsigned long abs_b = (b < 0) ? -(unsigned long) b : (unsigned long) b;

  /* We need to handle a == -2147483648 as expected and must while
     doing that avoid producing a sequence like "abs (a) < 0" as GCC
     may optimize out the test.  Only the (presumably rarer) negative
     dividend pays for the 0x80000000 special case.  */
  if (a < 0)
    {
      sign = -1;
      if ((a & 0x7fffffff) == 0)
        /* We're at 0x80000000.  Tread carefully: add |b| to A so that it
           can be negated; this does not change the remainder.  Written
           as a subtraction of B or -B so that no step can overflow.  */
        a -= (b < 0) ? b : -b;
      a = -a;
    }

  res = do_31div (a, abs_b).rem;
  return SIGNMULT (sign, res);
}
#endif /* L_modsi3 */
0b85d816 HPN |
307 | #endif /* L_udivsi3 || L_divsi3 || L_umodsi3 || L_modsi3 */ |
308 | ||
309 | /* | |
310 | * Local variables: | |
311 | * eval: (c-set-style "gnu") | |
312 | * indent-tabs-mode: t | |
313 | * End: | |
314 | */ |