]>
Commit | Line | Data |
---|---|---|
dd552284 | 1 | /* Division and remainder routines for Tile. |
7adcbafe | 2 | Copyright (C) 2011-2022 Free Software Foundation, Inc. |
dd552284 WL |
3 | Contributed by Walter Lee (walt@tilera.com) |
4 | ||
5 | This file is free software; you can redistribute it and/or modify it | |
6 | under the terms of the GNU General Public License as published by the | |
7 | Free Software Foundation; either version 3, or (at your option) any | |
8 | later version. | |
9 | ||
10 | This file is distributed in the hope that it will be useful, but | |
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | General Public License for more details. | |
14 | ||
15 | Under Section 7 of GPL version 3, you are granted additional | |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
23 | ||
24 | typedef int int32_t; | |
25 | typedef unsigned uint32_t; | |
26 | typedef long long int64_t; | |
27 | typedef unsigned long long uint64_t; | |
28 | ||
29 | /* Raise signal 8 (SIGFPE) with code 1 (FPE_INTDIV). */ | |
30 | static inline void | |
31 | raise_intdiv (void) | |
32 | { | |
33 | asm ("{ raise; moveli zero, 8 + (1 << 6) }"); | |
34 | } | |
35 | ||
36 | ||
37 | #ifndef __tilegx__ | |
38 | /*__udivsi3 - 32 bit integer unsigned divide */ | |
39 | static inline uint32_t __attribute__ ((always_inline)) | |
40 | __udivsi3_inline (uint32_t dividend, uint32_t divisor) | |
41 | { | |
42 | /* Divide out any power of two factor from dividend and divisor. | |
43 | Note that when dividing by zero the divisor will remain zero, | |
44 | which is all we need to detect that case below. */ | |
45 | const int power_of_two_factor = __insn_ctz (divisor); | |
46 | divisor >>= power_of_two_factor; | |
47 | dividend >>= power_of_two_factor; | |
48 | ||
49 | /* Checks for division by power of two or division by zero. */ | |
50 | if (divisor <= 1) | |
51 | { | |
52 | if (divisor == 0) | |
53 | { | |
54 | raise_intdiv (); | |
55 | return 0; | |
56 | } | |
57 | return dividend; | |
58 | } | |
59 | ||
60 | /* Compute (a / b) by repeatedly finding the largest N | |
61 | such that (b << N) <= a. For each such N, set bit N in the | |
62 | quotient, subtract (b << N) from a, and keep going. Think of this as | |
63 | the reverse of the "shift-and-add" that a multiply does. The values | |
64 | of N are precisely those shift counts. | |
65 | ||
66 | Finding N is easy. First, use clz(b) - clz(a) to find the N | |
67 | that lines up the high bit of (b << N) with the high bit of a. | |
68 | Any larger value of N would definitely make (b << N) > a, | |
69 | which is too big. | |
70 | ||
71 | Then, if (b << N) > a (because it has larger low bits), decrement | |
72 | N by one. This adjustment will definitely make (b << N) less | |
73 | than a, because a's high bit is now one higher than b's. */ | |
74 | ||
75 | /* Precomputing the max_ values allows us to avoid a subtract | |
76 | in the inner loop and just right shift by clz(remainder). */ | |
77 | const int divisor_clz = __insn_clz (divisor); | |
78 | const uint32_t max_divisor = divisor << divisor_clz; | |
79 | const uint32_t max_qbit = 1 << divisor_clz; | |
80 | ||
81 | uint32_t quotient = 0; | |
82 | uint32_t remainder = dividend; | |
83 | ||
84 | while (remainder >= divisor) | |
85 | { | |
86 | int shift = __insn_clz (remainder); | |
87 | uint32_t scaled_divisor = max_divisor >> shift; | |
88 | uint32_t quotient_bit = max_qbit >> shift; | |
89 | ||
90 | int too_big = (scaled_divisor > remainder); | |
91 | scaled_divisor >>= too_big; | |
92 | quotient_bit >>= too_big; | |
93 | remainder -= scaled_divisor; | |
94 | quotient |= quotient_bit; | |
95 | } | |
96 | return quotient; | |
97 | } | |
98 | #endif /* !__tilegx__ */ | |
99 | ||
100 | ||
101 | /* __udivdi3 - 64 bit integer unsigned divide */ | |
102 | static inline uint64_t __attribute__ ((always_inline)) | |
103 | __udivdi3_inline (uint64_t dividend, uint64_t divisor) | |
104 | { | |
105 | /* Divide out any power of two factor from dividend and divisor. | |
106 | Note that when dividing by zero the divisor will remain zero, | |
107 | which is all we need to detect that case below. */ | |
108 | const int power_of_two_factor = __builtin_ctzll (divisor); | |
109 | divisor >>= power_of_two_factor; | |
110 | dividend >>= power_of_two_factor; | |
111 | ||
112 | /* Checks for division by power of two or division by zero. */ | |
113 | if (divisor <= 1) | |
114 | { | |
115 | if (divisor == 0) | |
116 | { | |
117 | raise_intdiv (); | |
118 | return 0; | |
119 | } | |
120 | return dividend; | |
121 | } | |
122 | ||
123 | #ifndef __tilegx__ | |
124 | if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0) | |
125 | { | |
126 | /* Operands both fit in 32 bits, so use faster 32 bit algorithm. */ | |
127 | return __udivsi3_inline ((uint32_t) dividend, (uint32_t) divisor); | |
128 | } | |
129 | #endif /* !__tilegx__ */ | |
130 | ||
131 | /* See algorithm description in __udivsi3 */ | |
132 | ||
133 | const int divisor_clz = __builtin_clzll (divisor); | |
134 | const uint64_t max_divisor = divisor << divisor_clz; | |
135 | const uint64_t max_qbit = 1ULL << divisor_clz; | |
136 | ||
137 | uint64_t quotient = 0; | |
138 | uint64_t remainder = dividend; | |
139 | ||
140 | while (remainder >= divisor) | |
141 | { | |
142 | int shift = __builtin_clzll (remainder); | |
143 | uint64_t scaled_divisor = max_divisor >> shift; | |
144 | uint64_t quotient_bit = max_qbit >> shift; | |
145 | ||
146 | int too_big = (scaled_divisor > remainder); | |
147 | scaled_divisor >>= too_big; | |
148 | quotient_bit >>= too_big; | |
149 | remainder -= scaled_divisor; | |
150 | quotient |= quotient_bit; | |
151 | } | |
152 | return quotient; | |
153 | } | |
154 | ||
155 | ||
156 | #ifndef __tilegx__ | |
157 | /* __umodsi3 - 32 bit integer unsigned modulo */ | |
158 | static inline uint32_t __attribute__ ((always_inline)) | |
159 | __umodsi3_inline (uint32_t dividend, uint32_t divisor) | |
160 | { | |
161 | /* Shortcircuit mod by a power of two (and catch mod by zero). */ | |
162 | const uint32_t mask = divisor - 1; | |
163 | if ((divisor & mask) == 0) | |
164 | { | |
165 | if (divisor == 0) | |
166 | { | |
167 | raise_intdiv (); | |
168 | return 0; | |
169 | } | |
170 | return dividend & mask; | |
171 | } | |
172 | ||
173 | /* We compute the remainder (a % b) by repeatedly subtracting off | |
174 | multiples of b from a until a < b. The key is that subtracting | |
175 | off a multiple of b does not affect the result mod b. | |
176 | ||
177 | To make the algorithm run efficiently, we need to subtract | |
178 | off a large multiple of b at each step. We subtract the largest | |
179 | (b << N) that is <= a. | |
180 | ||
181 | Finding N is easy. First, use clz(b) - clz(a) to find the N | |
182 | that lines up the high bit of (b << N) with the high bit of a. | |
183 | Any larger value of N would definitely make (b << N) > a, | |
184 | which is too big. | |
185 | ||
186 | Then, if (b << N) > a (because it has larger low bits), decrement | |
187 | N by one. This adjustment will definitely make (b << N) less | |
188 | than a, because a's high bit is now one higher than b's. */ | |
189 | const uint32_t max_divisor = divisor << __insn_clz (divisor); | |
190 | ||
191 | uint32_t remainder = dividend; | |
192 | while (remainder >= divisor) | |
193 | { | |
194 | const int shift = __insn_clz (remainder); | |
195 | uint32_t scaled_divisor = max_divisor >> shift; | |
196 | scaled_divisor >>= (scaled_divisor > remainder); | |
197 | remainder -= scaled_divisor; | |
198 | } | |
199 | ||
200 | return remainder; | |
201 | } | |
202 | #endif /* !__tilegx__ */ | |
203 | ||
204 | ||
205 | /* __umoddi3 - 64 bit integer unsigned modulo */ | |
206 | static inline uint64_t __attribute__ ((always_inline)) | |
207 | __umoddi3_inline (uint64_t dividend, uint64_t divisor) | |
208 | { | |
209 | #ifndef __tilegx__ | |
210 | if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0) | |
211 | { | |
212 | /* Operands both fit in 32 bits, so use faster 32 bit algorithm. */ | |
213 | return __umodsi3_inline ((uint32_t) dividend, (uint32_t) divisor); | |
214 | } | |
215 | #endif /* !__tilegx__ */ | |
216 | ||
217 | /* Shortcircuit mod by a power of two (and catch mod by zero). */ | |
218 | const uint64_t mask = divisor - 1; | |
219 | if ((divisor & mask) == 0) | |
220 | { | |
221 | if (divisor == 0) | |
222 | { | |
223 | raise_intdiv (); | |
224 | return 0; | |
225 | } | |
226 | return dividend & mask; | |
227 | } | |
228 | ||
229 | /* See algorithm description in __umodsi3 */ | |
230 | const uint64_t max_divisor = divisor << __builtin_clzll (divisor); | |
231 | ||
232 | uint64_t remainder = dividend; | |
233 | while (remainder >= divisor) | |
234 | { | |
235 | const int shift = __builtin_clzll (remainder); | |
236 | uint64_t scaled_divisor = max_divisor >> shift; | |
237 | scaled_divisor >>= (scaled_divisor > remainder); | |
238 | remainder -= scaled_divisor; | |
239 | } | |
240 | ||
241 | return remainder; | |
242 | } | |
243 | ||
244 | ||
245 | uint32_t __udivsi3 (uint32_t dividend, uint32_t divisor); | |
246 | #ifdef L_tile_udivsi3 | |
247 | uint32_t | |
248 | __udivsi3 (uint32_t dividend, uint32_t divisor) | |
249 | { | |
250 | #ifndef __tilegx__ | |
251 | return __udivsi3_inline (dividend, divisor); | |
252 | #else /* !__tilegx__ */ | |
253 | uint64_t n = __udivdi3_inline (((uint64_t) dividend), ((uint64_t) divisor)); | |
254 | return (uint32_t) n; | |
255 | #endif /* !__tilegx__ */ | |
256 | } | |
257 | #endif | |
258 | ||
259 | #define ABS(x) ((x) >= 0 ? (x) : -(x)) | |
260 | ||
261 | int32_t __divsi3 (int32_t dividend, int32_t divisor); | |
262 | #ifdef L_tile_divsi3 | |
263 | /* __divsi3 - 32 bit integer signed divide */ | |
264 | int32_t | |
265 | __divsi3 (int32_t dividend, int32_t divisor) | |
266 | { | |
267 | #ifndef __tilegx__ | |
268 | uint32_t n = __udivsi3_inline (ABS (dividend), ABS (divisor)); | |
269 | #else /* !__tilegx__ */ | |
270 | uint64_t n = | |
271 | __udivdi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor)); | |
272 | #endif /* !__tilegx__ */ | |
273 | if ((dividend ^ divisor) < 0) | |
274 | n = -n; | |
275 | return (int32_t) n; | |
276 | } | |
277 | #endif | |
278 | ||
279 | ||
280 | uint64_t __udivdi3 (uint64_t dividend, uint64_t divisor); | |
281 | #ifdef L_tile_udivdi3 | |
282 | uint64_t | |
283 | __udivdi3 (uint64_t dividend, uint64_t divisor) | |
284 | { | |
285 | return __udivdi3_inline (dividend, divisor); | |
286 | } | |
287 | #endif | |
288 | ||
289 | /*__divdi3 - 64 bit integer signed divide */ | |
290 | int64_t __divdi3 (int64_t dividend, int64_t divisor); | |
291 | #ifdef L_tile_divdi3 | |
292 | int64_t | |
293 | __divdi3 (int64_t dividend, int64_t divisor) | |
294 | { | |
295 | uint64_t n = __udivdi3_inline (ABS (dividend), ABS (divisor)); | |
296 | if ((dividend ^ divisor) < 0) | |
297 | n = -n; | |
298 | return (int64_t) n; | |
299 | } | |
300 | #endif | |
301 | ||
302 | ||
303 | uint32_t __umodsi3 (uint32_t dividend, uint32_t divisor); | |
304 | #ifdef L_tile_umodsi3 | |
305 | uint32_t | |
306 | __umodsi3 (uint32_t dividend, uint32_t divisor) | |
307 | { | |
308 | #ifndef __tilegx__ | |
309 | return __umodsi3_inline (dividend, divisor); | |
310 | #else /* !__tilegx__ */ | |
311 | return __umoddi3_inline ((uint64_t) dividend, (uint64_t) divisor); | |
312 | #endif /* !__tilegx__ */ | |
313 | } | |
314 | #endif | |
315 | ||
316 | ||
317 | /* __modsi3 - 32 bit integer signed modulo */ | |
318 | int32_t __modsi3 (int32_t dividend, int32_t divisor); | |
319 | #ifdef L_tile_modsi3 | |
320 | int32_t | |
321 | __modsi3 (int32_t dividend, int32_t divisor) | |
322 | { | |
323 | #ifndef __tilegx__ | |
324 | uint32_t remainder = __umodsi3_inline (ABS (dividend), ABS (divisor)); | |
325 | #else /* !__tilegx__ */ | |
326 | uint64_t remainder = | |
327 | __umoddi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor)); | |
328 | #endif /* !__tilegx__ */ | |
329 | return (int32_t) ((dividend >= 0) ? remainder : -remainder); | |
330 | } | |
331 | #endif | |
332 | ||
333 | ||
334 | uint64_t __umoddi3 (uint64_t dividend, uint64_t divisor); | |
335 | #ifdef L_tile_umoddi3 | |
336 | uint64_t | |
337 | __umoddi3 (uint64_t dividend, uint64_t divisor) | |
338 | { | |
339 | return __umoddi3_inline (dividend, divisor); | |
340 | } | |
341 | #endif | |
342 | ||
343 | ||
344 | /* __moddi3 - 64 bit integer signed modulo */ | |
345 | int64_t __moddi3 (int64_t dividend, int64_t divisor); | |
346 | #ifdef L_tile_moddi3 | |
347 | int64_t | |
348 | __moddi3 (int64_t dividend, int64_t divisor) | |
349 | { | |
350 | uint64_t remainder = __umoddi3_inline (ABS (dividend), ABS (divisor)); | |
351 | return (int64_t) ((dividend >= 0) ? remainder : -remainder); | |
352 | } | |
353 | #endif |