]>
Commit | Line | Data |
---|---|---|
c5a543ea MV |
1 | /* |
2 | * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines | |
3 | * | |
4 | * Author: Nicolas Pitre <nico@fluxnic.net> | |
5 | * - contributed to gcc-3.4 on Sep 30, 2003 | |
6 | * - adapted for the Linux kernel on Oct 2, 2003 | |
7 | */ | |
8 | ||
9 | /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. | |
10 | ||
11 | * SPDX-License-Identifier: GPL-2.0+ | |
12 | */ | |
13 | ||
14 | ||
15 | #include <linux/linkage.h> | |
16 | #include <asm/assembler.h> | |
17 | ||
/*
 * U-Boot compatibility bits: U-Boot does not support stack unwinding,
 * so UNWIND() is defined as an empty macro and every unwind annotation
 * in this file expands to nothing. CONFIG_AEABI is defined so that all
 * of the functions guarded by it are built, without diverging from the
 * Linux code.
 */
#ifdef __UBOOT__
#define UNWIND(x...)
#define CONFIG_AEABI
#endif
27 | ||
/*
 * ARM_DIV_BODY - core of the unsigned 32-bit division.
 *
 *   dividend  in: dividend; destroyed (both callers in this file discard
 *             it afterwards, do not rely on it holding the remainder)
 *   divisor   in: divisor; destroyed
 *   result    out: quotient
 *   curbit    scratch: quotient bit that matches the current divisor
 *             position
 *
 * Both callers in this file branch around this macro when the divisor is
 * 0 or 1, when dividend <= divisor, and when the divisor is a power of
 * two, so it only runs with dividend > divisor > 1.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Line the top set bit of the divisor up with the top set bit of
	@ the dividend in one go: shift = clz(divisor) - clz(dividend).
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result	@ curbit = 1 << shift
	mov	\result, #0			@ quotient is accumulated here

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps per pass, at the
	@ current divisor position and the three positions below it.
	@ Each successful subtraction sets the matching bit in result.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4	@ Z is set once curbit ran out
	bne	1b

.endm
92 | ||
93 | ||
/*
 * ARM_DIV2_ORDER - bit index of the top set bit of the divisor.
 *
 *   divisor   in: divisor; destroyed on the pre-ARMv5 path
 *   order     out: bit index, usable as a right-shift count
 *
 * Both callers in this file only use this macro after checking that
 * (divisor & (divisor - 1)) == 0, i.e. for a power-of-two divisor, so
 * the result is log2(divisor) there.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - clz(divisor)

#else

	@ Binary search for the set bit: halve the candidate range
	@ (16, 8, 4 bits) and accumulate the shift applied so far.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ The divisor now fits in 4 bits.  A value above 4 adds 3;
	@ otherwise divisor >> 1 (0, 1 or 2 for 1, 2 or 4) is the
	@ remaining part of the order.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
123 | ||
124 | ||
/*
 * ARM_MOD_BODY - core of the unsigned 32-bit modulo.
 *
 *   dividend  in: dividend; out: remainder
 *   divisor   in: divisor; destroyed
 *   order     scratch: number of bit positions the divisor was shifted up,
 *             then used as a count-down of the remaining steps
 *   spare     scratch (only written on the ARMv5+ path)
 *
 * Both callers in this file only reach this macro with
 * dividend > divisor and a divisor that is not a power of two.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ order = clz(divisor) - clz(dividend); shift the divisor up so
	@ its top set bit lines up with the top set bit of the dividend.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f				@ fewer than 4 steps in total

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1			@ LT when the dividend reached 0
	mov	\divisor, \divisor, lsr #4	@ no S suffix: flags are kept
	subsge	\order, \order, #4		@ count down only if dividend != 0
	bge	1b				@ loop while a full batch is left

	@ Finished if the low two bits of order are clear (no partial
	@ batch is left) or if the dividend is already zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f				@ one step left
	beq	3f				@ two steps left
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm
194 | ||
195 | ||
/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 *   in:   r0 = dividend, r1 = divisor
 *   out:  r0 = quotient
 *   uses: r1, r2, r3 and the flags as scratch
 *
 * A zero divisor is handed to Ldiv0.
 */
.pushsection .text.__udivsi3, "ax"
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor == 1: r0 is the answer
	bcc	Ldiv0				@ borrow: divisor == 0
	cmp	r0, r1
	bls	.Ludiv_not_above		@ dividend <= divisor
	tst	r1, r2
	beq	.Ludiv_pow2			@ divisor is a power of two

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2				@ quotient was built in r2
	ret	lr

	@ Flags still come from the cmp above: EQ gives 1, LO gives 0.
.Ludiv_not_above:
	moveq	r0, #1
	movne	r0, #0
	ret	lr

	@ Power-of-two divisor: the quotient is a plain right shift.
.Ludiv_pow2:
	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
.popsection
c5a543ea | 227 | |
/*
 * __umodsi3 - unsigned 32-bit modulo.
 *
 *   in:   r0 = dividend, r1 = divisor
 *   out:  r0 = remainder
 *   uses: r1, r2, r3 and the flags as scratch
 *
 * A zero divisor is handed to Ldiv0.  The predicated sequence below
 * settles every easy case without branching; each instruction relies on
 * the flags left by the ones before it, so the order must not change.
 */
.pushsection .text.__umodsi3, "ax"
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0				@ borrow: divisor == 0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0				@ divisor == 1 or equal operands
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ power of 2: mask with divisor - 1
	retls	lr				@ done unless dividend > divisor
						@ and divisor is no power of 2

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
.popsection
c5a543ea | 247 | |
/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 *   in:   r0 = dividend, r1 = divisor
 *   out:  r0 = quotient
 *   uses: r1, r2, r3, ip and the flags as scratch
 *
 * The division itself is done on magnitudes; bit 31 of ip holds the sign
 * of the result and is applied at the end.  A zero divisor is handed to
 * Ldiv0.
 */
.pushsection .text.__divsi3, "ax"
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	eor	ip, r0, r1			@ bit 31 = sign of the result
	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ r1 = magnitude of the divisor
	subs	r2, r1, #1			@ r2 = magnitude - 1
	beq	.Lidiv_by_one			@ divisor is 1 or -1
	movs	r3, r0
	rsbmi	r3, r0, #0			@ r3 = magnitude of the dividend
	cmp	r3, r1
	bls	.Lidiv_not_above		@ magnitude <= divisor magnitude
	tst	r1, r2
	beq	.Lidiv_pow2			@ divisor is a power of two

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ negate when the signs differed
	ret	lr

	@ ip ^ r0 has the sign of the original divisor, so the dividend
	@ is negated exactly when the divisor was -1.
.Lidiv_by_one:
	teq	ip, r0
	rsbmi	r0, r0, #0
	ret	lr

	@ Flags still come from cmp r3, r1 above.  LO gives 0; EQ gives
	@ +1 or -1, built from the sign bit in ip.
.Lidiv_not_above:
	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

	@ Power-of-two divisor: shift the magnitude, then fix the sign.
.Lidiv_pow2:
	ARM_DIV2_ORDER r1, r2

	mov	r0, r3, lsr r2
	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
.popsection
c5a543ea | 292 | |
/*
 * __modsi3 - signed 32-bit modulo.
 *
 *   in:   r0 = dividend, r1 = divisor
 *   out:  r0 = remainder, carrying the sign of the dividend
 *   uses: r1, r2, r3, ip and the flags as scratch
 *
 * The work is done on magnitudes; ip keeps the original dividend so its
 * sign can be applied at the end.  A zero divisor is handed to Ldiv0.
 */
.pushsection .text.__modsi3, "ax"
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ r1 = magnitude of the divisor
	movs	ip, r0				@ ip = original dividend
	rsbmi	r0, r0, #0			@ r0 = magnitude of the dividend

	@ Easy cases, settled by one predicated chain.  Each instruction
	@ relies on the flags left by the ones before it.
	subs	r2, r1, #1			@ r2 = divisor - 1, EQ if it is 1
	cmpne	r0, r1				@ else compare the magnitudes
	moveq	r0, #0				@ divisor 1 or equal: remainder 0
	tsthi	r1, r2				@ dividend larger: power of two?
	andeq	r0, r0, r2			@ yes: mask with divisor - 1
	bls	.Lmod_apply_sign		@ every easy case is finished

	ARM_MOD_BODY r0, r1, r2, r3

.Lmod_apply_sign:
	cmp	ip, #0
	rsbmi	r0, r0, #0			@ negative dividend: negate
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
.popsection
c5a543ea MV |
318 | |
319 | #ifdef CONFIG_AEABI | |
320 | ||
/*
 * __aeabi_uidivmod - unsigned 32-bit division with remainder.
 *
 *   in:   r0 = dividend, r1 = divisor
 *   out:  r0 = quotient, r1 = remainder
 *   uses: r2, r3 as scratch
 *
 * The operands are kept on the stack across the call to __aeabi_uidiv
 * and the remainder is rebuilt as dividend - quotient * divisor.
 */
.pushsection .text.__aeabi_uidivmod, "ax"
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmdb	sp!, {r0, r1, ip, lr}		@ push dividend, divisor, ip, lr
	bl	__aeabi_uidiv			@ r0 = quotient
	ldmia	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
.popsection
c5a543ea | 336 | |
/*
 * __aeabi_idivmod - signed 32-bit division with remainder.
 *
 *   in:   r0 = dividend, r1 = divisor
 *   out:  r0 = quotient, r1 = remainder
 *   uses: r2, r3 as scratch
 *
 * The operands are kept on the stack across the call to __aeabi_idiv
 * and the remainder is rebuilt as dividend - quotient * divisor.
 *
 * The routine lives in its own section, named after itself like every
 * other routine in this file.  It used to be placed in
 * .text.__aeabi_uidivmod, a copy and paste slip that tied it to the
 * unsigned variant's section.
 */
.pushsection .text.__aeabi_idivmod, "ax"
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}		@ push dividend, divisor, ip, lr
	bl	__aeabi_idiv			@ r0 = quotient
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
.popsection
c5a543ea MV |
352 | |
353 | #endif | |
354 | ||
/*
 * Ldiv0 - common division-by-zero path.
 *
 * The division and modulo routines in this file branch here (not call)
 * when the divisor is zero, so lr still holds the return address of
 * their caller.  __div0 is called, then 0 is returned in r0 directly to
 * that caller.
 */
.pushsection .text.Ldiv0, "ax"
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})

	@ Store lr but move sp down by 8, leaving 4 bytes of padding
	@ (this is what the .pad #4 annotation above describes).
	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ reload the saved lr into pc, drop 8 bytes

UNWIND(.fnend)
ENDPROC(Ldiv0)
.popsection
806f86bd MV |
369 | |
370 | /* Thumb-1 specialities */ | |
3a649407 | 371 | #if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2) |
/*
 * __gnu_thumb1_case_sqi - table dispatch on signed byte entries.
 *
 *   in:   r0 = index, lr = return address; the table is read from that
 *         address with bit 0 cleared
 *   out:  returns to lr + 2 * (signed byte at table[r0])
 *
 * r1 is saved and restored; r0 is left untouched.
 * NOTE(review): presumably called by compiler-generated Thumb-1 switch
 * code (the name matches the libgcc helper) - confirm.
 */
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
ENTRY(__gnu_thumb1_case_sqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ shift right then left again:
	lsls	r1, r1, #1		@ clears bit 0 of the address
	ldrsb	r1, [r1, r0]		@ sign-extended table entry
	lsls	r1, r1, #1		@ entry * 2
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_sqi)
.popsection
806f86bd | 385 | |
/*
 * __gnu_thumb1_case_uqi - table dispatch on unsigned byte entries.
 *
 *   in:   r0 = index, lr = return address; the table is read from that
 *         address with bit 0 cleared
 *   out:  returns to lr + 2 * (unsigned byte at table[r0])
 *
 * r1 is saved and restored; r0 is left untouched.
 * NOTE(review): presumably called by compiler-generated Thumb-1 switch
 * code (the name matches the libgcc helper) - confirm.
 */
.pushsection .text.__gnu_thumb1_case_uqi, "ax"
ENTRY(__gnu_thumb1_case_uqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ shift right then left again:
	lsls	r1, r1, #1		@ clears bit 0 of the address
	ldrb	r1, [r1, r0]		@ zero-extended table entry
	lsls	r1, r1, #1		@ entry * 2
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uqi)
.popsection
806f86bd | 399 | |
/*
 * __gnu_thumb1_case_shi - table dispatch on signed halfword entries.
 *
 *   in:   r0 = index, lr = return address; the table is read from that
 *         address with bit 0 cleared
 *   out:  returns to lr + 2 * (signed halfword at table[r0])
 *
 * r0 and r1 are saved and restored.
 * NOTE(review): presumably called by compiler-generated Thumb-1 switch
 * code (the name matches the libgcc helper) - confirm.
 */
.pushsection .text.__gnu_thumb1_case_shi, "ax"
ENTRY(__gnu_thumb1_case_shi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ first half of clearing bit 0
	lsls	r0, r0, #1		@ index * 2: byte offset of the entry
	lsls	r1, r1, #1		@ bit 0 of the address is now clear
	ldrsh	r1, [r1, r0]		@ sign-extended table entry
	lsls	r1, r1, #1		@ entry * 2
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_shi)
.popsection
806f86bd | 414 | |
/*
 * __gnu_thumb1_case_uhi - table dispatch on unsigned halfword entries.
 *
 *   in:   r0 = index, lr = return address; the table is read from that
 *         address with bit 0 cleared
 *   out:  returns to lr + 2 * (unsigned halfword at table[r0])
 *
 * r0 and r1 are saved and restored.
 * NOTE(review): presumably called by compiler-generated Thumb-1 switch
 * code (the name matches the libgcc helper) - confirm.
 */
.pushsection .text.__gnu_thumb1_case_uhi, "ax"
ENTRY(__gnu_thumb1_case_uhi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ first half of clearing bit 0
	lsls	r0, r0, #1		@ index * 2: byte offset of the entry
	lsls	r1, r1, #1		@ bit 0 of the address is now clear
	ldrh	r1, [r1, r0]		@ zero-extended table entry
	lsls	r1, r1, #1		@ entry * 2
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uhi)
.popsection
806f86bd | 429 | #endif |