]>
Commit | Line | Data |
---|---|---|
feeeff5c | 1 | /* Unsigned 32 bit division optimized for Epiphany. |
7adcbafe | 2 | Copyright (C) 2009-2022 Free Software Foundation, Inc. |
feeeff5c JR |
3 | Contributed by Embecosm on behalf of Adapteva, Inc. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | This file is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by the | |
9 | Free Software Foundation; either version 3, or (at your option) any | |
10 | later version. | |
11 | ||
12 | This file is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | General Public License for more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
26 | #include "epiphany-asm.h" | |
27 | ||
; __udivsi3: unsigned 32-bit division (see the file header).
; As used below: r0 holds the dividend on entry and the quotient on return;
; r1 holds the divisor on entry and is overwritten.
; TMP0-TMP3 are scratch register names that are not defined in this file;
; presumably they come from epiphany-asm.h (TODO confirm which registers).
; There is no explicit check for a zero divisor.
;
; Outline:
;   1. Return 0 if r0 < r1 (unsigned).
;   2. Convert both operands with float and subtract the resulting bit
;      patterns to derive a shift count N; the divisor is shifted left by N.
;   3. Repeatedly subtract the shifted divisor; each success adds 1 << N to
;      the partial quotient kept in TMP3.
;   4. A computed jump runs N shift-and-subtract steps that collect the
;      remaining N quotient bits in the low bits of r0.
; NOTE(review): step 2 assumes float leaves an IEEE single-precision bit
; pattern in an integer register, so that the difference shifted right by
; 23 is an exponent difference - confirm against the ISA manual.
28 | FSTAB (__udivsi3,T_UINT) |
29 | .global SYM(__udivsi3) |
30 | .balign 4 |
31 | HIDDEN_FUNC(__udivsi3) |
32 | SYM(__udivsi3): |
; Early out: the quotient is 0 when the dividend is below the divisor.
33 | sub TMP0,r0,r1 |
34 | bltu .Lret0 |
; TMP2 / TMP3 = dividend / divisor as converted by float.  The constant
; 0xb0800000 is built in TMP1 in two halves, interleaved with the converts.
; NOTE(review): float looks like a signed int-to-float conversion, which
; would explain the special handling of bit 31 below - confirm.
35 | float TMP2,r0 |
36 | mov TMP1,%low(0xb0800000) ; ??? this would be faster with small data |
37 | float TMP3,r1 |
38 | movt TMP1,%high(0xb0800000) |
; TMP0 = (r0 asr 8) - 0xb0800000.  The flags of this subtract drive the
; movgteu two instructions further down.
39 | asr TMP0,r0,8 |
40 | sub TMP0,TMP0,TMP1 |
; TMP1 = 0x00810000 (the low half is still the zero written by the mov).
; NOTE(review): relies on movt leaving the flags of the sub untouched.
41 | movt TMP1,%high(0x00810000) |
; If (r0 asr 8) >= 0xb0800000 unsigned, use TMP0 in place of the float
; result.  With bit 31 of r0 set the asr result is 0xffxxxxxx, so the
; condition holds; e.g. r0 = 0x80000000 gives TMP0 = 0x4f000000, the
; single-precision pattern of 2^31.  With bit 31 clear it never holds.
42 | movgteu TMP2,TMP0 |
; NOTE(review): bblt presumably tests the floating-point flags left by
; float TMP3,r1 (the most recent float), i.e. it is taken when the divisor
; has bit 31 set.  Since r0 >= r1 is already known, the quotient is then 1.
43 | bblt .Lret1 |
; TMP2 = TMP2 - 0x00810000 - TMP3, forced to 0 if the second subtract
; borrowed.  TMP3 is zeroed here both as the source for that conditional
; move and as the initial partial quotient.
; NOTE(review): relies on mov leaving the flags of the sub untouched.
44 | sub TMP2,TMP2,TMP1 |
45 | sub TMP2,TMP2,TMP3 |
46 | mov TMP3,0 |
47 | movltu TMP2,TMP3 |
; N = TMP2 >> 23 is the shift count.
48 | lsr TMP2,TMP2,23 |
; r1 = divisor << N;  TMP0 = 1 << N (quotient weight of one subtraction).
49 | lsl r1,r1,TMP2 |
50 | mov TMP0,1 |
51 | lsl TMP0,TMP0,TMP2 |
; Subtract the shifted divisor until the subtraction borrows, adding TMP0
; to TMP3 after each subtraction that did not borrow.  The first two
; iterations are written out ahead of the loop.
52 | sub r0,r0,r1 |
53 | bltu .Ladd_back |
54 | add TMP3,TMP3,TMP0 |
55 | sub r0,r0,r1 |
56 | bltu .Ladd_back |
57 | .Lsub_loop:; More than two iterations are rare, so it makes sense to leave |
58 | ; this label here to reduce average branch penalties. |
59 | add TMP3,TMP3,TMP0 |
60 | sub r0,r0,r1 |
61 | bgteu .Lsub_loop |
62 | .Ladd_back: |
; Undo the subtraction that borrowed; r0 is now the partial remainder.
63 | add r0,r0,r1 |
; TMP1 = (shifted divisor) - 1, the subtrahend used by each step below.
64 | sub TMP1,r1,1 |
; r1 = .L0step - 8*N, then jump there: this enters the unrolled block
; N steps before its end, so exactly N steps execute (none when N is 0).
; NOTE(review): this requires every step to occupy 8 bytes; the .l suffix
; on the sub presumably forces a 32-bit encoding for that reason - confirm.
65 | mov r1,%low(.L0step) |
66 | movt r1,%high(.L0step) |
67 | lsl TMP2,TMP2,3 |
68 | sub r1,r1,TMP2 |
69 | jr r1 |
; The block below provides 30 copies of one step.  Each step computes
; r0 = 2*r0 and then, if r0 >= TMP1 (unsigned), r0 = r0 - TMP1.  Because
; TMP1 is the shifted divisor minus 1, that subtracts the shifted divisor
; and adds 1, which records a quotient bit in bit 0 of r0.
70 | .rep 30 |
71 | lsl r0,r0,1 |
72 | sub.l r1,r0,TMP1 |
73 | movgteu r0,r1 |
74 | .endr |
; r1 = (1 << N) - 1: keep only the low N bits of r0, then merge in the
; partial quotient from the subtraction loop.
75 | .L0step:sub r1,TMP0,1 ; mask result bits from steps ... |
76 | and r0,r0,r1 |
77 | orr r0,r0,TMP3 ; ... and combine with first bits. |
78 | rts |
; Dividend below divisor: quotient 0.
79 | .Lret0: mov r0,0 |
80 | rts |
; Reached from the bblt above: quotient 1.
81 | .Lret1: mov r0,1 |
82 | rts |
83 | ENDFUNC(__udivsi3) |