]>
Commit | Line | Data |
---|---|---|
0d4a78eb | 1 | /* libgcc functions for Blackfin. |
83ffe9cd | 2 | Copyright (C) 2005-2023 Free Software Foundation, Inc. |
0d4a78eb BS |
3 | Contributed by Analog Devices. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
748086b7 | 9 | the Free Software Foundation; either version 3, or (at your option) |
0d4a78eb BS |
10 | any later version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
748086b7 JJ |
17 | Under Section 7 of GPL version 3, you are granted additional |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
0d4a78eb | 20 | |
748086b7 JJ |
21 | You should have received a copy of the GNU General Public License and |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
0d4a78eb BS |
25 | |
#ifdef L_divsi3
/* ___divsi3: signed 32-bit division.

   In:   R0 = dividend, R1 = divisor.
   Out:  R0 = quotient.  The magnitudes are divided and the result is
         negated when exactly one operand was negative, so the quotient
         is truncated toward zero.
   Uses: R7 is saved and restored around the body; R1 and R2 are
         overwritten, as is everything ___udivsi3 clobbers.
   No check is made for a zero divisor.  */
	.text
	.align 2
	.global ___divsi3;
	.type ___divsi3, STT_FUNC;

___divsi3:
	[--SP] = RETS;		/* we make a call, so RETS must be preserved */
	[--SP] = R7;		/* R7 carries the result sign across the call */

	/* R0 = |dividend|, R7 = 1 if the dividend was negative, else 0.  */
	R2 = -R0;
	CC = R0 < 0;
	IF CC R0 = R2;
	R7 = CC;

	/* R1 = |divisor|, then fold the divisor's sign into R7 so that
	   R7 is nonzero exactly when the operand signs differ.  */
	R2 = -R1;
	CC = R1 < 0;
	IF CC R1 = R2;
	R2 = CC;
	R7 = R7 ^ R2;

	CALL ___udivsi3;	/* R0 = |dividend| / |divisor| */

	/* Negate the unsigned quotient when the signs differed.  */
	CC = R7;
	R1 = -R0;
	IF CC R0 = R1;

	R7 = [SP++];
	RETS = [SP++];
	RTS;

	.size ___divsi3, .-___divsi3
#endif
57 | ||
#ifdef L_modsi3
/* ___modsi3: signed 32-bit remainder.

   In:   R0 = dividend, R1 = divisor.
   Out:  R0 = dividend - divisor * (dividend / divisor), where the
         quotient is the one returned by ___divsi3.
   Uses: R1 and R2 are overwritten, plus whatever ___divsi3 clobbers.
         Three words of stack are used temporarily.  */
	.text
	.align 2
	.global ___modsi3;
	.type ___modsi3, STT_FUNC;

___modsi3:
	[--SP] = RETS;
	[--SP] = R0;		/* keep the original dividend ... */
	[--SP] = R1;		/* ... and divisor across the call */
	CALL ___divsi3;		/* R0 = quotient */
	R2 = [SP++];		/* R2 = divisor (pushed last, popped first) */
	R1 = [SP++];		/* R1 = dividend */
	R2 *= R0;		/* R2 = divisor * quotient */
	R0 = R1 - R2;		/* remainder */
	RETS = [SP++];
	RTS;

	.size ___modsi3, .-___modsi3
#endif
75 | ||
#ifdef L_udivsi3
/* ___udivsi3: unsigned 32-bit division, one quotient bit per iteration.

   In:   R0 = dividend, R1 = divisor.
   Out:  R0 = quotient, R3 = remainder (___umodsi3 relies on R3).
   Uses: P0, hardware loop 0, R2 and CC are overwritten; R1 is not
         modified.
   No check is made for a zero divisor.

   R3 accumulates the running remainder while the dividend is rotated
   out of the top of R0 through CC.  Each step sets CC to
   "remainder < divisor", which is the complement of the quotient bit,
   and the next rotate of R0 shifts that CC into the bottom of R0.  The
   quotient is therefore built inverted and complemented at the end.  */
	.text
	.align 2
	.global ___udivsi3;
	.type ___udivsi3, STT_FUNC;

___udivsi3:
	P0 = 32;
	LSETUP (0f, 1f) LC0 = P0;
	/* upper half of dividend */
	R3 = 0;
0:
	/* The first time round in the loop we shift in garbage, but since we
	   perform 33 shifts, it doesn't matter.  */
	R0 = ROT R0 BY 1;	/* CC -> R0 bit 0, next dividend bit -> CC */
	R3 = ROT R3 BY 1;	/* bring that dividend bit into the remainder */
	R2 = R3 - R1;		/* trial subtraction */
	CC = R3 < R1 (IU);	/* CC = 1 when the divisor does not fit */
1:
	/* Last instruction of the loop: keep the difference if it fit.  */
	IF ! CC R3 = R2;

	/* Shift in the last bit.  */
	R0 = ROT R0 BY 1;
	/* R0 is the (inverted) result, R3 contains the remainder.  */
	R0 = ~ R0;
	RTS;

	.size ___udivsi3, .-___udivsi3
#endif
103 | ||
#ifdef L_umodsi3
/* ___umodsi3: unsigned 32-bit remainder.

   In:   R0 = dividend, R1 = divisor.
   Out:  R0 = remainder.
   Relies on ___udivsi3 leaving the remainder in R3 on return; clobbers
   whatever ___udivsi3 clobbers.  */
	.text
	.align 2
	.global ___umodsi3;
	.type ___umodsi3, STT_FUNC;

___umodsi3:
	[--SP] = RETS;
	CALL ___udivsi3;
	R0 = R3;		/* discard the quotient, return the remainder */
	RETS = [SP++];
	RTS;

	.size ___umodsi3, .-___umodsi3
#endif
116 | ||
01e7cd6e BS |
#ifdef L_umulsi3_highpart
/* ___umulsi3_highpart: upper 32 bits of the unsigned 64-bit product
   R0 * R1.

   In:   R0, R1 = unsigned 32-bit factors.
   Out:  R0 = bits 63..32 of the product.
   Uses: accumulators A0 and A1 are overwritten.

   With R0 = a.H:a.L and R1 = b.H:b.L the product is
	a.H*b.H * 2^32 + (a.H*b.L + a.L*b.H) * 2^16 + a.L*b.L
   A1 gathers the low and middle partial products (shifted down so only
   their carries into bit 32 remain), A0 holds the high partial
   product.  */
	.text
	.align 2
	.global ___umulsi3_highpart;
	.type ___umulsi3_highpart, STT_FUNC;

___umulsi3_highpart:
	A1 = R1.L * R0.L (FU);		/* low partial product */
	A1 = A1 >> 16;			/* keep only what carries upward */
	A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU);	/* high term; first cross term */
	A1 += R0.L * R1.H (FU);		/* second cross term */
	A1 = A1 >> 16;			/* align the middle sum with bit 32 */
	A0 += A1;
	R0 = A0 (FU);
	RTS;

	.size ___umulsi3_highpart, .-___umulsi3_highpart
#endif
132 | ||
#ifdef L_smulsi3_highpart
/* ___smulsi3_highpart: upper 32 bits of the signed 64-bit product
   R0 * R1.

   In:   R0, R1 = signed 32-bit factors.
   Out:  R0 = bits 63..32 of the product.
   Uses: accumulators A0 and A1 are overwritten.

   Same partial-product layout as the unsigned variant: the low halves
   are multiplied as unsigned values, the high halves as signed values,
   and the two cross terms use the (IS,M) mixed-mode option.
   NOTE(review): (IS,M) is taken here to mean "signed high half times
   unsigned low half" on the A1 multiply - confirm against the Blackfin
   programming reference.  */
	.text
	.align 2
	.global ___smulsi3_highpart;
	.type ___smulsi3_highpart, STT_FUNC;

___smulsi3_highpart:
	A1 = R1.L * R0.L (FU);		/* low partial product, unsigned */
	A1 = A1 >> 16;			/* logical shift: keep the carry part */
	A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M);	/* high term; first cross term */
	A1 += R1.H * R0.L (IS,M);	/* second cross term */
	A1 = A1 >>> 16;			/* arithmetic shift keeps the sign */
	R0 = (A0 += A1);
	RTS;

	.size ___smulsi3_highpart, .-___smulsi3_highpart
#endif
f0b568f3 SH |
147 | |
#ifdef L_muldi3
	.align 2
	.global ___muldi3;
	.type ___muldi3, STT_FUNC;

/* ___muldi3: low 64 bits of a 64 x 64 bit multiplication.

   In:   R1:R0 = first operand; R2 = low word of the second operand,
         whose high word is read from [SP + 12] into R3.
   Out:  R1:R0 = product modulo 2^64.
   Uses: R3, A0 and A1 are overwritten.  R4 is parked in [SP] on entry
         and reloaded before returning.

   Writing both operands as 16-bit digits,
	R1:R0 * R3:R2 = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l
   the full product expands to

     [X]    (R1.h * R3.h) * 2^96
     [X]  + (R1.h * R3.l + R1.l * R3.h) * 2^80
     [X]  + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64
     [T1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48
     [T2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32
     [T3] + (R0.l * R2.h + R2.l * R0.h) * 2^16
     [T4] + (R0.l * R2.l)

   The three rows tagged "X" lie entirely above bit 63 and are dropped,
   which leaves ten 16-bit multiplies.

   The multiply-accumulate sums that are actually formed:
     [E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h
     [E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h
     [E3] = R0.l * R2.h + R2.l * R0.h
     [E4] = R0.l * R2.l

   The upper part of each lower sum is carried into the next one up:
     E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4

   Signedness never matters here.  Only products involving R1.h or R3.h
   could depend on the operand signs, only the top 16 bits of such a
   32-bit product are affected, and those digits appear only in E1,
   whose top half is cut off.  Hence every multiply below simply uses
   FU mode and mixed mode is not needed.  */

___muldi3:
	/* [SP] technically belongs to the caller's frame, but it is
	   free for us to use as scratch space (it holds R4 here).  */
	A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12];	/* E1 */
	A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4;		/* E1 */
	A0 += A1;				/* E1 complete */
	R4 = A0.W;				/* R4 = low word of E1 */
	A0 = R0.L * R3.L (FU);			/* E2 */
	A0 += R2.L * R1.L (FU);			/* E2 */

	A1 = R2.L * R0.L (FU);			/* E4 */
	R3 = A1.W;				/* R3 = low word of E4 */
	A1 = A1 >> 16;				/* start E3c with the carry from E4 */
	A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU);	/* E2, E3c */
	A1 += R0.L * R2.H (FU);			/* E3c */
	R0 = A1.W;				/* R0 = low word of E3c */
	A1 = A1 >> 16;				/* carry from E3c into E2 */
	A0 += A1;				/* E2c */
	R1 = A0.W;				/* R1 = low word of E2c */

	/* low(result) = low(E3c):low(E4) */
	R0 = PACK (R0.L, R3.L);
	/* high(result) = E2c + (E1 << 16); R4 is restored in parallel.  */
	R1.H = R1.H + R4.L (NS) || R4 = [SP];
	RTS;

	.size ___muldi3, .-___muldi3
#endif