]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgcc/config/bfin/lib1funcs.S
Update copyright years.
[thirdparty/gcc.git] / libgcc / config / bfin / lib1funcs.S
CommitLineData
/* libgcc functions for Blackfin.
   Copyright (C) 2005-2023 Free Software Foundation, Inc.
   Contributed by Analog Devices.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
0d4a78eb
BS
25
#ifdef L_divsi3
.text
.align 2
.global ___divsi3;
.type ___divsi3, STT_FUNC;

/* ___divsi3: signed 32-bit division.
   In:   R0 = dividend, R1 = divisor.
   Out:  R0 = quotient.
   Both operands are replaced by their magnitudes, the division itself
   is done by ___udivsi3, and the quotient is negated when exactly one
   operand was negative.  R7 holds that sign flag across the call, so it
   is saved and restored here together with RETS.  R1 and R2 are used as
   scratch in this routine.  */
___divsi3:
	[--SP] = RETS;
	[--SP] = R7;

	/* R0 = |R0|; R7 = 1 if the dividend was negative, else 0.  */
	R2 = -R0;
	CC = R0 < 0;
	IF CC R0 = R2;
	R7 = CC;

	/* R1 = |R1|; XOR the divisor's sign into R7 so that R7 is set
	   only when the two operand signs differ.  */
	R2 = -R1;
	CC = R1 < 0;
	IF CC R1 = R2;
	R2 = CC;
	R7 = R7 ^ R2;

	CALL ___udivsi3;

	/* Negate the unsigned quotient if the operand signs differed.  */
	CC = R7;
	R1 = -R0;
	IF CC R0 = R1;

	R7 = [SP++];
	RETS = [SP++];
	RTS;

.size ___divsi3, .-___divsi3
#endif
57
#ifdef L_modsi3
.align 2
.global ___modsi3;
.type ___modsi3, STT_FUNC;

/* ___modsi3: signed 32-bit remainder.
   In:   R0 = dividend, R1 = divisor.
   Out:  R0 = dividend - quotient * divisor, where the quotient is the
	 value returned by ___divsi3.
   The original operands are kept on the stack across the call because
   ___divsi3 overwrites R0 and R1.  R2 is used as scratch.  */
___modsi3:
	[--SP] = RETS;
	[--SP] = R0;		/* save dividend */
	[--SP] = R1;		/* save divisor */
	CALL ___divsi3;
	/* Pops come back in reverse push order.  */
	R2 = [SP++];		/* R2 = divisor */
	R1 = [SP++];		/* R1 = dividend */
	R2 *= R0;		/* R2 = divisor * quotient */
	R0 = R1 - R2;		/* R0 = remainder */
	RETS = [SP++];
	RTS;

.size ___modsi3, .-___modsi3
#endif
75
#ifdef L_udivsi3
.align 2
.global ___udivsi3;
.type ___udivsi3, STT_FUNC;

/* ___udivsi3: unsigned 32-bit division by shift-and-subtract, one
   quotient bit per pass of a 32-iteration hardware loop.
   In:   R0 = dividend, R1 = divisor.
   Out:  R0 = quotient, R3 = remainder (___umodsi3 reads R3 after
	 calling this routine).
   Uses: R2, P0, CC and hardware loop 0 (LC0) as scratch.
   No check is made for a zero divisor.

   The quotient is built in place in R0: each ROT of R0 pushes the next
   dividend bit out into CC and pulls the CC left by the previous pass's
   compare in at bit 0.  That CC is "R3 < R1", i.e. the complement of
   the quotient bit, which is why R0 is inverted at the end.  */
___udivsi3:
	P0 = 32;
	LSETUP (0f, 1f) LC0 = P0;
	/* upper half of dividend */
	R3 = 0;
0:
	/* The first time round in the loop we shift in garbage, but since we
	   perform 33 shifts, it doesn't matter.  */
	R0 = ROT R0 BY 1;
	/* Bring the dividend bit now in CC into the partial remainder.  */
	R3 = ROT R3 BY 1;
	R2 = R3 - R1;
	CC = R3 < R1 (IU);
1:
	/* Last instruction of the loop.  Keep the subtracted value only
	   when the partial remainder was not smaller than the divisor.  */
	IF ! CC R3 = R2;

	/* Shift in the last bit.  */
	R0 = ROT R0 BY 1;
	/* R0 is the result, R3 contains the remainder.  */
	R0 = ~ R0;
	RTS;

.size ___udivsi3, .-___udivsi3
#endif
103
#ifdef L_umodsi3
.align 2
.global ___umodsi3;
.type ___umodsi3, STT_FUNC;

/* ___umodsi3: unsigned 32-bit remainder.
   In:   R0 = dividend, R1 = divisor.
   Out:  R0 = remainder.
   ___udivsi3 leaves the remainder in R3 as a side effect of computing
   the quotient, so this routine only has to call it and copy R3 into
   the return register.  */
___umodsi3:
	[--SP] = RETS;
	CALL ___udivsi3;
	R0 = R3;		/* remainder left in R3 by ___udivsi3 */
	RETS = [SP++];
	RTS;

.size ___umodsi3, .-___umodsi3
#endif
116
01e7cd6e
BS
#ifdef L_umulsi3_highpart
.align 2
.global ___umulsi3_highpart;
.type ___umulsi3_highpart, STT_FUNC;

/* ___umulsi3_highpart: upper 32 bits of the unsigned product R0 * R1.
   In:   R0, R1 = unsigned 32-bit operands.
   Out:  R0 = bits 63..32 of the 64-bit product.
   Uses: accumulators A0 and A1.

   With R0 = a.H:a.L and R1 = b.H:b.L the product is
	a.H*b.H * 2^32 + (a.H*b.L + a.L*b.H) * 2^16 + a.L*b.L
   A1 collects the low and middle terms, shifted down 16 bits at a time
   so that only their carry into the upper word remains; A0 holds the
   high term, to which that carry is finally added.  */
___umulsi3_highpart:
	A1 = R1.L * R0.L (FU);		/* low * low */
	A1 = A1 >> 16;			/* keep its carry into the middle */
	A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU);	/* high * high; first cross term */
	A1 += R0.L * R1.H (FU);		/* second cross term */
	A1 = A1 >> 16;			/* carry from the middle into the top */
	A0 += A1;
	R0 = A0 (FU);
	RTS;

.size ___umulsi3_highpart, .-___umulsi3_highpart
#endif
132
#ifdef L_smulsi3_highpart
.align 2
.global ___smulsi3_highpart;
.type ___smulsi3_highpart, STT_FUNC;

/* ___smulsi3_highpart: upper 32 bits of the signed product R0 * R1.
   In:   R0, R1 = signed 32-bit operands.
   Out:  R0 = bits 63..32 of the 64-bit product.
   Uses: accumulators A0 and A1.

   Same decomposition as the unsigned variant, except that the high
   halves carry the sign: low * low is unsigned (FU), the cross terms
   multiply a signed high half by an unsigned low half (mixed mode, M),
   high * high is signed (IS), and the second shift is arithmetic (>>>)
   so the sign of the accumulated cross terms is kept.  */
___smulsi3_highpart:
	A1 = R1.L * R0.L (FU);		/* low * low, unsigned */
	A1 = A1 >> 16;			/* keep its carry into the middle */
	A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M);	/* high * high; first cross term */
	A1 += R1.H * R0.L (IS,M);	/* second cross term */
	A1 = A1 >>> 16;			/* signed carry from the middle into the top */
	R0 = (A0 += A1);
	RTS;

.size ___smulsi3_highpart, .-___smulsi3_highpart
#endif
f0b568f3
SH
147
#ifdef L_muldi3
.align 2
.global ___muldi3;
.type ___muldi3, STT_FUNC;

/* ___muldi3: low 64 bits of a 64 x 64 bit multiplication.
   In:   R1:R0 = first operand; R2 = low word of the second operand,
	 whose high word is loaded from [SP + 12] into R3.
   Out:  R1:R0 = product (low 64 bits).
   Uses: A0, A1, R2, R3; R4 is parked in [SP] and restored on exit.

	   R1:R0 * R3:R2
	 = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l
[X]	 = (R1.h * R3.h) * 2^96
[X]	   + (R1.h * R3.l + R1.l * R3.h) * 2^80
[X]	   + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64
[T1]	   + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48
[T2]	   + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32
[T3]	   + (R0.l * R2.h + R2.l * R0.h) * 2^16
[T4]	   + (R0.l * R2.l)

	We can discard the first three lines marked "X" since we produce
	only a 64 bit result.  So, we need ten 16-bit multiplies.

	Individual mul-acc results:
[E1]	 = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h
[E2]	 = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h
[E3]	 = R0.l * R2.h + R2.l * R0.h
[E4]	 = R0.l * R2.l

	We also need to add high parts from lower-level results to higher ones:
	E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4

	One interesting property is that all parts of the result that depend
	on the sign of the multiplication are discarded.  Those would be the
	multiplications involving R1.h and R3.h, but only the top 16 bit of
	the 32 bit result depend on the sign, and since R1.h and R3.h only
	occur in E1, the top half of these results is cut off.
	So, we can just use FU mode for all of the 16-bit multiplies, and
	ignore questions of when to use mixed mode.  */

___muldi3:
	/* [SP] technically is part of the caller's frame, but we can
	   use it as scratch space.  */
	A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12];	/* E1 */
	A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4;		/* E1 */
	A0 += A1;							/* E1 */
	R4 = A0.w;
	A0 = R0.l * R3.l (FU);						/* E2 */
	A0 += R2.l * R1.l (FU);						/* E2 */

	A1 = R2.L * R0.L (FU);						/* E4 */
	R3 = A1.w;
	A1 = A1 >> 16;							/* E3c */
	A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU);			/* E2, E3c */
	A1 += R0.L * R2.H (FU);						/* E3c */
	R0 = A1.w;
	A1 = A1 >> 16;							/* E2c */
	A0 += A1;							/* E2c */
	R1 = A0.w;

	/* low(result) = low(E3c):low(E4) */
	R0 = PACK (R0.l, R3.l);
	/* high(result) = E2c + (E1 << 16) */
	R1.h = R1.h + R4.l (NS) || R4 = [SP];
	RTS;

.size ___muldi3, .-___muldi3
#endif