! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
! store difference in a third limb vector.
!
! Copyright (C) 1995-2014 Free Software Foundation, Inc.
!
! This file is part of the GNU MP Library.
!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.
!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
! License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB.  If not,
! see <http://www.gnu.org/licenses/>.

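! The routine follows the usual GMP mpn calling convention.  A rough C-level
! sketch of the interface (type names assumed from GMP, for reference only):
!
!	mp_limb_t __mpn_sub_n (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
!			       const mp_limb_t *s2_ptr, mp_size_t size);
!
! It stores s1 - s2 into res, one 32-bit limb at a time, and returns the
! borrow (0 or 1) out of the most significant limb.
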
! INPUT PARAMETERS
#define RES_PTR	%o0
#define S1_PTR	%o1
#define S2_PTR	%o2
#define SIZE	%o3

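! Per the 32-bit SPARC calling convention the four arguments arrive in
! %o0..%o3, which the names above alias; the borrow is returned in %o0.
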
#include <sysdep.h>

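! Three code paths follow, chosen by the relative word alignment of the
! operands: V1a (S2_PTR aligned like RES_PTR), V1b (S1_PTR aligned like
! RES_PTR) and V2 (S1_PTR and S2_PTR aligned like each other but not like
! RES_PTR), so that doubleword ldd/std accesses are used wherever the
! alignment permits.
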
ENTRY(__mpn_sub_n)
	xor	S2_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0
	bne	LOC(1)			! branch if alignment differs
	nop
! ** V1a **
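! S2_PTR and RES_PTR share their word alignment here; after bringing RES_PTR
! to an 8-byte boundary, S2 is read with ldd and the result stored with std,
! while S1 is read one word at a time.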
	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
	be	LOC(v1)			! if no, branch
	nop
/* Subtract least significant limb separately to align RES_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	subcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR
LOC(v1):
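/* Throughout this file the borrow is carried across block boundaries in a
   register: addx %g0,%g0,%o4 copies the carry flag into %o4, and
   subcc %g0,%o4,%g0 (0 - borrow) raises the carry flag again before the
   next subxcc.  */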
	addx	%g0,%g0,%o4		! save cy in register
	cmp	SIZE,2			! if SIZE < 2 ...
	bl	LOC(end2)		! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy

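/* The unrolled loop below is software pipelined: the first two limbs of a
   block are loaded here and each iteration pre-loads the first two limbs of
   the next block, hence the bias of -10 (-8 for the unroll, -2 for the limbs
   kept in flight) applied to SIZE.  */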
	ld	[S1_PTR+0],%g4
	addcc	SIZE,-10,SIZE
	ld	[S1_PTR+4],%g1
	ldd	[S2_PTR+0],%g2
	blt	LOC(fin1)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
LOC(loop1):
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+16],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+20],%g1
	ldd	[S2_PTR+16],%g2
	std	%o4,[RES_PTR+8]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+24],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+28],%g1
	ldd	[S2_PTR+24],%g2
	std	%o4,[RES_PTR+16]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+32],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+36],%g1
	ldd	[S2_PTR+32],%g2
	std	%o4,[RES_PTR+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop1)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin1):
	addcc	SIZE,8-2,SIZE
	blt	LOC(end1)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
LOC(loope1):
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope1)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end1):
	subxcc	%g4,%g2,%o4
	subxcc	%g1,%g3,%o5
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	SIZE,1,%g0
	be	LOC(ret1)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract last limb */
	ld	[S1_PTR+8],%g4
	ld	[S2_PTR+8],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR+8]

LOC(ret1):
	retl
	addx	%g0,%g0,%o0		! return borrow-out from most significant limb

LOC(1):	xor	S1_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0
	bne	LOC(2)
	nop
! ** V1b **
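! S1_PTR and RES_PTR share their word alignment here; V1b mirrors V1a with
! the roles of S1_PTR and S2_PTR swapped.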
	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
	be	LOC(v1b)		! if no, branch
	nop
/* Subtract least significant limb separately to align RES_PTR and S1_PTR */
	ld	[S2_PTR],%g4
	add	S2_PTR,4,S2_PTR
	ld	[S1_PTR],%g2
	add	S1_PTR,4,S1_PTR
	add	SIZE,-1,SIZE
	subcc	%g2,%g4,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR
LOC(v1b):
	addx	%g0,%g0,%o4		! save cy in register
	cmp	SIZE,2			! if SIZE < 2 ...
	bl	LOC(end2)		! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy

	ld	[S2_PTR+0],%g4
	addcc	SIZE,-10,SIZE
	ld	[S2_PTR+4],%g1
	ldd	[S1_PTR+0],%g2
	blt	LOC(fin1b)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
LOC(loop1b):
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+12],%g1
	ldd	[S1_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+16],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+20],%g1
	ldd	[S1_PTR+16],%g2
	std	%o4,[RES_PTR+8]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+24],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+28],%g1
	ldd	[S1_PTR+24],%g2
	std	%o4,[RES_PTR+16]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+32],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+36],%g1
	ldd	[S1_PTR+32],%g2
	std	%o4,[RES_PTR+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop1b)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin1b):
	addcc	SIZE,8-2,SIZE
	blt	LOC(end1b)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
LOC(loope1b):
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+12],%g1
	ldd	[S1_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope1b)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end1b):
	subxcc	%g2,%g4,%o4
	subxcc	%g3,%g1,%o5
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	SIZE,1,%g0
	be	LOC(ret1b)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract last limb */
	ld	[S2_PTR+8],%g4
	ld	[S1_PTR+8],%g2
	subxcc	%g2,%g4,%o4
	st	%o4,[RES_PTR+8]

LOC(ret1b):
	retl
	addx	%g0,%g0,%o0		! return borrow-out from most significant limb

! ** V2 **
/* If we come here, the alignment of S1_PTR relative to RES_PTR differs, and
   so does the alignment of S2_PTR relative to RES_PTR.  Since there are only
   two ways things can be aligned (that we care about), we now know that
   S1_PTR and S2_PTR have the same alignment.  */

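! Both sources can therefore be read with ldd once they are aligned, while
! the result, whose alignment differs, is stored one word at a time.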
LOC(2):	cmp	SIZE,1
	be	LOC(jone)
	nop
	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
	be	LOC(v2)			! if no, branch
	nop
/* Subtract least significant limb separately to align S1_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	subcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR

LOC(v2):
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	blt	LOC(fin2)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
LOC(loop2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	ldd	[S1_PTR+8],%g2
	ldd	[S2_PTR+8],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+8]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+12]
	ldd	[S1_PTR+16],%g2
	ldd	[S2_PTR+16],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+16]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+20]
	ldd	[S1_PTR+24],%g2
	ldd	[S2_PTR+24],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+24]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+28]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop2)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin2):
	addcc	SIZE,8-2,SIZE
	blt	LOC(end2)
	subcc	%g0,%o4,%g0		! restore cy
LOC(loope2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope2)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end2):
	andcc	SIZE,1,%g0
	be	LOC(ret2)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract last limb */
LOC(jone):
	ld	[S1_PTR],%g4
	ld	[S2_PTR],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]

LOC(ret2):
	retl
	addx	%g0,%g0,%o0		! return borrow-out from most significant limb

END(__mpn_sub_n)