]>
Commit | Line | Data |
---|---|---|
6b628d36 | 1 | ! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and |
28f540f4 | 2 | ! store difference in a third limb vector. |
f41c8091 | 3 | ! |
6b628d36 | 4 | ! Copyright (C) 1995, 1996 Free Software Foundation, Inc. |
f41c8091 | 5 | ! |
28f540f4 | 6 | ! This file is part of the GNU MP Library. |
f41c8091 | 7 | ! |
28f540f4 | 8 | ! The GNU MP Library is free software; you can redistribute it and/or modify |
6d84f89a AJ |
9 | ! it under the terms of the GNU Lesser General Public License as published by |
10 | ! the Free Software Foundation; either version 2.1 of the License, or (at your | |
28f540f4 | 11 | ! option) any later version. |
f41c8091 | 12 | ! |
28f540f4 RM |
13 | ! The GNU MP Library is distributed in the hope that it will be useful, but |
14 | ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
6d84f89a | 15 | ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
28f540f4 | 16 | ! License for more details. |
f41c8091 | 17 | ! |
6d84f89a | 18 | ! You should have received a copy of the GNU Lesser General Public License |
28f540f4 | 19 | ! along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
b928942e RM |
20 | ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
21 | ! MA 02111-1307, USA. | |
28f540f4 RM |
22 | |
23 | ||
24 | ! INPUT PARAMETERS | |
f41c8091 UD |
25 | #define RES_PTR %o0 |
26 | #define S1_PTR %o1 | |
27 | #define S2_PTR %o2 | |
28 | #define SIZE %o3 | |
28f540f4 | 29 | |
f41c8091 | 30 | #include <sysdep.h> |
28f540f4 | 31 | |
/* __mpn_sub_n: subtract the SIZE-limb vector at S2_PTR from the SIZE-limb
   vector at S1_PTR, store the differences at RES_PTR, and return the borrow
   out of the most significant limb (0 or 1) in %o0.  A limb is one 32-bit
   word: limbs are moved with ld/st and the pointers step by 4.

   One of three variants is chosen by comparing bit 2 of the pointers, so
   that two of the three vectors can be moved with doubleword ldd/std:
     V1a  RES_PTR and S2_PTR agree in bit 2.  S2 is read with ldd, RES is
          written with std, S1 is read one limb at a time with ld.
     V1b  RES_PTR and S1_PTR agree in bit 2.  Same as V1a with the two
          sources swapped.
     V2   neither of the above.  S1 and S2 are both read with ldd and RES
          is written one limb at a time with st.

   Carry handling: every addcc, andcc or cmp used for loop control or a
   test overwrites the carry flag.  The borrow is therefore parked in %o4
   with addx (0 + 0 + cy) and put back in the delay slot of the following
   branch with subcc (0 - %o4 borrows exactly when %o4 is 1).

   Registers written: %g1 to %g4, %o4, %o5, the four argument registers and
   the integer condition codes.  */
f41c8091 UD |
32 | ENTRY(__mpn_sub_n)
33 | xor S2_PTR,RES_PTR,%g1 | |
ba848785 | 34 | andcc %g1,4,%g0 |
f41c8091 | 35 | bne LOC(1) ! branch if alignment differs |
ba848785 RM |
36 | nop
37 | ! ** V1a ** | |
f41c8091 UD |
38 | andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
39 | be LOC(v1) ! if no, branch | |
ba848785 | 40 | nop |
f41c8091 UD |
41 | /* Subtract least significant limb separately to align RES_PTR and S2_PTR */
42 | ld [S1_PTR],%g4 | |
43 | add S1_PTR,4,S1_PTR | |
44 | ld [S2_PTR],%g2 | |
45 | add S2_PTR,4,S2_PTR | |
46 | add SIZE,-1,SIZE | |
ba848785 | 47 | subcc %g4,%g2,%o4 |
f41c8091 UD |
48 | st %o4,[RES_PTR]
49 | add RES_PTR,4,RES_PTR | |
50 | LOC(v1): | |
51 | addx %g0,%g0,%o4 ! save cy in register | |
52 | cmp SIZE,2 ! if SIZE < 2 ... | |
53 | bl LOC(end2) ! ... branch to tail code | |
ba848785 RM |
54 | subcc %g0,%o4,%g0 ! restore cy
55 | ||
/* Preload the first limb pair of each source.  From here on the loads run
   one pair ahead of the subtraction, which is why SIZE is biased by 10
   (8 limbs per iteration plus the 2 limbs already in registers) and why
   the loop bodies address the sources at offset 8 and up.  */
f41c8091 UD |
56 | ld [S1_PTR+0],%g4
57 | addcc SIZE,-10,SIZE | |
58 | ld [S1_PTR+4],%g1 | |
59 | ldd [S2_PTR+0],%g2 | |
60 | blt LOC(fin1) | |
ba848785 RM |
61 | subcc %g0,%o4,%g0 ! restore cy
62 | /* Subtract blocks of 8 limbs until fewer than 8 limbs remain */ | |
f41c8091 UD |
63 | LOC(loop1):
64 | subxcc %g4,%g2,%o4 | |
65 | ld [S1_PTR+8],%g4 | |
ba848785 | 66 | subxcc %g1,%g3,%o5 |
f41c8091 UD |
67 | ld [S1_PTR+12],%g1
68 | ldd [S2_PTR+8],%g2 | |
69 | std %o4,[RES_PTR+0] | |
ba848785 | 70 | subxcc %g4,%g2,%o4 |
f41c8091 | 71 | ld [S1_PTR+16],%g4 |
ba848785 | 72 | subxcc %g1,%g3,%o5 |
f41c8091 UD |
73 | ld [S1_PTR+20],%g1
74 | ldd [S2_PTR+16],%g2 | |
75 | std %o4,[RES_PTR+8] | |
ba848785 | 76 | subxcc %g4,%g2,%o4 |
f41c8091 | 77 | ld [S1_PTR+24],%g4 |
ba848785 | 78 | subxcc %g1,%g3,%o5 |
f41c8091 UD |
79 | ld [S1_PTR+28],%g1
80 | ldd [S2_PTR+24],%g2 | |
81 | std %o4,[RES_PTR+16] | |
ba848785 | 82 | subxcc %g4,%g2,%o4 |
f41c8091 | 83 | ld [S1_PTR+32],%g4 |
ba848785 | 84 | subxcc %g1,%g3,%o5 |
f41c8091 UD |
85 | ld [S1_PTR+36],%g1
86 | ldd [S2_PTR+32],%g2 | |
87 | std %o4,[RES_PTR+24] | |
ba848785 | 88 | addx %g0,%g0,%o4 ! save cy in register |
f41c8091 UD |
89 | addcc SIZE,-8,SIZE
90 | add S1_PTR,32,S1_PTR | |
91 | add S2_PTR,32,S2_PTR | |
92 | add RES_PTR,32,RES_PTR | |
93 | bge LOC(loop1) | |
ba848785 RM |
94 | subcc %g0,%o4,%g0 ! restore cy
95 | ||
/* Re-bias SIZE from the 8-limb loop to the 2-limb loop.  The adjustment is
   even, so the low bit of SIZE still tells whether one odd limb is left.  */
f41c8091 UD |
96 | LOC(fin1):
97 | addcc SIZE,8-2,SIZE | |
98 | blt LOC(end1) | |
ba848785 RM |
99 | subcc %g0,%o4,%g0 ! restore cy
100 | /* Subtract blocks of 2 limbs until fewer than 2 limbs remain */ | |
f41c8091 UD |
101 | LOC(loope1):
102 | subxcc %g4,%g2,%o4 | |
103 | ld [S1_PTR+8],%g4 | |
ba848785 | 104 | subxcc %g1,%g3,%o5 |
f41c8091 UD |
105 | ld [S1_PTR+12],%g1
106 | ldd [S2_PTR+8],%g2 | |
107 | std %o4,[RES_PTR+0] | |
ba848785 | 108 | addx %g0,%g0,%o4 ! save cy in register |
f41c8091 UD |
109 | addcc SIZE,-2,SIZE
110 | add S1_PTR,8,S1_PTR | |
111 | add S2_PTR,8,S2_PTR | |
112 | add RES_PTR,8,RES_PTR | |
113 | bge LOC(loope1) | |
ba848785 | 114 | subcc %g0,%o4,%g0 ! restore cy |
/* Drain the limb pair that is still held in registers.  The pointers are
   not advanced here, so the odd limb below is addressed at offset 8.  */
f41c8091 UD |
115 | LOC(end1):
116 | subxcc %g4,%g2,%o4 | |
ba848785 | 117 | subxcc %g1,%g3,%o5 |
f41c8091 | 118 | std %o4,[RES_PTR+0] |
ba848785 RM |
119 | addx %g0,%g0,%o4 ! save cy in register
120 | ||
f41c8091 UD |
121 | andcc SIZE,1,%g0
122 | be LOC(ret1) | |
ba848785 RM |
123 | subcc %g0,%o4,%g0 ! restore cy
124 | /* Subtract the last limb, present only when the low bit of SIZE is set */ | |
f41c8091 UD |
125 | ld [S1_PTR+8],%g4
126 | ld [S2_PTR+8],%g2 | |
ba848785 | 127 | subxcc %g4,%g2,%o4 |
f41c8091 | 128 | st %o4,[RES_PTR+8] |
ba848785 | 129 | |
f41c8091 UD |
130 | LOC(ret1):
131 | retl | |
ba848785 RM |
132 | addx %g0,%g0,%o0 ! return carry-out from most sign. limb
133 | ||
/* LOC(1) is reached when RES_PTR and S2_PTR differ in bit 2.  If RES_PTR and
   S1_PTR differ in bit 2 as well, control goes on to LOC(2).  Otherwise
   variant V1b runs: it has the same structure as V1a with the two sources
   swapped.  S2 is read one limb at a time with ld into %g4 and %g1, S1 is
   read with ldd into %g2 and %g3, and RES is written with std from %o4 and
   %o5.  Every subtraction keeps the S1 limb as the minuend, so the operand
   order is %g2 minus %g4 and %g3 minus %g1.

   The borrow is carried across flag-clobbering instructions in %o4: addx
   captures it as 0 or 1 and subcc of %o4 from zero in a branch delay slot
   sets the carry flag again.  The value returned in %o0 is the borrow out
   of the most significant limb.  */
f41c8091 | 134 | LOC(1): xor S1_PTR,RES_PTR,%g1 |
ba848785 | 135 | andcc %g1,4,%g0 |
f41c8091 | 136 | bne LOC(2) |
ba848785 RM |
137 | nop
138 | ! ** V1b ** | |
f41c8091 UD |
139 | andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
140 | be LOC(v1b) ! if no, branch | |
ba848785 | 141 | nop |
f41c8091 UD |
142 | /* Subtract least significant limb separately to align RES_PTR and S1_PTR */
143 | ld [S2_PTR],%g4 | |
144 | add S2_PTR,4,S2_PTR | |
145 | ld [S1_PTR],%g2 | |
146 | add S1_PTR,4,S1_PTR | |
147 | add SIZE,-1,SIZE | |
ba848785 | 148 | subcc %g2,%g4,%o4 |
f41c8091 UD |
149 | st %o4,[RES_PTR]
150 | add RES_PTR,4,RES_PTR | |
151 | LOC(v1b): | |
152 | addx %g0,%g0,%o4 ! save cy in register | |
153 | cmp SIZE,2 ! if SIZE < 2 ... | |
154 | bl LOC(end2) ! ... branch to tail code | |
ba848785 RM |
155 | subcc %g0,%o4,%g0 ! restore cy
156 | ||
/* Preload the first limb pair of each source.  The loads then stay one pair
   ahead of the subtraction, hence the bias of 10 on SIZE (8 limbs per
   iteration plus the 2 limbs already in registers).  */
f41c8091 UD |
157 | ld [S2_PTR+0],%g4
158 | addcc SIZE,-10,SIZE | |
159 | ld [S2_PTR+4],%g1 | |
160 | ldd [S1_PTR+0],%g2 | |
161 | blt LOC(fin1b) | |
ba848785 RM |
162 | subcc %g0,%o4,%g0 ! restore cy
163 | /* Subtract blocks of 8 limbs until fewer than 8 limbs remain */ | |
f41c8091 UD |
164 | LOC(loop1b):
165 | subxcc %g2,%g4,%o4 | |
166 | ld [S2_PTR+8],%g4 | |
ba848785 | 167 | subxcc %g3,%g1,%o5 |
f41c8091 UD |
168 | ld [S2_PTR+12],%g1
169 | ldd [S1_PTR+8],%g2 | |
170 | std %o4,[RES_PTR+0] | |
ba848785 | 171 | subxcc %g2,%g4,%o4 |
f41c8091 | 172 | ld [S2_PTR+16],%g4 |
ba848785 | 173 | subxcc %g3,%g1,%o5 |
f41c8091 UD |
174 | ld [S2_PTR+20],%g1
175 | ldd [S1_PTR+16],%g2 | |
176 | std %o4,[RES_PTR+8] | |
ba848785 | 177 | subxcc %g2,%g4,%o4 |
f41c8091 | 178 | ld [S2_PTR+24],%g4 |
ba848785 | 179 | subxcc %g3,%g1,%o5 |
f41c8091 UD |
180 | ld [S2_PTR+28],%g1
181 | ldd [S1_PTR+24],%g2 | |
182 | std %o4,[RES_PTR+16] | |
ba848785 | 183 | subxcc %g2,%g4,%o4 |
f41c8091 | 184 | ld [S2_PTR+32],%g4 |
ba848785 | 185 | subxcc %g3,%g1,%o5 |
f41c8091 UD |
186 | ld [S2_PTR+36],%g1
187 | ldd [S1_PTR+32],%g2 | |
188 | std %o4,[RES_PTR+24] | |
ba848785 | 189 | addx %g0,%g0,%o4 ! save cy in register |
f41c8091 UD |
190 | addcc SIZE,-8,SIZE
191 | add S1_PTR,32,S1_PTR | |
192 | add S2_PTR,32,S2_PTR | |
193 | add RES_PTR,32,RES_PTR | |
194 | bge LOC(loop1b) | |
ba848785 RM |
195 | subcc %g0,%o4,%g0 ! restore cy
196 | ||
/* Re-bias SIZE from the 8-limb loop to the 2-limb loop.  The adjustment is
   even, so the low bit of SIZE still tells whether one odd limb is left.  */
f41c8091 UD |
197 | LOC(fin1b):
198 | addcc SIZE,8-2,SIZE | |
199 | blt LOC(end1b) | |
ba848785 RM |
200 | subcc %g0,%o4,%g0 ! restore cy
201 | /* Subtract blocks of 2 limbs until fewer than 2 limbs remain */ | |
f41c8091 UD |
202 | LOC(loope1b):
203 | subxcc %g2,%g4,%o4 | |
204 | ld [S2_PTR+8],%g4 | |
ba848785 | 205 | subxcc %g3,%g1,%o5 |
f41c8091 UD |
206 | ld [S2_PTR+12],%g1
207 | ldd [S1_PTR+8],%g2 | |
208 | std %o4,[RES_PTR+0] | |
ba848785 | 209 | addx %g0,%g0,%o4 ! save cy in register |
f41c8091 UD |
210 | addcc SIZE,-2,SIZE
211 | add S1_PTR,8,S1_PTR | |
212 | add S2_PTR,8,S2_PTR | |
213 | add RES_PTR,8,RES_PTR | |
214 | bge LOC(loope1b) | |
ba848785 | 215 | subcc %g0,%o4,%g0 ! restore cy |
/* Drain the limb pair that is still held in registers.  The pointers are
   not advanced here, so the odd limb below is addressed at offset 8.  */
f41c8091 UD |
216 | LOC(end1b):
217 | subxcc %g2,%g4,%o4 | |
ba848785 | 218 | subxcc %g3,%g1,%o5 |
f41c8091 | 219 | std %o4,[RES_PTR+0] |
ba848785 RM |
220 | addx %g0,%g0,%o4 ! save cy in register
221 | ||
f41c8091 UD |
222 | andcc SIZE,1,%g0
223 | be LOC(ret1b) | |
ba848785 RM |
224 | subcc %g0,%o4,%g0 ! restore cy
225 | /* Subtract the last limb, present only when the low bit of SIZE is set */ | |
f41c8091 UD |
226 | ld [S2_PTR+8],%g4
227 | ld [S1_PTR+8],%g2 | |
ba848785 | 228 | subxcc %g2,%g4,%o4 |
f41c8091 | 229 | st %o4,[RES_PTR+8] |
ba848785 | 230 | |
f41c8091 UD |
231 | LOC(ret1b):
232 | retl | |
ba848785 RM |
233 | addx %g0,%g0,%o0 ! return carry-out from most sign. limb
234 | ||
235 | ! ** V2 ** | |
f41c8091 UD |
236 | /* If we come here, the alignment of S1_PTR and RES_PTR as well as the
237 | alignment of S2_PTR and RES_PTR differ. Since there are only two ways | |
ba848785 | 238 | things can be aligned (that we care about) we now know that the alignment |
f41c8091 | 239 | of S1_PTR and S2_PTR are the same. */ |
ba848785 | 240 | |
/* In this variant both sources are read with ldd (S1 into %g2 and %g3, S2
   into %o4 and %o5) and RES is written one limb at a time with st.  Nothing
   is preloaded, so the pointers always address the next unprocessed limb.
   Because the ldd from S2 overwrites %o4, the saved borrow is only valid in
   %o4 between an addx and the subcc that restores it.

   A single limb skips the alignment step and goes straight to jone.  The
   cmp of SIZE against 1 leaves the carry flag clear when they are equal, so
   the subxcc at jone then starts without a borrow.  */
f41c8091 UD |
241 | LOC(2): cmp SIZE,1
242 | be LOC(jone) | |
ba848785 | 243 | nop |
f41c8091 UD |
244 | andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0
245 | be LOC(v2) ! if no, branch | |
ba848785 | 246 | nop |
f41c8091 UD |
247 | /* Subtract least significant limb separately to align S1_PTR and S2_PTR */
248 | ld [S1_PTR],%g4 | |
249 | add S1_PTR,4,S1_PTR | |
250 | ld [S2_PTR],%g2 | |
251 | add S2_PTR,4,S2_PTR | |
252 | add SIZE,-1,SIZE | |
ba848785 | 253 | subcc %g4,%g2,%o4 |
f41c8091 UD |
254 | st %o4,[RES_PTR]
255 | add RES_PTR,4,RES_PTR | |
ba848785 | 256 | |
f41c8091 UD |
257 | LOC(v2):
258 | addx %g0,%g0,%o4 ! save cy in register | |
259 | addcc SIZE,-8,SIZE | |
260 | blt LOC(fin2) | |
ba848785 RM |
261 | subcc %g0,%o4,%g0 ! restore cy
262 | /* Subtract blocks of 8 limbs until fewer than 8 limbs remain */ | |
f41c8091 UD |
263 | LOC(loop2):
264 | ldd [S1_PTR+0],%g2 | |
265 | ldd [S2_PTR+0],%o4 | |
ba848785 | 266 | subxcc %g2,%o4,%g2 |
f41c8091 | 267 | st %g2,[RES_PTR+0] |
ba848785 | 268 | subxcc %g3,%o5,%g3 |
f41c8091 UD |
269 | st %g3,[RES_PTR+4]
270 | ldd [S1_PTR+8],%g2 | |
271 | ldd [S2_PTR+8],%o4 | |
ba848785 | 272 | subxcc %g2,%o4,%g2 |
f41c8091 | 273 | st %g2,[RES_PTR+8] |
ba848785 | 274 | subxcc %g3,%o5,%g3 |
f41c8091 UD |
275 | st %g3,[RES_PTR+12]
276 | ldd [S1_PTR+16],%g2 | |
277 | ldd [S2_PTR+16],%o4 | |
ba848785 | 278 | subxcc %g2,%o4,%g2 |
f41c8091 | 279 | st %g2,[RES_PTR+16] |
ba848785 | 280 | subxcc %g3,%o5,%g3 |
f41c8091 UD |
281 | st %g3,[RES_PTR+20]
282 | ldd [S1_PTR+24],%g2 | |
283 | ldd [S2_PTR+24],%o4 | |
ba848785 | 284 | subxcc %g2,%o4,%g2 |
f41c8091 | 285 | st %g2,[RES_PTR+24] |
ba848785 | 286 | subxcc %g3,%o5,%g3 |
f41c8091 | 287 | st %g3,[RES_PTR+28] |
ba848785 | 288 | addx %g0,%g0,%o4 ! save cy in register |
f41c8091 UD |
289 | addcc SIZE,-8,SIZE
290 | add S1_PTR,32,S1_PTR | |
291 | add S2_PTR,32,S2_PTR | |
292 | add RES_PTR,32,RES_PTR | |
293 | bge LOC(loop2) | |
ba848785 RM |
294 | subcc %g0,%o4,%g0 ! restore cy
295 | ||
/* Re-bias SIZE from the 8-limb loop to the 2-limb loop.  The adjustment is
   even, so the low bit of SIZE still tells whether one odd limb is left.  */
f41c8091 UD |
296 | LOC(fin2):
297 | addcc SIZE,8-2,SIZE | |
298 | blt LOC(end2) | |
ba848785 | 299 | subcc %g0,%o4,%g0 ! restore cy |
/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
f41c8091 UD |
300 | LOC(loope2):
301 | ldd [S1_PTR+0],%g2 | |
302 | ldd [S2_PTR+0],%o4 | |
ba848785 | 303 | subxcc %g2,%o4,%g2 |
f41c8091 | 304 | st %g2,[RES_PTR+0] |
ba848785 | 305 | subxcc %g3,%o5,%g3 |
f41c8091 | 306 | st %g3,[RES_PTR+4] |
ba848785 | 307 | addx %g0,%g0,%o4 ! save cy in register |
f41c8091 UD |
308 | addcc SIZE,-2,SIZE
309 | add S1_PTR,8,S1_PTR | |
310 | add S2_PTR,8,S2_PTR | |
311 | add RES_PTR,8,RES_PTR | |
312 | bge LOC(loope2) | |
ba848785 | 313 | subcc %g0,%o4,%g0 ! restore cy |
/* Shared tail.  Besides the fall-through from the loops above, end2 is the
   branch target of V1a and V1b when fewer than 2 limbs are left after their
   alignment step.  On every path into end2 the borrow is held in %o4, and
   the delay slot below restores it after andcc has cleared the flag.  */
f41c8091 UD |
314 | LOC(end2):
315 | andcc SIZE,1,%g0 | |
316 | be LOC(ret2) | |
ba848785 RM |
317 | subcc %g0,%o4,%g0 ! restore cy
318 | /* Subtract the last limb, present only when the low bit of SIZE is set */ | |
f41c8091 UD |
319 | LOC(jone):
320 | ld [S1_PTR],%g4 | |
321 | ld [S2_PTR],%g2 | |
ba848785 | 322 | subxcc %g4,%g2,%o4 |
f41c8091 | 323 | st %o4,[RES_PTR] |
ba848785 | 324 | |
f41c8091 UD |
325 | LOC(ret2):
326 | retl | |
ba848785 | 327 | addx %g0,%g0,%o0 ! return carry-out from most sign. limb |
f41c8091 UD |
328 |
329 | END(__mpn_sub_n)