]>
Commit | Line | Data |
---|---|---|
6b628d36 | 1 | ! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and |
28f540f4 | 2 | ! store difference in a third limb vector. |
f41c8091 | 3 | ! |
04277e02 | 4 | ! Copyright (C) 1995-2019 Free Software Foundation, Inc. |
f41c8091 | 5 | ! |
28f540f4 | 6 | ! This file is part of the GNU MP Library. |
f41c8091 | 7 | ! |
28f540f4 | 8 | ! The GNU MP Library is free software; you can redistribute it and/or modify |
6d84f89a AJ |
9 | ! it under the terms of the GNU Lesser General Public License as published by |
10 | ! the Free Software Foundation; either version 2.1 of the License, or (at your | |
28f540f4 | 11 | ! option) any later version. |
f41c8091 | 12 | ! |
28f540f4 RM |
13 | ! The GNU MP Library is distributed in the hope that it will be useful, but |
14 | ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
6d84f89a | 15 | ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
28f540f4 | 16 | ! License for more details. |
f41c8091 | 17 | ! |
6d84f89a | 18 | ! You should have received a copy of the GNU Lesser General Public License |
59ba27a6 | 19 | ! along with the GNU MP Library; see the file COPYING.LIB. If not, |
5a82c748 | 20 | ! see <https://www.gnu.org/licenses/>. |
28f540f4 RM |
21 | |
22 | ||
23 | ! INPUT PARAMETERS | |
/* Symbolic names for the four argument registers used by the routine below.
   RES_PTR  address of the result limb vector (only ever stored to).
   S1_PTR   address of the minuend limb vector (only ever loaded from).
   S2_PTR   address of the subtrahend limb vector (only ever loaded from).
   SIZE     number of limbs to process; the file header requires it to be > 0.
   A limb is one 32-bit word: the routine steps every pointer by 4 per limb. */
f41c8091 UD |
24 | #define RES_PTR %o0 |
25 | #define S1_PTR %o1 | |
26 | #define S2_PTR %o2 | |
27 | #define SIZE %o3 | |
28f540f4 | 28 | |
f41c8091 | 29 | #include <sysdep.h> |
28f540f4 | 30 | |
f41c8091 UD |
31 | ENTRY(__mpn_sub_n)
/* __mpn_sub_n: RES[i] = S1[i] - S2[i] - borrow, for i = 0 .. SIZE-1, least
   significant limb first.  The borrow out of the last limb (0 or 1) is
   returned in %o0.

   Scratch registers written: %g1-%g4, %o4, %o5 and the integer condition
   codes.  The routine returns with retl and never changes the register
   window.

   Three code variants are selected from address bit 2 of the pointers, so
   that doubleword accesses (ldd/std) can be used on the operands that share
   the same alignment:
     V1a  S2_PTR and RES_PTR agree: ldd from S2, std to RES, word loads from S1.
     V1b  S1_PTR and RES_PTR agree: ldd from S1, std to RES, word loads from S2.
     V2   neither agrees with RES_PTR, hence S1_PTR and S2_PTR agree:
          ldd from both sources, word stores to RES.

   Carry handling idiom used throughout: "addx %g0,%g0,%o4" copies the carry
   (borrow) flag into %o4 before an instruction that overwrites the condition
   codes (addcc/andcc/cmp); "subcc %g0,%o4,%g0", normally placed in a branch
   delay slot, regenerates the flag from %o4.  */
32 | xor S2_PTR,RES_PTR,%g1 | |
ba848785 | 33 | andcc %g1,4,%g0 ! do S2_PTR and RES_PTR differ in address bit 2?
f41c8091 | 34 | bne LOC(1) ! branch if alignment differs
ba848785 RM |
35 | nop |
36 | ! ** V1a ** S2_PTR and RES_PTR agree in address bit 2 | |
f41c8091 UD |
37 | andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0 |
38 | be LOC(v1) ! if no, branch | |
ba848785 | 39 | nop
f41c8091 UD |
40 | /* Subtract least significant limb separately to align RES_PTR and S2_PTR */ |
41 | ld [S1_PTR],%g4 | |
42 | add S1_PTR,4,S1_PTR | |
43 | ld [S2_PTR],%g2 | |
44 | add S2_PTR,4,S2_PTR | |
45 | add SIZE,-1,SIZE | |
ba848785 | 46 | subcc %g4,%g2,%o4
f41c8091 UD |
47 | st %o4,[RES_PTR] |
48 | add RES_PTR,4,RES_PTR | |
49 | LOC(v1): | |
50 | addx %g0,%g0,%o4 ! save cy in register | |
51 | cmp SIZE,2 ! if SIZE < 2 ... | |
52 | bl LOC(end2) ! ... branch to tail code | |
ba848785 RM |
53 | subcc %g0,%o4,%g0 ! restore cy |
54 | ||
/* Preload the first limb pair: %g4,%g1 = two S1 limbs, %g2,%g3 = two S2 limbs.
   SIZE is biased by -10 so the 8-limb loop is entered only when at least
   8 limbs plus the pair it reads ahead are available.  */
f41c8091 UD |
55 | ld [S1_PTR+0],%g4 |
56 | addcc SIZE,-10,SIZE | |
57 | ld [S1_PTR+4],%g1 | |
58 | ldd [S2_PTR+0],%g2 | |
59 | blt LOC(fin1) | |
ba848785 RM |
60 | subcc %g0,%o4,%g0 ! restore cy |
61 | /* Subtract blocks of 8 limbs until fewer than 8 limbs remain */ | |
/* Each step subtracts the pair already in registers, reloads the registers
   with the following pair, then stores the two results with one std.  */
f41c8091 UD |
62 | LOC(loop1): |
63 | subxcc %g4,%g2,%o4 | |
64 | ld [S1_PTR+8],%g4 | |
ba848785 | 65 | subxcc %g1,%g3,%o5
f41c8091 UD |
66 | ld [S1_PTR+12],%g1 |
67 | ldd [S2_PTR+8],%g2 | |
68 | std %o4,[RES_PTR+0] | |
ba848785 | 69 | subxcc %g4,%g2,%o4
f41c8091 | 70 | ld [S1_PTR+16],%g4
ba848785 | 71 | subxcc %g1,%g3,%o5
f41c8091 UD |
72 | ld [S1_PTR+20],%g1 |
73 | ldd [S2_PTR+16],%g2 | |
74 | std %o4,[RES_PTR+8] | |
ba848785 | 75 | subxcc %g4,%g2,%o4
f41c8091 | 76 | ld [S1_PTR+24],%g4
ba848785 | 77 | subxcc %g1,%g3,%o5
f41c8091 UD |
78 | ld [S1_PTR+28],%g1 |
79 | ldd [S2_PTR+24],%g2 | |
80 | std %o4,[RES_PTR+16] | |
ba848785 | 81 | subxcc %g4,%g2,%o4
f41c8091 | 82 | ld [S1_PTR+32],%g4
ba848785 | 83 | subxcc %g1,%g3,%o5
f41c8091 UD |
84 | ld [S1_PTR+36],%g1 |
85 | ldd [S2_PTR+32],%g2 | |
86 | std %o4,[RES_PTR+24] | |
ba848785 | 87 | addx %g0,%g0,%o4 ! save cy in register
f41c8091 UD |
88 | addcc SIZE,-8,SIZE |
89 | add S1_PTR,32,S1_PTR | |
90 | add S2_PTR,32,S2_PTR | |
91 | add RES_PTR,32,RES_PTR | |
92 | bge LOC(loop1) | |
ba848785 RM |
93 | subcc %g0,%o4,%g0 ! restore cy |
94 | ||
f41c8091 UD |
95 | LOC(fin1): |
96 | addcc SIZE,8-2,SIZE | |
97 | blt LOC(end1) | |
ba848785 RM |
98 | subcc %g0,%o4,%g0 ! restore cy |
99 | /* Subtract blocks of 2 limbs until fewer than 2 limbs remain */ | |
f41c8091 UD |
100 | LOC(loope1): |
101 | subxcc %g4,%g2,%o4 | |
102 | ld [S1_PTR+8],%g4 | |
ba848785 | 103 | subxcc %g1,%g3,%o5
f41c8091 UD |
104 | ld [S1_PTR+12],%g1 |
105 | ldd [S2_PTR+8],%g2 | |
106 | std %o4,[RES_PTR+0] | |
ba848785 | 107 | addx %g0,%g0,%o4 ! save cy in register
f41c8091 UD |
108 | addcc SIZE,-2,SIZE |
109 | add S1_PTR,8,S1_PTR | |
110 | add S2_PTR,8,S2_PTR | |
111 | add RES_PTR,8,RES_PTR | |
112 | bge LOC(loope1) | |
ba848785 | 113 | subcc %g0,%o4,%g0 ! restore cy
/* Drain the pipeline: the limb pair loaded last is still in registers.  */
f41c8091 UD |
114 | LOC(end1): |
115 | subxcc %g4,%g2,%o4 | |
ba848785 | 116 | subxcc %g1,%g3,%o5
f41c8091 | 117 | std %o4,[RES_PTR+0]
ba848785 RM |
118 | addx %g0,%g0,%o4 ! save cy in register |
119 | ||
f41c8091 UD |
120 | andcc SIZE,1,%g0 |
121 | be LOC(ret1) | |
ba848785 RM |
122 | subcc %g0,%o4,%g0 ! restore cy |
123 | /* Subtract last limb */ | |
/* The pointers were not advanced past the pair just stored, hence offset 8.  */
f41c8091 UD |
124 | ld [S1_PTR+8],%g4 |
125 | ld [S2_PTR+8],%g2 | |
ba848785 | 126 | subxcc %g4,%g2,%o4
f41c8091 | 127 | st %o4,[RES_PTR+8]
ba848785 | 128 |
f41c8091 UD |
129 | LOC(ret1): |
130 | retl | |
ba848785 RM |
131 | addx %g0,%g0,%o0 ! return borrow (carry flag) out of most sign. limb |
132 | ||
f41c8091 | 133 | LOC(1): xor S1_PTR,RES_PTR,%g1
ba848785 | 134 | andcc %g1,4,%g0 ! do S1_PTR and RES_PTR differ in address bit 2?
f41c8091 | 135 | bne LOC(2)
ba848785 RM |
136 | nop |
137 | ! ** V1b ** S1_PTR and RES_PTR agree in address bit 2 | |
/* Same structure as V1a with the source roles swapped: here %g4,%g1 hold S2
   limbs (word loads) and %g2,%g3 hold S1 limbs (ldd), so every subtraction
   is written %g2-%g4 and %g3-%g1 to keep the result S1 - S2.  */
f41c8091 UD |
138 | andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0 |
139 | be LOC(v1b) ! if no, branch | |
ba848785 | 140 | nop
f41c8091 UD |
141 | /* Subtract least significant limb separately to align RES_PTR and S1_PTR */ |
142 | ld [S2_PTR],%g4 | |
143 | add S2_PTR,4,S2_PTR | |
144 | ld [S1_PTR],%g2 | |
145 | add S1_PTR,4,S1_PTR | |
146 | add SIZE,-1,SIZE | |
ba848785 | 147 | subcc %g2,%g4,%o4
f41c8091 UD |
148 | st %o4,[RES_PTR] |
149 | add RES_PTR,4,RES_PTR | |
150 | LOC(v1b): | |
151 | addx %g0,%g0,%o4 ! save cy in register | |
152 | cmp SIZE,2 ! if SIZE < 2 ... | |
153 | bl LOC(end2) ! ... branch to tail code | |
ba848785 RM |
154 | subcc %g0,%o4,%g0 ! restore cy |
155 | ||
f41c8091 UD |
156 | ld [S2_PTR+0],%g4 |
157 | addcc SIZE,-10,SIZE | |
158 | ld [S2_PTR+4],%g1 | |
159 | ldd [S1_PTR+0],%g2 | |
160 | blt LOC(fin1b) | |
ba848785 RM |
161 | subcc %g0,%o4,%g0 ! restore cy |
162 | /* Subtract blocks of 8 limbs until fewer than 8 limbs remain */ | |
f41c8091 UD |
163 | LOC(loop1b): |
164 | subxcc %g2,%g4,%o4 | |
165 | ld [S2_PTR+8],%g4 | |
ba848785 | 166 | subxcc %g3,%g1,%o5
f41c8091 UD |
167 | ld [S2_PTR+12],%g1 |
168 | ldd [S1_PTR+8],%g2 | |
169 | std %o4,[RES_PTR+0] | |
ba848785 | 170 | subxcc %g2,%g4,%o4
f41c8091 | 171 | ld [S2_PTR+16],%g4
ba848785 | 172 | subxcc %g3,%g1,%o5
f41c8091 UD |
173 | ld [S2_PTR+20],%g1 |
174 | ldd [S1_PTR+16],%g2 | |
175 | std %o4,[RES_PTR+8] | |
ba848785 | 176 | subxcc %g2,%g4,%o4
f41c8091 | 177 | ld [S2_PTR+24],%g4
ba848785 | 178 | subxcc %g3,%g1,%o5
f41c8091 UD |
179 | ld [S2_PTR+28],%g1 |
180 | ldd [S1_PTR+24],%g2 | |
181 | std %o4,[RES_PTR+16] | |
ba848785 | 182 | subxcc %g2,%g4,%o4
f41c8091 | 183 | ld [S2_PTR+32],%g4
ba848785 | 184 | subxcc %g3,%g1,%o5
f41c8091 UD |
185 | ld [S2_PTR+36],%g1 |
186 | ldd [S1_PTR+32],%g2 | |
187 | std %o4,[RES_PTR+24] | |
ba848785 | 188 | addx %g0,%g0,%o4 ! save cy in register
f41c8091 UD |
189 | addcc SIZE,-8,SIZE |
190 | add S1_PTR,32,S1_PTR | |
191 | add S2_PTR,32,S2_PTR | |
192 | add RES_PTR,32,RES_PTR | |
193 | bge LOC(loop1b) | |
ba848785 RM |
194 | subcc %g0,%o4,%g0 ! restore cy |
195 | ||
f41c8091 UD |
196 | LOC(fin1b): |
197 | addcc SIZE,8-2,SIZE | |
198 | blt LOC(end1b) | |
ba848785 RM |
199 | subcc %g0,%o4,%g0 ! restore cy |
200 | /* Subtract blocks of 2 limbs until fewer than 2 limbs remain */ | |
f41c8091 UD |
201 | LOC(loope1b): |
202 | subxcc %g2,%g4,%o4 | |
203 | ld [S2_PTR+8],%g4 | |
ba848785 | 204 | subxcc %g3,%g1,%o5
f41c8091 UD |
205 | ld [S2_PTR+12],%g1 |
206 | ldd [S1_PTR+8],%g2 | |
207 | std %o4,[RES_PTR+0] | |
ba848785 | 208 | addx %g0,%g0,%o4 ! save cy in register
f41c8091 UD |
209 | addcc SIZE,-2,SIZE |
210 | add S1_PTR,8,S1_PTR | |
211 | add S2_PTR,8,S2_PTR | |
212 | add RES_PTR,8,RES_PTR | |
213 | bge LOC(loope1b) | |
ba848785 | 214 | subcc %g0,%o4,%g0 ! restore cy
/* Drain the pipeline: the limb pair loaded last is still in registers.  */
f41c8091 UD |
215 | LOC(end1b): |
216 | subxcc %g2,%g4,%o4 | |
ba848785 | 217 | subxcc %g3,%g1,%o5
f41c8091 | 218 | std %o4,[RES_PTR+0]
ba848785 RM |
219 | addx %g0,%g0,%o4 ! save cy in register |
220 | ||
f41c8091 UD |
221 | andcc SIZE,1,%g0 |
222 | be LOC(ret1b) | |
ba848785 RM |
223 | subcc %g0,%o4,%g0 ! restore cy |
224 | /* Subtract last limb */ | |
f41c8091 UD |
225 | ld [S2_PTR+8],%g4 |
226 | ld [S1_PTR+8],%g2 | |
ba848785 | 227 | subxcc %g2,%g4,%o4
f41c8091 | 228 | st %o4,[RES_PTR+8]
ba848785 | 229 |
f41c8091 UD |
230 | LOC(ret1b): |
231 | retl | |
ba848785 RM |
232 | addx %g0,%g0,%o0 ! return borrow (carry flag) out of most sign. limb |
233 | ||
234 | ! ** V2 ** | |
f41c8091 UD |
235 | /* If we come here, the alignment of S1_PTR and RES_PTR as well as the |
236 | alignment of S2_PTR and RES_PTR differ. Since there are only two ways | |
ba848785 | 237 | things can be aligned (that we care about) we now know that the alignment
f41c8091 | 238 | of S1_PTR and S2_PTR are the same. */
ba848785 | 239 |
/* SIZE == 1 goes straight to the single-limb tail; the cmp itself leaves the
   carry flag clear in that case (SIZE - 1 = 0), which is what the subxcc at
   LOC(jone) needs.  */
f41c8091 UD |
240 | LOC(2): cmp SIZE,1 |
241 | be LOC(jone) | |
ba848785 | 242 | nop
f41c8091 UD |
243 | andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0 |
244 | be LOC(v2) ! if no, branch | |
ba848785 | 245 | nop
f41c8091 UD |
246 | /* Subtract least significant limb separately to align S1_PTR and S2_PTR */ |
247 | ld [S1_PTR],%g4 | |
248 | add S1_PTR,4,S1_PTR | |
249 | ld [S2_PTR],%g2 | |
250 | add S2_PTR,4,S2_PTR | |
251 | add SIZE,-1,SIZE | |
ba848785 | 252 | subcc %g4,%g2,%o4
f41c8091 UD |
253 | st %o4,[RES_PTR] |
254 | add RES_PTR,4,RES_PTR | |
ba848785 | 255 |
f41c8091 UD |
256 | LOC(v2): |
257 | addx %g0,%g0,%o4 ! save cy in register | |
258 | addcc SIZE,-8,SIZE | |
259 | blt LOC(fin2) | |
ba848785 RM |
260 | subcc %g0,%o4,%g0 ! restore cy |
261 | /* Subtract blocks of 8 limbs until fewer than 8 limbs remain */ | |
/* No read-ahead in this variant: each pair is loaded (%g2,%g3 from S1 and
   %o4,%o5 from S2), subtracted in place and stored with two word stores.  */
f41c8091 UD |
262 | LOC(loop2): |
263 | ldd [S1_PTR+0],%g2 | |
264 | ldd [S2_PTR+0],%o4 | |
ba848785 | 265 | subxcc %g2,%o4,%g2
f41c8091 | 266 | st %g2,[RES_PTR+0]
ba848785 | 267 | subxcc %g3,%o5,%g3
f41c8091 UD |
268 | st %g3,[RES_PTR+4] |
269 | ldd [S1_PTR+8],%g2 | |
270 | ldd [S2_PTR+8],%o4 | |
ba848785 | 271 | subxcc %g2,%o4,%g2
f41c8091 | 272 | st %g2,[RES_PTR+8]
ba848785 | 273 | subxcc %g3,%o5,%g3
f41c8091 UD |
274 | st %g3,[RES_PTR+12] |
275 | ldd [S1_PTR+16],%g2 | |
276 | ldd [S2_PTR+16],%o4 | |
ba848785 | 277 | subxcc %g2,%o4,%g2
f41c8091 | 278 | st %g2,[RES_PTR+16]
ba848785 | 279 | subxcc %g3,%o5,%g3
f41c8091 UD |
280 | st %g3,[RES_PTR+20] |
281 | ldd [S1_PTR+24],%g2 | |
282 | ldd [S2_PTR+24],%o4 | |
ba848785 | 283 | subxcc %g2,%o4,%g2
f41c8091 | 284 | st %g2,[RES_PTR+24]
ba848785 | 285 | subxcc %g3,%o5,%g3
f41c8091 | 286 | st %g3,[RES_PTR+28]
ba848785 | 287 | addx %g0,%g0,%o4 ! save cy in register
f41c8091 UD |
288 | addcc SIZE,-8,SIZE |
289 | add S1_PTR,32,S1_PTR | |
290 | add S2_PTR,32,S2_PTR | |
291 | add RES_PTR,32,RES_PTR | |
292 | bge LOC(loop2) | |
ba848785 RM |
293 | subcc %g0,%o4,%g0 ! restore cy |
294 | ||
f41c8091 UD |
295 | LOC(fin2): |
296 | addcc SIZE,8-2,SIZE | |
297 | blt LOC(end2) | |
ba848785 | 298 | subcc %g0,%o4,%g0 ! restore cy
/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
f41c8091 UD |
299 | LOC(loope2): |
300 | ldd [S1_PTR+0],%g2 | |
301 | ldd [S2_PTR+0],%o4 | |
ba848785 | 302 | subxcc %g2,%o4,%g2
f41c8091 | 303 | st %g2,[RES_PTR+0]
ba848785 | 304 | subxcc %g3,%o5,%g3
f41c8091 | 305 | st %g3,[RES_PTR+4]
ba848785 | 306 | addx %g0,%g0,%o4 ! save cy in register
f41c8091 UD |
307 | addcc SIZE,-2,SIZE |
308 | add S1_PTR,8,S1_PTR | |
309 | add S2_PTR,8,S2_PTR | |
310 | add RES_PTR,8,RES_PTR | |
311 | bge LOC(loope2) | |
ba848785 | 312 | subcc %g0,%o4,%g0 ! restore cy
/* Shared tail, also reached from V1a/V1b when fewer than 2 limbs remain.
   %o4 must hold the saved carry; an odd SIZE means one limb is left.  */
f41c8091 UD |
313 | LOC(end2): |
314 | andcc SIZE,1,%g0 | |
315 | be LOC(ret2) | |
ba848785 RM |
316 | subcc %g0,%o4,%g0 ! restore cy |
317 | /* Subtract last limb */ | |
f41c8091 UD |
318 | LOC(jone): |
319 | ld [S1_PTR],%g4 | |
320 | ld [S2_PTR],%g2 | |
ba848785 | 321 | subxcc %g4,%g2,%o4
f41c8091 | 322 | st %o4,[RES_PTR]
ba848785 | 323 |
f41c8091 UD |
324 | LOC(ret2): |
325 | retl | |
ba848785 | 326 | addx %g0,%g0,%o0 ! return borrow (carry flag) out of most sign. limb
f41c8091 UD |
327 | |
328 | END(__mpn_sub_n)