]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/sparc/sparc32/sub_n.S
Prefer https to http for gnu.org and fsf.org URLs
[thirdparty/glibc.git] / sysdeps / sparc / sparc32 / sub_n.S
CommitLineData
6b628d36 1! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
28f540f4 2! store difference in a third limb vector.
f41c8091 3!
04277e02 4! Copyright (C) 1995-2019 Free Software Foundation, Inc.
f41c8091 5!
28f540f4 6! This file is part of the GNU MP Library.
f41c8091 7!
28f540f4 8! The GNU MP Library is free software; you can redistribute it and/or modify
6d84f89a
AJ
9! it under the terms of the GNU Lesser General Public License as published by
10! the Free Software Foundation; either version 2.1 of the License, or (at your
28f540f4 11! option) any later version.
f41c8091 12!
28f540f4
RM
13! The GNU MP Library is distributed in the hope that it will be useful, but
14! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
6d84f89a 15! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
28f540f4 16! License for more details.
f41c8091 17!
6d84f89a 18! You should have received a copy of the GNU Lesser General Public License
59ba27a6 19! along with the GNU MP Library; see the file COPYING.LIB. If not,
5a82c748 20! see <https://www.gnu.org/licenses/>.
28f540f4
RM
21
22
23! INPUT PARAMETERS
f41c8091
UD
/* Symbolic names for the registers holding the four arguments: RES_PTR is
   where the difference limbs are stored, S1_PTR and S2_PTR address the two
   source vectors (each result limb is the S1 limb minus the S2 limb), and
   SIZE is the number of limbs still to process.  */
24#define RES_PTR %o0
25#define S1_PTR %o1
26#define S2_PTR %o2
27#define SIZE %o3
28f540f4 28
f41c8091 29#include <sysdep.h>
28f540f4 30
f41c8091
UD
/* __mpn_sub_n: subtract the SIZE-limb vector at S2_PTR from the SIZE-limb
   vector at S1_PTR, store the difference at RES_PTR and return the borrow
   out of the most significant limb in %o0.

   Three variants are selected by comparing bit 2 of the pointers:
   V1a (RES_PTR and S2_PTR agree), V1b (RES_PTR and S1_PTR agree) and
   V2 (S1_PTR and S2_PTR agree).  Each variant uses double-word accesses
   (ldd/std) on the agreeing pair.  The carry flag holds the running borrow;
   whenever an instruction that overwrites the flags is needed, the borrow
   is first copied to %o4 with addx and afterwards re-created with subcc.  */
31ENTRY(__mpn_sub_n)
/* Bit 2 of (S2_PTR ^ RES_PTR) is set when the two pointers differ in bit 2
   of their addresses; in that case try the other pairing at LOC(1).  */
32 xor S2_PTR,RES_PTR,%g1
ba848785 33 andcc %g1,4,%g0
f41c8091 34 bne LOC(1) ! branch if alignment differs
ba848785
RM
35 nop
36! ** V1a **
f41c8091
UD
37 andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
38 be LOC(v1) ! if no, branch
ba848785 39 nop
f41c8091
UD
40/* Subtract least significant limb separately to align RES_PTR and S2_PTR */
41 ld [S1_PTR],%g4
42 add S1_PTR,4,S1_PTR
43 ld [S2_PTR],%g2
44 add S2_PTR,4,S2_PTR
45 add SIZE,-1,SIZE
/* First limb: there is no incoming borrow, so a plain subcc (S1 limb minus
   S2 limb) is used; it leaves the borrow in the carry flag.  */
ba848785 46 subcc %g4,%g2,%o4
f41c8091
UD
47 st %o4,[RES_PTR]
48 add RES_PTR,4,RES_PTR
49LOC(v1):
/* With fewer than 2 limbs left, finish in the tail code at LOC(end2).
   The subcc after the branch sits in its delay slot and re-creates the
   carry that cmp overwrote.  */
50 addx %g0,%g0,%o4 ! save cy in register
51 cmp SIZE,2 ! if SIZE < 2 ...
52 bl LOC(end2) ! ... branch to tail code
ba848785
RM
53 subcc %g0,%o4,%g0 ! restore cy
54
f41c8091
UD
/* Pre-load the first two S1 limbs into %g4 and %g1 and the first S2 limb
   pair into %g2/%g3.  SIZE is biased by -10: a non-negative result means
   the 8-limb loop can run, since one pass touches limbs 0..9 (8 consumed
   plus 2 pre-loaded for the following step).  */
55 ld [S1_PTR+0],%g4
56 addcc SIZE,-10,SIZE
57 ld [S1_PTR+4],%g1
58 ldd [S2_PTR+0],%g2
59 blt LOC(fin1)
ba848785
RM
60 subcc %g0,%o4,%g0 ! restore cy
61/* Subtract blocks of 8 limbs until less than 8 limbs remain */
f41c8091
UD
62LOC(loop1):
/* Each subxcc pair consumes limbs loaded earlier while the loads for the
   following pair are issued; std writes the two result limbs in %o4/%o5.
   The loads and stores leave the carry flag untouched, so the borrow
   chains through all eight subxcc instructions.  */
63 subxcc %g4,%g2,%o4
64 ld [S1_PTR+8],%g4
ba848785 65 subxcc %g1,%g3,%o5
f41c8091
UD
66 ld [S1_PTR+12],%g1
67 ldd [S2_PTR+8],%g2
68 std %o4,[RES_PTR+0]
ba848785 69 subxcc %g4,%g2,%o4
f41c8091 70 ld [S1_PTR+16],%g4
ba848785 71 subxcc %g1,%g3,%o5
f41c8091
UD
72 ld [S1_PTR+20],%g1
73 ldd [S2_PTR+16],%g2
74 std %o4,[RES_PTR+8]
ba848785 75 subxcc %g4,%g2,%o4
f41c8091 76 ld [S1_PTR+24],%g4
ba848785 77 subxcc %g1,%g3,%o5
f41c8091
UD
78 ld [S1_PTR+28],%g1
79 ldd [S2_PTR+24],%g2
80 std %o4,[RES_PTR+16]
ba848785 81 subxcc %g4,%g2,%o4
f41c8091 82 ld [S1_PTR+32],%g4
ba848785 83 subxcc %g1,%g3,%o5
f41c8091
UD
84 ld [S1_PTR+36],%g1
85 ldd [S2_PTR+32],%g2
86 std %o4,[RES_PTR+24]
/* The addcc that updates the loop counter overwrites the flags, so the
   borrow is saved in %o4 here and restored in the branch delay slot.  */
ba848785 87 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
88 addcc SIZE,-8,SIZE
89 add S1_PTR,32,S1_PTR
90 add S2_PTR,32,S2_PTR
91 add RES_PTR,32,RES_PTR
92 bge LOC(loop1)
ba848785
RM
93 subcc %g0,%o4,%g0 ! restore cy
94
f41c8091
UD
95LOC(fin1):
/* Undo most of the loop bias: SIZE becomes (limbs left) - 4, counting the
   pair already held in registers.  A non-negative value means at least one
   more pair can be pre-loaded, so the 2-limb loop may run.  */
96 addcc SIZE,8-2,SIZE
97 blt LOC(end1)
ba848785
RM
98 subcc %g0,%o4,%g0 ! restore cy
99/* Subtract blocks of 2 limbs until less than 2 limbs remain */
f41c8091
UD
100LOC(loope1):
101 subxcc %g4,%g2,%o4
102 ld [S1_PTR+8],%g4
ba848785 103 subxcc %g1,%g3,%o5
f41c8091
UD
104 ld [S1_PTR+12],%g1
105 ldd [S2_PTR+8],%g2
106 std %o4,[RES_PTR+0]
ba848785 107 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
108 addcc SIZE,-2,SIZE
109 add S1_PTR,8,S1_PTR
110 add S2_PTR,8,S2_PTR
111 add RES_PTR,8,RES_PTR
112 bge LOC(loope1)
ba848785 113 subcc %g0,%o4,%g0 ! restore cy
f41c8091
UD
114LOC(end1):
/* Subtract the limb pair that is still held in registers.  */
115 subxcc %g4,%g2,%o4
ba848785 116 subxcc %g1,%g3,%o5
f41c8091 117 std %o4,[RES_PTR+0]
ba848785
RM
118 addx %g0,%g0,%o4 ! save cy in register
119
f41c8091
UD
/* Bit 0 of SIZE tells whether one more limb remains; it lives at offset 8,
   just past the pair stored above (the pointers were not advanced).  */
120 andcc SIZE,1,%g0
121 be LOC(ret1)
ba848785
RM
122 subcc %g0,%o4,%g0 ! restore cy
123/* Subtract last limb */
f41c8091
UD
124 ld [S1_PTR+8],%g4
125 ld [S2_PTR+8],%g2
ba848785 126 subxcc %g4,%g2,%o4
f41c8091 127 st %o4,[RES_PTR+8]
ba848785 128
f41c8091
UD
129LOC(ret1):
/* The addx in the delay slot of retl turns the final carry flag into the
   return value (0 or 1) in %o0.  */
130 retl
ba848785
RM
131 addx %g0,%g0,%o0 ! return carry-out from most sign. limb
132
/* Reached when S2_PTR and RES_PTR differ in bit 2.  If S1_PTR and RES_PTR
   differ in bit 2 as well, go to the V2 code at LOC(2); otherwise fall into
   V1b.  V1b has the same structure as V1a with the roles of the sources
   exchanged: S1 is read with ldd into %g2/%g3, S2 with single-word ld into
   %g4 and %g1, and every subtraction is still S1 limb minus S2 limb.  */
f41c8091 133LOC(1): xor S1_PTR,RES_PTR,%g1
ba848785 134 andcc %g1,4,%g0
f41c8091 135 bne LOC(2)
ba848785
RM
136 nop
137! ** V1b **
f41c8091
UD
138 andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
139 be LOC(v1b) ! if no, branch
ba848785 140 nop
f41c8091
UD
141/* Subtract least significant limb separately to align RES_PTR and S1_PTR */
142 ld [S2_PTR],%g4
143 add S2_PTR,4,S2_PTR
144 ld [S1_PTR],%g2
145 add S1_PTR,4,S1_PTR
146 add SIZE,-1,SIZE
/* First limb: no incoming borrow, so plain subcc; %g2 holds the S1 limb and
   %g4 the S2 limb.  */
ba848785 147 subcc %g2,%g4,%o4
f41c8091
UD
148 st %o4,[RES_PTR]
149 add RES_PTR,4,RES_PTR
150LOC(v1b):
/* With fewer than 2 limbs left, finish in the tail code at LOC(end2).
   The subcc after the branch sits in its delay slot and re-creates the
   carry that cmp overwrote.  */
151 addx %g0,%g0,%o4 ! save cy in register
152 cmp SIZE,2 ! if SIZE < 2 ...
153 bl LOC(end2) ! ... branch to tail code
ba848785
RM
154 subcc %g0,%o4,%g0 ! restore cy
155
f41c8091
UD
/* Pre-load the first two S2 limbs into %g4 and %g1 and the first S1 limb
   pair into %g2/%g3.  SIZE is biased by -10: a non-negative result means
   the 8-limb loop can run, since one pass touches limbs 0..9 (8 consumed
   plus 2 pre-loaded for the following step).  */
156 ld [S2_PTR+0],%g4
157 addcc SIZE,-10,SIZE
158 ld [S2_PTR+4],%g1
159 ldd [S1_PTR+0],%g2
160 blt LOC(fin1b)
ba848785
RM
161 subcc %g0,%o4,%g0 ! restore cy
162/* Subtract blocks of 8 limbs until less than 8 limbs remain */
f41c8091
UD
163LOC(loop1b):
/* Each subxcc pair consumes limbs loaded earlier while the loads for the
   following pair are issued; std writes the two result limbs in %o4/%o5.
   The loads and stores leave the carry flag untouched, so the borrow
   chains through all eight subxcc instructions.  */
164 subxcc %g2,%g4,%o4
165 ld [S2_PTR+8],%g4
ba848785 166 subxcc %g3,%g1,%o5
f41c8091
UD
167 ld [S2_PTR+12],%g1
168 ldd [S1_PTR+8],%g2
169 std %o4,[RES_PTR+0]
ba848785 170 subxcc %g2,%g4,%o4
f41c8091 171 ld [S2_PTR+16],%g4
ba848785 172 subxcc %g3,%g1,%o5
f41c8091
UD
173 ld [S2_PTR+20],%g1
174 ldd [S1_PTR+16],%g2
175 std %o4,[RES_PTR+8]
ba848785 176 subxcc %g2,%g4,%o4
f41c8091 177 ld [S2_PTR+24],%g4
ba848785 178 subxcc %g3,%g1,%o5
f41c8091
UD
179 ld [S2_PTR+28],%g1
180 ldd [S1_PTR+24],%g2
181 std %o4,[RES_PTR+16]
ba848785 182 subxcc %g2,%g4,%o4
f41c8091 183 ld [S2_PTR+32],%g4
ba848785 184 subxcc %g3,%g1,%o5
f41c8091
UD
185 ld [S2_PTR+36],%g1
186 ldd [S1_PTR+32],%g2
187 std %o4,[RES_PTR+24]
/* The addcc that updates the loop counter overwrites the flags, so the
   borrow is saved in %o4 here and restored in the branch delay slot.  */
ba848785 188 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
189 addcc SIZE,-8,SIZE
190 add S1_PTR,32,S1_PTR
191 add S2_PTR,32,S2_PTR
192 add RES_PTR,32,RES_PTR
193 bge LOC(loop1b)
ba848785
RM
194 subcc %g0,%o4,%g0 ! restore cy
195
f41c8091
UD
196LOC(fin1b):
/* Undo most of the loop bias: SIZE becomes (limbs left) - 4, counting the
   pair already held in registers.  A non-negative value means at least one
   more pair can be pre-loaded, so the 2-limb loop may run.  */
197 addcc SIZE,8-2,SIZE
198 blt LOC(end1b)
ba848785
RM
199 subcc %g0,%o4,%g0 ! restore cy
200/* Subtract blocks of 2 limbs until less than 2 limbs remain */
f41c8091
UD
201LOC(loope1b):
202 subxcc %g2,%g4,%o4
203 ld [S2_PTR+8],%g4
ba848785 204 subxcc %g3,%g1,%o5
f41c8091
UD
205 ld [S2_PTR+12],%g1
206 ldd [S1_PTR+8],%g2
207 std %o4,[RES_PTR+0]
ba848785 208 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
209 addcc SIZE,-2,SIZE
210 add S1_PTR,8,S1_PTR
211 add S2_PTR,8,S2_PTR
212 add RES_PTR,8,RES_PTR
213 bge LOC(loope1b)
ba848785 214 subcc %g0,%o4,%g0 ! restore cy
f41c8091
UD
215LOC(end1b):
/* Subtract the limb pair that is still held in registers.  */
216 subxcc %g2,%g4,%o4
ba848785 217 subxcc %g3,%g1,%o5
f41c8091 218 std %o4,[RES_PTR+0]
ba848785
RM
219 addx %g0,%g0,%o4 ! save cy in register
220
f41c8091
UD
/* Bit 0 of SIZE tells whether one more limb remains; it lives at offset 8,
   just past the pair stored above (the pointers were not advanced).  */
221 andcc SIZE,1,%g0
222 be LOC(ret1b)
ba848785
RM
223 subcc %g0,%o4,%g0 ! restore cy
224/* Subtract last limb */
f41c8091
UD
225 ld [S2_PTR+8],%g4
226 ld [S1_PTR+8],%g2
ba848785 227 subxcc %g2,%g4,%o4
f41c8091 228 st %o4,[RES_PTR+8]
ba848785 229
f41c8091
UD
230LOC(ret1b):
/* The addx in the delay slot of retl turns the final carry flag into the
   return value (0 or 1) in %o0.  */
231 retl
ba848785
RM
232 addx %g0,%g0,%o0 ! return carry-out from most sign. limb
233
234! ** V2 **
f41c8091
UD
235/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
236 alignment of S2_PTR and RES_PTR differ. Since there are only two ways
ba848785 237 things can be aligned (that we care about) we now know that the alignment
f41c8091 238 of S1_PTR and S2_PTR are the same. */
ba848785 239
f41c8091
UD
/* A single limb needs no alignment work: go straight to the one-limb tail.
   cmp leaves the carry clear when SIZE equals 1, so the subxcc at
   LOC(jone) runs without an incoming borrow.  */
240LOC(2): cmp SIZE,1
241 be LOC(jone)
ba848785 242 nop
f41c8091
UD
243 andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0
244 be LOC(v2) ! if no, branch
ba848785 245 nop
f41c8091
UD
246/* Subtract least significant limb separately to align S1_PTR and S2_PTR */
247 ld [S1_PTR],%g4
248 add S1_PTR,4,S1_PTR
249 ld [S2_PTR],%g2
250 add S2_PTR,4,S2_PTR
251 add SIZE,-1,SIZE
/* First limb: no incoming borrow, so plain subcc (S1 limb minus S2 limb).  */
ba848785 252 subcc %g4,%g2,%o4
f41c8091
UD
253 st %o4,[RES_PTR]
254 add RES_PTR,4,RES_PTR
ba848785 255
f41c8091
UD
256LOC(v2):
/* SIZE is biased by -8: a non-negative result means a full block of 8 limbs
   is available.  Nothing is pre-loaded in this variant; every loop pass
   loads, subtracts and stores its own limbs.  */
257 addx %g0,%g0,%o4 ! save cy in register
258 addcc SIZE,-8,SIZE
259 blt LOC(fin2)
ba848785
RM
260 subcc %g0,%o4,%g0 ! restore cy
261/* Subtract blocks of 8 limbs until less than 8 limbs remain */
f41c8091
UD
262LOC(loop2):
/* Both sources are read with ldd (S1 into %g2/%g3, S2 into %o4/%o5) and the
   results are stored one word at a time.  The ldd into %o4 overwrites the
   saved copy of the carry, which is fine because the live borrow is in the
   carry flag at this point.  */
263 ldd [S1_PTR+0],%g2
264 ldd [S2_PTR+0],%o4
ba848785 265 subxcc %g2,%o4,%g2
f41c8091 266 st %g2,[RES_PTR+0]
ba848785 267 subxcc %g3,%o5,%g3
f41c8091
UD
268 st %g3,[RES_PTR+4]
269 ldd [S1_PTR+8],%g2
270 ldd [S2_PTR+8],%o4
ba848785 271 subxcc %g2,%o4,%g2
f41c8091 272 st %g2,[RES_PTR+8]
ba848785 273 subxcc %g3,%o5,%g3
f41c8091
UD
274 st %g3,[RES_PTR+12]
275 ldd [S1_PTR+16],%g2
276 ldd [S2_PTR+16],%o4
ba848785 277 subxcc %g2,%o4,%g2
f41c8091 278 st %g2,[RES_PTR+16]
ba848785 279 subxcc %g3,%o5,%g3
f41c8091
UD
280 st %g3,[RES_PTR+20]
281 ldd [S1_PTR+24],%g2
282 ldd [S2_PTR+24],%o4
ba848785 283 subxcc %g2,%o4,%g2
f41c8091 284 st %g2,[RES_PTR+24]
ba848785 285 subxcc %g3,%o5,%g3
f41c8091 286 st %g3,[RES_PTR+28]
/* The addcc that updates the loop counter overwrites the flags, so the
   borrow is saved in %o4 here and restored in the branch delay slot.  */
ba848785 287 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
288 addcc SIZE,-8,SIZE
289 add S1_PTR,32,S1_PTR
290 add S2_PTR,32,S2_PTR
291 add RES_PTR,32,RES_PTR
292 bge LOC(loop2)
ba848785
RM
293 subcc %g0,%o4,%g0 ! restore cy
294
f41c8091
UD
295LOC(fin2):
/* Undo most of the loop bias: SIZE becomes (limbs left) - 2, so a
   non-negative value means at least one more limb pair remains.  */
296 addcc SIZE,8-2,SIZE
297 blt LOC(end2)
ba848785 298 subcc %g0,%o4,%g0 ! restore cy
f41c8091
UD
/* Subtract blocks of 2 limbs until less than 2 limbs remain.  */
299LOC(loope2):
300 ldd [S1_PTR+0],%g2
301 ldd [S2_PTR+0],%o4
ba848785 302 subxcc %g2,%o4,%g2
f41c8091 303 st %g2,[RES_PTR+0]
ba848785 304 subxcc %g3,%o5,%g3
f41c8091 305 st %g3,[RES_PTR+4]
ba848785 306 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
307 addcc SIZE,-2,SIZE
308 add S1_PTR,8,S1_PTR
309 add S2_PTR,8,S2_PTR
310 add RES_PTR,8,RES_PTR
311 bge LOC(loope2)
ba848785 312 subcc %g0,%o4,%g0 ! restore cy
f41c8091
UD
313LOC(end2):
/* Tail code, also entered from LOC(v1) and LOC(v1b) when fewer than 2 limbs
   are left.  %o4 holds the saved borrow on every path that gets here; bit 0
   of SIZE tells whether one limb is still to be processed.  */
314 andcc SIZE,1,%g0
315 be LOC(ret2)
ba848785
RM
316 subcc %g0,%o4,%g0 ! restore cy
317/* Subtract last limb */
f41c8091
UD
318LOC(jone):
319 ld [S1_PTR],%g4
320 ld [S2_PTR],%g2
ba848785 321 subxcc %g4,%g2,%o4
f41c8091 322 st %o4,[RES_PTR]
ba848785 323
f41c8091
UD
324LOC(ret2):
/* The addx in the delay slot of retl turns the final carry flag into the
   return value (0 or 1) in %o0.  */
325 retl
ba848785 326 addx %g0,%g0,%o0 ! return carry-out from most sign. limb
f41c8091
UD
327
328END(__mpn_sub_n)