]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/sparc/sparc32/sub_n.S
Use <> for include of kernel-features.h.
[thirdparty/glibc.git] / sysdeps / sparc / sparc32 / sub_n.S
CommitLineData
6b628d36 1! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
28f540f4 2! store difference in a third limb vector.
f41c8091 3!
6b628d36 4! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
f41c8091 5!
28f540f4 6! This file is part of the GNU MP Library.
f41c8091 7!
28f540f4 8! The GNU MP Library is free software; you can redistribute it and/or modify
6d84f89a
AJ
9! it under the terms of the GNU Lesser General Public License as published by
10! the Free Software Foundation; either version 2.1 of the License, or (at your
28f540f4 11! option) any later version.
f41c8091 12!
28f540f4
RM
13! The GNU MP Library is distributed in the hope that it will be useful, but
14! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
6d84f89a 15! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
28f540f4 16! License for more details.
f41c8091 17!
6d84f89a 18! You should have received a copy of the GNU Lesser General Public License
28f540f4 19! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
b928942e
RM
20! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21! MA 02111-1307, USA.
28f540f4
RM
22
23
24! INPUT PARAMETERS
f41c8091
UD
/* Registers holding the four arguments, as used by the code below:
     RES_PTR  address the difference limbs are stored to
     S1_PTR   address of the limbs that are subtracted from
     S2_PTR   address of the limbs that are subtracted
     SIZE     count of limbs still to process (counted down by the code)  */
25#define RES_PTR %o0
26#define S1_PTR %o1
27#define S2_PTR %o2
28#define SIZE %o3
28f540f4 29
f41c8091 30#include <sysdep.h>
28f540f4 31
f41c8091
UD
/* __mpn_sub_n: subtract the limb vector at S2_PTR from the limb vector at
   S1_PTR (SIZE limbs each), store the difference at RES_PTR and return the
   final borrow (0 or 1) in %o0.  Limbs are 4 bytes wide: pointers advance by
   4 per limb.  The borrow travels from limb to limb in the carry flag by
   means of subxcc.

   One of three code paths is picked by comparing bit 2 of the pointers.
   Doubleword ldd/std are used only on pointers whose bit 2 matches, after an
   optional one-limb step that clears that bit:
     V1a  S2_PTR and RES_PTR match: ldd from S2_PTR, ld from S1_PTR, std to RES_PTR.
     V1b  S1_PTR and RES_PTR match: ldd from S1_PTR, ld from S2_PTR, std to RES_PTR.
     V2   neither matches RES_PTR:  ldd from both sources, st to RES_PTR.
   NOTE(review): this presumes all three pointers are at least 4-byte
   aligned; nothing here checks it.

   Recurring idiom: "addx %g0,%g0,%o4" copies the carry flag into %o4 before
   an addcc/cmp/andcc on SIZE overwrites the condition codes, and
   "subcc %g0,%o4,%g0" (0 - %o4) sets the carry flag again afterwards,
   usually from the delay slot of the branch that tested SIZE.  */
32ENTRY(__mpn_sub_n)
33 xor S2_PTR,RES_PTR,%g1
ba848785 34 andcc %g1,4,%g0
f41c8091 35 bne LOC(1) ! branch if alignment differs
ba848785
RM
36 nop
37! ** V1a **
/* Here bit 2 of S2_PTR equals bit 2 of RES_PTR. */
f41c8091
UD
38 andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
39 be LOC(v1) ! if no, branch
ba848785 40 nop
f41c8091
UD
41/* Subtract the least significant limb separately to align RES_PTR and S2_PTR */
42 ld [S1_PTR],%g4
43 add S1_PTR,4,S1_PTR
44 ld [S2_PTR],%g2
45 add S2_PTR,4,S2_PTR
46 add SIZE,-1,SIZE
ba848785 47 subcc %g4,%g2,%o4
f41c8091
UD
48 st %o4,[RES_PTR]
49 add RES_PTR,4,RES_PTR
/* Carry is 0 when arriving through the "be" above, otherwise it is the
   borrow out of the limb just handled. */
50LOC(v1):
51 addx %g0,%g0,%o4 ! save cy in register
52 cmp SIZE,2 ! if SIZE < 2 ...
53 bl LOC(end2) ! ... branch to tail code
ba848785
RM
54 subcc %g0,%o4,%g0 ! restore cy
55
/* Preload the first two limbs of each operand: %g4,%g1 from S1_PTR and
   %g2,%g3 from S2_PTR.  SIZE is biased by -10, so it is non-negative only
   while the 10 limbs touched by one pass of loop1 (8 stored, 2 preloaded
   for the next pass) are still available. */
f41c8091
UD
56 ld [S1_PTR+0],%g4
57 addcc SIZE,-10,SIZE
58 ld [S1_PTR+4],%g1
59 ldd [S2_PTR+0],%g2
60 blt LOC(fin1)
ba848785
RM
61 subcc %g0,%o4,%g0 ! restore cy
62/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */
f41c8091
UD
63LOC(loop1):
64 subxcc %g4,%g2,%o4
65 ld [S1_PTR+8],%g4
ba848785 66 subxcc %g1,%g3,%o5
f41c8091
UD
67 ld [S1_PTR+12],%g1
68 ldd [S2_PTR+8],%g2
69 std %o4,[RES_PTR+0]
ba848785 70 subxcc %g4,%g2,%o4
f41c8091 71 ld [S1_PTR+16],%g4
ba848785 72 subxcc %g1,%g3,%o5
f41c8091
UD
73 ld [S1_PTR+20],%g1
74 ldd [S2_PTR+16],%g2
75 std %o4,[RES_PTR+8]
ba848785 76 subxcc %g4,%g2,%o4
f41c8091 77 ld [S1_PTR+24],%g4
ba848785 78 subxcc %g1,%g3,%o5
f41c8091
UD
79 ld [S1_PTR+28],%g1
80 ldd [S2_PTR+24],%g2
81 std %o4,[RES_PTR+16]
ba848785 82 subxcc %g4,%g2,%o4
f41c8091 83 ld [S1_PTR+32],%g4
ba848785 84 subxcc %g1,%g3,%o5
f41c8091
UD
85 ld [S1_PTR+36],%g1
86 ldd [S2_PTR+32],%g2
87 std %o4,[RES_PTR+24]
ba848785 88 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
89 addcc SIZE,-8,SIZE
90 add S1_PTR,32,S1_PTR
91 add S2_PTR,32,S2_PTR
92 add RES_PTR,32,RES_PTR
93 bge LOC(loop1)
ba848785
RM
94 subcc %g0,%o4,%g0 ! restore cy
95
/* Re-bias SIZE from the 8-limb loop to the 2-limb loop.  One pair of limbs
   per operand is still preloaded and not yet subtracted. */
f41c8091
UD
96LOC(fin1):
97 addcc SIZE,8-2,SIZE
98 blt LOC(end1)
ba848785
RM
99 subcc %g0,%o4,%g0 ! restore cy
100/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
f41c8091
UD
101LOC(loope1):
102 subxcc %g4,%g2,%o4
103 ld [S1_PTR+8],%g4
ba848785 104 subxcc %g1,%g3,%o5
f41c8091
UD
105 ld [S1_PTR+12],%g1
106 ldd [S2_PTR+8],%g2
107 std %o4,[RES_PTR+0]
ba848785 108 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
109 addcc SIZE,-2,SIZE
110 add S1_PTR,8,S1_PTR
111 add S2_PTR,8,S2_PTR
112 add RES_PTR,8,RES_PTR
113 bge LOC(loope1)
ba848785 114 subcc %g0,%o4,%g0 ! restore cy
/* Finish the pair that is still preloaded.  The pointers are not advanced
   afterwards, which is why a possible last limb is addressed at offset 8. */
f41c8091
UD
115LOC(end1):
116 subxcc %g4,%g2,%o4
ba848785 117 subxcc %g1,%g3,%o5
f41c8091 118 std %o4,[RES_PTR+0]
ba848785
RM
119 addx %g0,%g0,%o4 ! save cy in register
120
/* Bit 0 of SIZE decides whether one more limb follows. */
f41c8091
UD
121 andcc SIZE,1,%g0
122 be LOC(ret1)
ba848785
RM
123 subcc %g0,%o4,%g0 ! restore cy
124/* Subtract last limb */
f41c8091
UD
125 ld [S1_PTR+8],%g4
126 ld [S2_PTR+8],%g2
ba848785 127 subxcc %g4,%g2,%o4
f41c8091 128 st %o4,[RES_PTR+8]
ba848785 129
f41c8091
UD
130LOC(ret1):
131 retl
ba848785
RM
132 addx %g0,%g0,%o0 ! return carry-out from most sign. limb
133
f41c8091 134LOC(1): xor S1_PTR,RES_PTR,%g1
ba848785 135 andcc %g1,4,%g0
f41c8091 136 bne LOC(2)
ba848785
RM
137 nop
138! ** V1b **
/* Here bit 2 of S1_PTR equals bit 2 of RES_PTR.  This path mirrors V1a with
   the load roles exchanged: %g4,%g1 receive S2_PTR limbs (ld) and %g2,%g3
   receive S1_PTR limbs (ldd), so the subxcc operands are swapped to keep
   computing S1 - S2. */
f41c8091
UD
139 andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
140 be LOC(v1b) ! if no, branch
ba848785 141 nop
f41c8091
UD
142/* Subtract the least significant limb separately to align RES_PTR and S1_PTR */
143 ld [S2_PTR],%g4
144 add S2_PTR,4,S2_PTR
145 ld [S1_PTR],%g2
146 add S1_PTR,4,S1_PTR
147 add SIZE,-1,SIZE
ba848785 148 subcc %g2,%g4,%o4
f41c8091
UD
149 st %o4,[RES_PTR]
150 add RES_PTR,4,RES_PTR
151LOC(v1b):
152 addx %g0,%g0,%o4 ! save cy in register
153 cmp SIZE,2 ! if SIZE < 2 ...
154 bl LOC(end2) ! ... branch to tail code
ba848785
RM
155 subcc %g0,%o4,%g0 ! restore cy
156
/* Preload two limbs of each operand and bias SIZE by -10, as in V1a. */
f41c8091
UD
157 ld [S2_PTR+0],%g4
158 addcc SIZE,-10,SIZE
159 ld [S2_PTR+4],%g1
160 ldd [S1_PTR+0],%g2
161 blt LOC(fin1b)
ba848785
RM
162 subcc %g0,%o4,%g0 ! restore cy
163/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */
f41c8091
UD
164LOC(loop1b):
165 subxcc %g2,%g4,%o4
166 ld [S2_PTR+8],%g4
ba848785 167 subxcc %g3,%g1,%o5
f41c8091
UD
168 ld [S2_PTR+12],%g1
169 ldd [S1_PTR+8],%g2
170 std %o4,[RES_PTR+0]
ba848785 171 subxcc %g2,%g4,%o4
f41c8091 172 ld [S2_PTR+16],%g4
ba848785 173 subxcc %g3,%g1,%o5
f41c8091
UD
174 ld [S2_PTR+20],%g1
175 ldd [S1_PTR+16],%g2
176 std %o4,[RES_PTR+8]
ba848785 177 subxcc %g2,%g4,%o4
f41c8091 178 ld [S2_PTR+24],%g4
ba848785 179 subxcc %g3,%g1,%o5
f41c8091
UD
180 ld [S2_PTR+28],%g1
181 ldd [S1_PTR+24],%g2
182 std %o4,[RES_PTR+16]
ba848785 183 subxcc %g2,%g4,%o4
f41c8091 184 ld [S2_PTR+32],%g4
ba848785 185 subxcc %g3,%g1,%o5
f41c8091
UD
186 ld [S2_PTR+36],%g1
187 ldd [S1_PTR+32],%g2
188 std %o4,[RES_PTR+24]
ba848785 189 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
190 addcc SIZE,-8,SIZE
191 add S1_PTR,32,S1_PTR
192 add S2_PTR,32,S2_PTR
193 add RES_PTR,32,RES_PTR
194 bge LOC(loop1b)
ba848785
RM
195 subcc %g0,%o4,%g0 ! restore cy
196
/* Re-bias SIZE for the 2-limb loop; one preloaded pair is still pending. */
f41c8091
UD
197LOC(fin1b):
198 addcc SIZE,8-2,SIZE
199 blt LOC(end1b)
ba848785
RM
200 subcc %g0,%o4,%g0 ! restore cy
201/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
f41c8091
UD
202LOC(loope1b):
203 subxcc %g2,%g4,%o4
204 ld [S2_PTR+8],%g4
ba848785 205 subxcc %g3,%g1,%o5
f41c8091
UD
206 ld [S2_PTR+12],%g1
207 ldd [S1_PTR+8],%g2
208 std %o4,[RES_PTR+0]
ba848785 209 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
210 addcc SIZE,-2,SIZE
211 add S1_PTR,8,S1_PTR
212 add S2_PTR,8,S2_PTR
213 add RES_PTR,8,RES_PTR
214 bge LOC(loope1b)
ba848785 215 subcc %g0,%o4,%g0 ! restore cy
/* Finish the preloaded pair; pointers stay put, so a last limb is at offset 8. */
f41c8091
UD
216LOC(end1b):
217 subxcc %g2,%g4,%o4
ba848785 218 subxcc %g3,%g1,%o5
f41c8091 219 std %o4,[RES_PTR+0]
ba848785
RM
220 addx %g0,%g0,%o4 ! save cy in register
221
f41c8091
UD
222 andcc SIZE,1,%g0
223 be LOC(ret1b)
ba848785
RM
224 subcc %g0,%o4,%g0 ! restore cy
225/* Subtract last limb */
f41c8091
UD
226 ld [S2_PTR+8],%g4
227 ld [S1_PTR+8],%g2
ba848785 228 subxcc %g2,%g4,%o4
f41c8091 229 st %o4,[RES_PTR+8]
ba848785 230
f41c8091
UD
231LOC(ret1b):
232 retl
ba848785
RM
233 addx %g0,%g0,%o0 ! return carry-out from most sign. limb
234
235! ** V2 **
f41c8091
UD
236/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
237 alignment of S2_PTR and RES_PTR differ. Since there are only two ways
ba848785 238 things can be aligned (that we care about) we now know that the alignment
f41c8091 239 of S1_PTR and S2_PTR are the same. */
ba848785 240
/* A single limb goes straight to the last-limb code.  The cmp of two equal
   values leaves the carry flag clear, so the subxcc at jone starts without
   a borrow. */
f41c8091
UD
241LOC(2): cmp SIZE,1
242 be LOC(jone)
ba848785 243 nop
f41c8091
UD
244 andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0
245 be LOC(v2) ! if no, branch
ba848785 246 nop
f41c8091
UD
247/* Subtract the least significant limb separately to align S1_PTR and S2_PTR */
248 ld [S1_PTR],%g4
249 add S1_PTR,4,S1_PTR
250 ld [S2_PTR],%g2
251 add S2_PTR,4,S2_PTR
252 add SIZE,-1,SIZE
ba848785 253 subcc %g4,%g2,%o4
f41c8091
UD
254 st %o4,[RES_PTR]
255 add RES_PTR,4,RES_PTR
ba848785 256
/* No preloading on this path: SIZE is biased by -8, one full pass of loop2.
   Both operands are fetched with ldd (S1 limbs into %g2,%g3, S2 limbs into
   %o4,%o5) and each result limb is written with a single-word st. */
f41c8091
UD
257LOC(v2):
258 addx %g0,%g0,%o4 ! save cy in register
259 addcc SIZE,-8,SIZE
260 blt LOC(fin2)
ba848785
RM
261 subcc %g0,%o4,%g0 ! restore cy
262/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */
f41c8091
UD
263LOC(loop2):
264 ldd [S1_PTR+0],%g2
265 ldd [S2_PTR+0],%o4
ba848785 266 subxcc %g2,%o4,%g2
f41c8091 267 st %g2,[RES_PTR+0]
ba848785 268 subxcc %g3,%o5,%g3
f41c8091
UD
269 st %g3,[RES_PTR+4]
270 ldd [S1_PTR+8],%g2
271 ldd [S2_PTR+8],%o4
ba848785 272 subxcc %g2,%o4,%g2
f41c8091 273 st %g2,[RES_PTR+8]
ba848785 274 subxcc %g3,%o5,%g3
f41c8091
UD
275 st %g3,[RES_PTR+12]
276 ldd [S1_PTR+16],%g2
277 ldd [S2_PTR+16],%o4
ba848785 278 subxcc %g2,%o4,%g2
f41c8091 279 st %g2,[RES_PTR+16]
ba848785 280 subxcc %g3,%o5,%g3
f41c8091
UD
281 st %g3,[RES_PTR+20]
282 ldd [S1_PTR+24],%g2
283 ldd [S2_PTR+24],%o4
ba848785 284 subxcc %g2,%o4,%g2
f41c8091 285 st %g2,[RES_PTR+24]
ba848785 286 subxcc %g3,%o5,%g3
f41c8091 287 st %g3,[RES_PTR+28]
ba848785 288 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
289 addcc SIZE,-8,SIZE
290 add S1_PTR,32,S1_PTR
291 add S2_PTR,32,S2_PTR
292 add RES_PTR,32,RES_PTR
293 bge LOC(loop2)
ba848785
RM
294 subcc %g0,%o4,%g0 ! restore cy
295
/* Re-bias SIZE from the 8-limb loop to the 2-limb loop. */
f41c8091
UD
296LOC(fin2):
297 addcc SIZE,8-2,SIZE
298 blt LOC(end2)
ba848785 299 subcc %g0,%o4,%g0 ! restore cy
/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
f41c8091
UD
300LOC(loope2):
301 ldd [S1_PTR+0],%g2
302 ldd [S2_PTR+0],%o4
ba848785 303 subxcc %g2,%o4,%g2
f41c8091 304 st %g2,[RES_PTR+0]
ba848785 305 subxcc %g3,%o5,%g3
f41c8091 306 st %g3,[RES_PTR+4]
ba848785 307 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
308 addcc SIZE,-2,SIZE
309 add S1_PTR,8,S1_PTR
310 add S2_PTR,8,S2_PTR
311 add RES_PTR,8,RES_PTR
312 bge LOC(loope2)
ba848785 313 subcc %g0,%o4,%g0 ! restore cy
/* Shared tail.  Besides V2, the V1a and V1b paths branch here when fewer
   than 2 limbs are left after their alignment step; %o4 holds the saved
   carry on every way in.  Bit 0 of SIZE decides whether one limb remains. */
f41c8091
UD
314LOC(end2):
315 andcc SIZE,1,%g0
316 be LOC(ret2)
ba848785
RM
317 subcc %g0,%o4,%g0 ! restore cy
318/* Subtract last limb */
f41c8091
UD
319LOC(jone):
320 ld [S1_PTR],%g4
321 ld [S2_PTR],%g2
ba848785 322 subxcc %g4,%g2,%o4
f41c8091 323 st %o4,[RES_PTR]
ba848785 324
f41c8091
UD
325LOC(ret2):
326 retl
ba848785 327 addx %g0,%g0,%o0 ! return carry-out from most sign. limb
f41c8091
UD
328
329END(__mpn_sub_n)