git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/sparc/sparc32/add_n.S
Replace FSF snail mail address with URLs.
[thirdparty/glibc.git] / sysdeps / sparc / sparc32 / add_n.S
CommitLineData
6b628d36 1! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
28f540f4 2! sum in a third limb vector.
f41c8091
UD
3!
4! Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
5!
28f540f4 6! This file is part of the GNU MP Library.
f41c8091 7!
28f540f4 8! The GNU MP Library is free software; you can redistribute it and/or modify
6d84f89a
AJ
9! it under the terms of the GNU Lesser General Public License as published by
10! the Free Software Foundation; either version 2.1 of the License, or (at your
28f540f4 11! option) any later version.
f41c8091 12!
28f540f4
RM
13! The GNU MP Library is distributed in the hope that it will be useful, but
14! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
6d84f89a 15! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
28f540f4 16! License for more details.
f41c8091 17!
6d84f89a 18! You should have received a copy of the GNU Lesser General Public License
59ba27a6
PE
19! along with the GNU MP Library; see the file COPYING.LIB. If not,
20! see <http://www.gnu.org/licenses/>.
28f540f4
RM
21
22
23! INPUT PARAMETERS
f41c8091
UD
24#define RES_PTR %o0
25#define S1_PTR %o1
26#define S2_PTR %o2
27#define SIZE %o3
28f540f4 28
f41c8091 29#include <sysdep.h>
28f540f4 30
f41c8091
UD
/* __mpn_add_n: add the SIZE-limb vectors at S1_PTR and S2_PTR, store the
   SIZE-limb sum at RES_PTR, and return the carry-out of the most
   significant limb in %o0.  The arguments arrive in %o0-%o3 (see the
   RES_PTR/S1_PTR/S2_PTR/SIZE defines above).  %g1-%g4, %o4 and %o5 are
   used as scratch registers.

   Three code paths, selected by comparing bit 2 of the pointers
   (i.e. whether they sit at the same offset within a doubleword,
   assuming word-aligned pointers):
     V1a  RES_PTR and S2_PTR agree: ldd from S2_PTR, std to RES_PTR,
          single-word ld from S1_PTR.
     V1b  RES_PTR and S1_PTR agree: swap S1_PTR/S2_PTR, then reuse V1a.
     V2   only S1_PTR and S2_PTR agree: ldd from both, single-word st
          to RES_PTR.

   Carry handling: the carry chain lives in the condition codes.  Every
   addcc/andcc/cmp on SIZE overwrites them, so the carry is first copied
   into %o4 with "addx %g0,%g0,%o4" and re-created afterwards with
   "subcc %g0,%o4,%g0" (0 - %o4 borrows exactly when %o4 is 1).  Those
   restores sit in the delay slot of the branch that precedes them, so
   they execute whether or not the branch is taken.  */
31ENTRY(__mpn_add_n)
/* Bit 2 of (S2_PTR ^ RES_PTR) is set when the two pointers sit at
   different offsets within a doubleword.  */
32 xor S2_PTR,RES_PTR,%g1
ba848785 33 andcc %g1,4,%g0
f41c8091
UD
34 bne LOC(1) ! branch if alignment differs
35 nop
6b628d36 36! ** V1a **
f41c8091
UD
37LOC(0): andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
38 be LOC(v1) ! if no, branch
39 nop
40/* Add least significant limb separately to align RES_PTR and S2_PTR */
41 ld [S1_PTR],%g4
42 add S1_PTR,4,S1_PTR
43 ld [S2_PTR],%g2
44 add S2_PTR,4,S2_PTR
45 add SIZE,-1,SIZE
ba848785 46 addcc %g4,%g2,%o4
f41c8091
UD
47 st %o4,[RES_PTR]
48 add RES_PTR,4,RES_PTR
/* The plain "add" used on the RES_PTR update above leaves the condition
   codes alone, so the carry seen here is either the one produced by the
   alignment limb's addcc or the cy=0 left by the andcc at LOC(0).  */
49LOC(v1):
50 addx %g0,%g0,%o4 ! save cy in register
51 cmp SIZE,2 ! if SIZE < 2 ...
52 bl LOC(end2) ! ... branch to tail code
6b628d36 53 subcc %g0,%o4,%g0 ! restore cy
ba848785 54
f41c8091
UD
/* Preload the first pair of limbs from each source.  The loops below
   always run one pair ahead: they add the pair already in
   %g4/%g1 and %g2/%g3 while loading the next one.  SIZE is biased by
   -10 here: 8 limbs for one pass of loop1 plus the 2 limbs that are
   preloaded.  */
55 ld [S1_PTR+0],%g4
56 addcc SIZE,-10,SIZE
57 ld [S1_PTR+4],%g1
58 ldd [S2_PTR+0],%g2
59 blt LOC(fin1)
ba848785
RM
60 subcc %g0,%o4,%g0 ! restore cy
61/* Add blocks of 8 limbs until less than 8 limbs remain */
f41c8091
UD
62LOC(loop1):
63 addxcc %g4,%g2,%o4
64 ld [S1_PTR+8],%g4
ba848785 65 addxcc %g1,%g3,%o5
f41c8091
UD
66 ld [S1_PTR+12],%g1
67 ldd [S2_PTR+8],%g2
68 std %o4,[RES_PTR+0]
ba848785 69 addxcc %g4,%g2,%o4
f41c8091 70 ld [S1_PTR+16],%g4
ba848785 71 addxcc %g1,%g3,%o5
f41c8091
UD
72 ld [S1_PTR+20],%g1
73 ldd [S2_PTR+16],%g2
74 std %o4,[RES_PTR+8]
ba848785 75 addxcc %g4,%g2,%o4
f41c8091 76 ld [S1_PTR+24],%g4
ba848785 77 addxcc %g1,%g3,%o5
f41c8091
UD
78 ld [S1_PTR+28],%g1
79 ldd [S2_PTR+24],%g2
80 std %o4,[RES_PTR+16]
ba848785 81 addxcc %g4,%g2,%o4
f41c8091 82 ld [S1_PTR+32],%g4
ba848785 83 addxcc %g1,%g3,%o5
f41c8091
UD
84 ld [S1_PTR+36],%g1
85 ldd [S2_PTR+32],%g2
86 std %o4,[RES_PTR+24]
ba848785 87 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
88 addcc SIZE,-8,SIZE
89 add S1_PTR,32,S1_PTR
90 add S2_PTR,32,S2_PTR
91 add RES_PTR,32,RES_PTR
92 bge LOC(loop1)
ba848785
RM
93 subcc %g0,%o4,%g0 ! restore cy
94
f41c8091
UD
/* Re-bias SIZE for the 2-limb loop: undo the 8 of the block loop but
   keep 2 subtracted for the pair that is always preloaded.  */
95LOC(fin1):
96 addcc SIZE,8-2,SIZE
97 blt LOC(end1)
ba848785
RM
98 subcc %g0,%o4,%g0 ! restore cy
99/* Add blocks of 2 limbs until less than 2 limbs remain */
f41c8091
UD
100LOC(loope1):
101 addxcc %g4,%g2,%o4
102 ld [S1_PTR+8],%g4
ba848785 103 addxcc %g1,%g3,%o5
f41c8091
UD
104 ld [S1_PTR+12],%g1
105 ldd [S2_PTR+8],%g2
106 std %o4,[RES_PTR+0]
ba848785 107 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
108 addcc SIZE,-2,SIZE
109 add S1_PTR,8,S1_PTR
110 add S2_PTR,8,S2_PTR
111 add RES_PTR,8,RES_PTR
112 bge LOC(loope1)
ba848785 113 subcc %g0,%o4,%g0 ! restore cy
f41c8091
UD
/* Add the pair that is already sitting in %g4/%g1 and %g2/%g3.  */
114LOC(end1):
115 addxcc %g4,%g2,%o4
ba848785 116 addxcc %g1,%g3,%o5
f41c8091 117 std %o4,[RES_PTR+0]
ba848785
RM
118 addx %g0,%g0,%o4 ! save cy in register
119
f41c8091
UD
/* Every adjustment made to SIZE since LOC(v1) was even, so bit 0 still
   tells whether one odd limb is left over.  */
120 andcc SIZE,1,%g0
121 be LOC(ret1)
ba848785
RM
122 subcc %g0,%o4,%g0 ! restore cy
123/* Add last limb */
f41c8091
UD
/* The pointers were not advanced past the pair handled at LOC(end1),
   hence the +8 offsets.  */
124 ld [S1_PTR+8],%g4
125 ld [S2_PTR+8],%g2
ba848785 126 addxcc %g4,%g2,%o4
f41c8091 127 st %o4,[RES_PTR+8]
ba848785 128
f41c8091
UD
129LOC(ret1):
130 retl
ba848785
RM
131 addx %g0,%g0,%o0 ! return carry-out from most sign. limb
132
/* RES_PTR and S2_PTR differ in bit 2; try S1_PTR against RES_PTR.  */
f41c8091 133LOC(1): xor S1_PTR,RES_PTR,%g1
ba848785 134 andcc %g1,4,%g0
f41c8091 135 bne LOC(2)
ba848785
RM
136 nop
137! ** V1b **
f41c8091
UD
/* Exchange S1_PTR and S2_PTR through %g1 so that the V1a code, which
   pairs S2_PTR with RES_PTR, can be reused.  The final mov sits in the
   delay slot of the branch.  */
138 mov S2_PTR,%g1
139 mov S1_PTR,S2_PTR
140 b LOC(0)
141 mov %g1,S1_PTR
ba848785
RM
142
143! ** V2 **
f41c8091
UD
144/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
145 alignment of S2_PTR and RES_PTR differ. Since there are only two ways
ba848785 146 things can be aligned (that we care about) we now know that the alignment
f41c8091 147 of S1_PTR and S2_PTR is the same. */
ba848785 148
f41c8091
UD
/* A single limb goes straight to LOC(jone).  The compare leaves the
   carry clear when SIZE equals 1, so the addxcc there starts with
   cy=0.  */
149LOC(2): cmp SIZE,1
150 be LOC(jone)
ba848785 151 nop
f41c8091
UD
152 andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0
153 be LOC(v2) ! if no, branch
6b628d36 154 nop
f41c8091
UD
155/* Add least significant limb separately to align S1_PTR and S2_PTR */
156 ld [S1_PTR],%g4
157 add S1_PTR,4,S1_PTR
158 ld [S2_PTR],%g2
159 add S2_PTR,4,S2_PTR
160 add SIZE,-1,SIZE
ba848785 161 addcc %g4,%g2,%o4
f41c8091
UD
162 st %o4,[RES_PTR]
163 add RES_PTR,4,RES_PTR
ba848785 164
f41c8091
UD
165LOC(v2):
166 addx %g0,%g0,%o4 ! save cy in register
167 addcc SIZE,-8,SIZE
168 blt LOC(fin2)
ba848785
RM
169 subcc %g0,%o4,%g0 ! restore cy
/* Unlike the V1 loops nothing is preloaded here: each pair is loaded
   with ldd from both sources, added, and written back with two
   single-word stores.  */
170/* Add blocks of 8 limbs until less than 8 limbs remain */
f41c8091
UD
171LOC(loop2):
172 ldd [S1_PTR+0],%g2
173 ldd [S2_PTR+0],%o4
ba848785 174 addxcc %g2,%o4,%g2
f41c8091 175 st %g2,[RES_PTR+0]
ba848785 176 addxcc %g3,%o5,%g3
f41c8091
UD
177 st %g3,[RES_PTR+4]
178 ldd [S1_PTR+8],%g2
179 ldd [S2_PTR+8],%o4
ba848785 180 addxcc %g2,%o4,%g2
f41c8091 181 st %g2,[RES_PTR+8]
ba848785 182 addxcc %g3,%o5,%g3
f41c8091
UD
183 st %g3,[RES_PTR+12]
184 ldd [S1_PTR+16],%g2
185 ldd [S2_PTR+16],%o4
ba848785 186 addxcc %g2,%o4,%g2
f41c8091 187 st %g2,[RES_PTR+16]
ba848785 188 addxcc %g3,%o5,%g3
f41c8091
UD
189 st %g3,[RES_PTR+20]
190 ldd [S1_PTR+24],%g2
191 ldd [S2_PTR+24],%o4
ba848785 192 addxcc %g2,%o4,%g2
f41c8091 193 st %g2,[RES_PTR+24]
ba848785 194 addxcc %g3,%o5,%g3
f41c8091 195 st %g3,[RES_PTR+28]
ba848785 196 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
197 addcc SIZE,-8,SIZE
198 add S1_PTR,32,S1_PTR
199 add S2_PTR,32,S2_PTR
200 add RES_PTR,32,RES_PTR
201 bge LOC(loop2)
ba848785
RM
202 subcc %g0,%o4,%g0 ! restore cy
203
f41c8091
UD
/* Re-bias SIZE from "minus 8" to "minus 2" for the 2-limb loop.  */
204LOC(fin2):
205 addcc SIZE,8-2,SIZE
206 blt LOC(end2)
ba848785 207 subcc %g0,%o4,%g0 ! restore cy
f41c8091
UD
/* Add blocks of 2 limbs until less than 2 limbs remain.  */
208LOC(loope2):
209 ldd [S1_PTR+0],%g2
210 ldd [S2_PTR+0],%o4
ba848785 211 addxcc %g2,%o4,%g2
f41c8091 212 st %g2,[RES_PTR+0]
ba848785 213 addxcc %g3,%o5,%g3
f41c8091 214 st %g3,[RES_PTR+4]
ba848785 215 addx %g0,%g0,%o4 ! save cy in register
f41c8091
UD
216 addcc SIZE,-2,SIZE
217 add S1_PTR,8,S1_PTR
218 add S2_PTR,8,S2_PTR
219 add RES_PTR,8,RES_PTR
220 bge LOC(loope2)
ba848785 221 subcc %g0,%o4,%g0 ! restore cy
f41c8091
UD
/* Shared tail: also reached from LOC(v1) when fewer than 2 limbs are
   left.  %o4 holds the saved carry on every path into this label; the
   andcc below destroys the condition codes and the delay-slot subcc
   rebuilds the carry from %o4.  Bit 0 of SIZE says whether one limb
   is still to be added.  */
222LOC(end2):
223 andcc SIZE,1,%g0
224 be LOC(ret2)
ba848785
RM
225 subcc %g0,%o4,%g0 ! restore cy
226/* Add last limb */
f41c8091
UD
227LOC(jone):
228 ld [S1_PTR],%g4
229 ld [S2_PTR],%g2
ba848785 230 addxcc %g4,%g2,%o4
f41c8091 231 st %o4,[RES_PTR]
ba848785 232
f41c8091
UD
233LOC(ret2):
234 retl
ba848785 235 addx %g0,%g0,%o0 ! return carry-out from most sign. limb
f41c8091
UD
236
237END(__mpn_add_n)