! glibc: sysdeps/sparc/sparc32/add_n.S
! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
! sum in a third limb vector.
!
! Copyright (C) 1995-2019 Free Software Foundation, Inc.
!
! This file is part of the GNU MP Library.
!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.
!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
! License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB.  If not,
! see <https://www.gnu.org/licenses/>.


! INPUT PARAMETERS (SPARC32 ABI: first arguments arrive in %o0-%o3)
#define RES_PTR %o0	/* destination limb vector */
#define S1_PTR  %o1	/* first source limb vector */
#define S2_PTR  %o2	/* second source limb vector */
#define SIZE    %o3	/* number of limbs, > 0 */

#include <sysdep.h>
/* mp_limb_t __mpn_add_n (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
			  const mp_limb_t *s2_ptr, mp_size_t size)

   In:   RES_PTR = %o0, S1_PTR = %o1, S2_PTR = %o2, SIZE = %o3 (SIZE > 0).
   Out:  %o0 = carry out of the most significant limb (0 or 1).

   Strategy: pick one of three inner loops depending on the mutual
   word-alignment (bit 2) of the three pointers, so that 64-bit ldd/std
   can be used where possible:
     V1a - S2_PTR and RES_PTR share alignment: ldd from S2, std to RES.
     V1b - S1_PTR and RES_PTR share alignment: swap S1/S2, then as V1a.
     V2  - S1_PTR and S2_PTR share alignment (but differ from RES_PTR):
	   ldd from both sources, single-word st to RES.

   Recurring idioms (SPARC V8):
     - ldd/std move an even/odd register pair: ldd ...,%g2 fills %g2,%g3;
       std %o4,... stores %o4,%o5.
     - Carry must survive addcc on the loop counter, so it is saved with
       "addx %g0,%g0,%o4" (%o4 = C) and re-created with
       "subcc %g0,%o4,%g0" (0 - %o4 sets C iff %o4 != 0).
     - The instruction after every branch sits in the (non-annulled)
       delay slot and always executes.  */

ENTRY(__mpn_add_n)
	xor	S2_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0
	bne	LOC(1)			! branch if alignment differs
	nop
! **  V1a  **
LOC(0):	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
	be	LOC(v1)			! if no, branch
	nop
/* Add least significant limb separately to align RES_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	addcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR
LOC(v1):
	addx	%g0,%g0,%o4		! save cy in register
	cmp	SIZE,2			! if SIZE < 2 ...
	bl	LOC(end2)		! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy (delay slot)

	ld	[S1_PTR+0],%g4		! preload two S1 limbs ...
	addcc	SIZE,-10,SIZE		! -10 = -(8 + 2 limbs loaded ahead)
	ld	[S1_PTR+4],%g1
	ldd	[S2_PTR+0],%g2		! ... and an S2 limb pair (%g2,%g3)
	blt	LOC(fin1)
	subcc	%g0,%o4,%g0		! restore cy (addcc above clobbered it)
/* Add blocks of 8 limbs until less than 8 limbs remain */
LOC(loop1):
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]		! stores %o4 and %o5
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+16],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+20],%g1
	ldd	[S2_PTR+16],%g2
	std	%o4,[RES_PTR+8]
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+24],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+28],%g1
	ldd	[S2_PTR+24],%g2
	std	%o4,[RES_PTR+16]
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+32],%g4		! prefetch for the next iteration
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+36],%g1
	ldd	[S2_PTR+32],%g2
	std	%o4,[RES_PTR+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop1)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)

LOC(fin1):
	addcc	SIZE,8-2,SIZE		! undo loop bias; 2 limbs still in regs
	blt	LOC(end1)
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
LOC(loope1):
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope1)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end1):
	addxcc	%g4,%g2,%o4		! add the two limbs loaded ahead
	addxcc	%g1,%g3,%o5
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	SIZE,1,%g0		! one odd limb left over?
	be	LOC(ret1)
	subcc	%g0,%o4,%g0		! restore cy
/* Add last limb */
	ld	[S1_PTR+8],%g4
	ld	[S2_PTR+8],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR+8]

LOC(ret1):
	retl
	addx	%g0,%g0,%o0	! return carry-out from most sign. limb

LOC(1):	xor	S1_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0
	bne	LOC(2)
	nop
! **  V1b  **
	mov	S2_PTR,%g1		! swap S1_PTR and S2_PTR (addition is
	mov	S1_PTR,S2_PTR		! commutative), then reuse the V1a path
	b	LOC(0)
	mov	%g1,S1_PTR		! delay slot completes the swap

! **  V2  **
/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
   alignment of S2_PTR and RES_PTR differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of S1_PTR and S2_PTR are the same.  */

LOC(2):	cmp	SIZE,1
	be	LOC(jone)		! single limb: skip straight to tail add
	nop
	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
	be	LOC(v2)			! if no, branch
	nop
/* Add least significant limb separately to align S1_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	addcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR

LOC(v2):
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	blt	LOC(fin2)
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
LOC(loop2):
	ldd	[S1_PTR+0],%g2		! %g2,%g3 = two S1 limbs
	ldd	[S2_PTR+0],%o4		! %o4,%o5 = two S2 limbs
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]		! RES_PTR differs in alignment, so
	addxcc	%g3,%o5,%g3		! results go out as single words
	st	%g3,[RES_PTR+4]
	ldd	[S1_PTR+8],%g2
	ldd	[S2_PTR+8],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+8]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+12]
	ldd	[S1_PTR+16],%g2
	ldd	[S2_PTR+16],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+16]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+20]
	ldd	[S1_PTR+24],%g2
	ldd	[S2_PTR+24],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+24]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+28]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop2)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin2):
	addcc	SIZE,8-2,SIZE		! undo loop bias, test for >= 2 limbs
	blt	LOC(end2)
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
LOC(loope2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope2)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end2):
	andcc	SIZE,1,%g0		! one odd limb left over?
	be	LOC(ret2)
	subcc	%g0,%o4,%g0		! restore cy
/* Add last limb */
LOC(jone):
	ld	[S1_PTR],%g4
	ld	[S2_PTR],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]

LOC(ret2):
	retl
	addx	%g0,%g0,%o0	! return carry-out from most sign. limb

END(__mpn_add_n)