]>
Commit | Line | Data |
---|---|---|
6b628d36 | 1 | ! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store |
28f540f4 | 2 | ! sum in a third limb vector. |
f41c8091 | 3 | ! |
04277e02 | 4 | ! Copyright (C) 1995-2019 Free Software Foundation, Inc. |
f41c8091 | 5 | ! |
28f540f4 | 6 | ! This file is part of the GNU MP Library. |
f41c8091 | 7 | ! |
28f540f4 | 8 | ! The GNU MP Library is free software; you can redistribute it and/or modify |
6d84f89a AJ |
9 | ! it under the terms of the GNU Lesser General Public License as published by |
10 | ! the Free Software Foundation; either version 2.1 of the License, or (at your | |
28f540f4 | 11 | ! option) any later version. |
f41c8091 | 12 | ! |
28f540f4 RM |
13 | ! The GNU MP Library is distributed in the hope that it will be useful, but |
14 | ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
6d84f89a | 15 | ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
28f540f4 | 16 | ! License for more details. |
f41c8091 | 17 | ! |
6d84f89a | 18 | ! You should have received a copy of the GNU Lesser General Public License |
59ba27a6 | 19 | ! along with the GNU MP Library; see the file COPYING.LIB. If not, |
5a82c748 | 20 | ! see <https://www.gnu.org/licenses/>. |
28f540f4 RM |
21 | |
22 | ||
23 | ! INPUT PARAMETERS | |
/* Symbolic names for the four registers __mpn_add_n takes its operands in:
     RES_PTR (%o0)  address the sum limbs are stored to; %o0 is also where
                    the routine leaves the final carry when it returns
     S1_PTR  (%o1)  address the first addend's limbs are loaded from
     S2_PTR  (%o2)  address the second addend's limbs are loaded from
     SIZE    (%o3)  number of limbs to add; used as a (biased) down-counter
   Each limb is accessed with word-sized ld/st and the pointers advance in
   steps of 4 bytes per limb.  */
f41c8091 UD |
24 | #define RES_PTR %o0
25 | #define S1_PTR %o1 | |
26 | #define S2_PTR %o2 | |
27 | #define SIZE %o3 | |
28f540f4 | 28 | |
f41c8091 | 29 | #include <sysdep.h> |
28f540f4 | 30 | |
/* __mpn_add_n -- add the SIZE-limb vectors at S1_PTR and S2_PTR, store the
   sum limbs at RES_PTR, and return the carry out of the most significant
   limb in %o0 (0 or 1).  The file header states SIZE must be > 0.

   Strategy: the routine wants to move two limbs at a time with ldd/std, so
   it first compares bit 2 of the three pointers (the bit that decides
   whether a word address is also doubleword-aligned):
     V1a  S2_PTR and RES_PTR agree  -> ldd from S2_PTR, std to RES_PTR,
                                       S1_PTR is read with single-word ld.
     V1b  S1_PTR and RES_PTR agree  -> swap S1_PTR/S2_PTR and reuse V1a.
     V2   only S1_PTR and S2_PTR agree -> ldd from both sources, results
                                       written with single-word st.
   In each variant one leading limb is added separately when needed so the
   paired pointers have bit 2 clear before the doubleword accesses start.

   Carry handling: the running carry lives in the condition-code carry bit
   and is consumed/produced by the addxcc chain.  Whenever an instruction
   that sets the condition codes for another purpose is needed (addcc on
   SIZE, andcc), the carry is first saved into %o4 with
   "addx %g0,%g0,%o4" and afterwards restored with "subcc %g0,%o4,%g0",
   usually placed in the delay slot of the following branch.

   Registers written besides the four operand registers: %g1-%g4, %o4, %o5.  */
f41c8091 UD |
31 | ENTRY(__mpn_add_n)
32 | xor S2_PTR,RES_PTR,%g1 | |
ba848785 | 33 | andcc %g1,4,%g0 |
f41c8091 UD |
34 | bne LOC(1) ! branch if alignment differs |
35 | nop | |
6b628d36 | 36 | ! ** V1a ** |
f41c8091 UD |
37 | LOC(0): andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0 |
38 | be LOC(v1) ! if no, branch | |
39 | nop | |
40 | /* Add least significant limb separately to align RES_PTR and S2_PTR */ | |
41 | ld [S1_PTR],%g4 | |
42 | add S1_PTR,4,S1_PTR | |
43 | ld [S2_PTR],%g2 | |
44 | add S2_PTR,4,S2_PTR | |
45 | add SIZE,-1,SIZE | |
ba848785 | 46 | addcc %g4,%g2,%o4 |
f41c8091 UD |
47 | st %o4,[RES_PTR] |
48 | add RES_PTR,4,RES_PTR | |
/* From here RES_PTR and S2_PTR both have bit 2 clear.  The carry bit is
   either the carry out of the limb just added or the 0 left by the andcc
   above.  With fewer than 2 limbs left there is nothing to pair up, so the
   shared tail at end2 (which handles 0 or 1 remaining limb) is used.  */
49 | LOC(v1): | |
50 | addx %g0,%g0,%o4 ! save cy in register | |
51 | cmp SIZE,2 ! if SIZE < 2 ... | |
52 | bl LOC(end2) ! ... branch to tail code | |
6b628d36 | 53 | subcc %g0,%o4,%g0 ! restore cy |
ba848785 | 54 | |
/* Preload the first two limbs of each source: S1 limbs into %g4 and %g1,
   S2 limbs into the pair %g2/%g3.  Every loop iteration below adds the
   preloaded pair first and loads the next pair before it finishes, so
   loop1 needs 8 limbs to add plus 2 more to preload: hence the -10 bias
   applied to SIZE here.  */
f41c8091 UD |
55 | ld [S1_PTR+0],%g4 |
56 | addcc SIZE,-10,SIZE | |
57 | ld [S1_PTR+4],%g1 | |
58 | ldd [S2_PTR+0],%g2 | |
59 | blt LOC(fin1) | |
ba848785 RM |
60 | subcc %g0,%o4,%g0 ! restore cy |
61 | /* Add blocks of 8 limbs until less than 8 limbs remain */ | |
f41c8091 UD |
62 | LOC(loop1): |
63 | addxcc %g4,%g2,%o4 | |
64 | ld [S1_PTR+8],%g4 | |
ba848785 | 65 | addxcc %g1,%g3,%o5 |
f41c8091 UD |
66 | ld [S1_PTR+12],%g1 |
67 | ldd [S2_PTR+8],%g2 | |
68 | std %o4,[RES_PTR+0] | |
ba848785 | 69 | addxcc %g4,%g2,%o4 |
f41c8091 | 70 | ld [S1_PTR+16],%g4 |
ba848785 | 71 | addxcc %g1,%g3,%o5 |
f41c8091 UD |
72 | ld [S1_PTR+20],%g1 |
73 | ldd [S2_PTR+16],%g2 | |
74 | std %o4,[RES_PTR+8] | |
ba848785 | 75 | addxcc %g4,%g2,%o4 |
f41c8091 | 76 | ld [S1_PTR+24],%g4 |
ba848785 | 77 | addxcc %g1,%g3,%o5 |
f41c8091 UD |
78 | ld [S1_PTR+28],%g1 |
79 | ldd [S2_PTR+24],%g2 | |
80 | std %o4,[RES_PTR+16] | |
ba848785 | 81 | addxcc %g4,%g2,%o4 |
f41c8091 | 82 | ld [S1_PTR+32],%g4 |
ba848785 | 83 | addxcc %g1,%g3,%o5 |
f41c8091 UD |
84 | ld [S1_PTR+36],%g1 |
85 | ldd [S2_PTR+32],%g2 | |
86 | std %o4,[RES_PTR+24] | |
ba848785 | 87 | addx %g0,%g0,%o4 ! save cy in register |
f41c8091 UD |
88 | addcc SIZE,-8,SIZE |
89 | add S1_PTR,32,S1_PTR | |
90 | add S2_PTR,32,S2_PTR | |
91 | add RES_PTR,32,RES_PTR | |
92 | bge LOC(loop1) | |
ba848785 RM |
93 | subcc %g0,%o4,%g0 ! restore cy |
94 | ||
/* Change the bias from -10 to -4: SIZE is now (limbs not yet stored) - 4.
   The 2-limb loop can run only while the preloaded pair plus one further
   pair to preload are both available, i.e. while SIZE >= 0.  */
f41c8091 UD |
95 | LOC(fin1): |
96 | addcc SIZE,8-2,SIZE | |
97 | blt LOC(end1) | |
ba848785 RM |
98 | subcc %g0,%o4,%g0 ! restore cy |
99 | /* Add blocks of 2 limbs until less than 2 limbs remain */ | |
f41c8091 UD |
100 | LOC(loope1): |
101 | addxcc %g4,%g2,%o4 | |
102 | ld [S1_PTR+8],%g4 | |
ba848785 | 103 | addxcc %g1,%g3,%o5 |
f41c8091 UD |
104 | ld [S1_PTR+12],%g1 |
105 | ldd [S2_PTR+8],%g2 | |
106 | std %o4,[RES_PTR+0] | |
ba848785 | 107 | addx %g0,%g0,%o4 ! save cy in register |
f41c8091 UD |
108 | addcc SIZE,-2,SIZE |
109 | add S1_PTR,8,S1_PTR | |
110 | add S2_PTR,8,S2_PTR | |
111 | add RES_PTR,8,RES_PTR | |
112 | bge LOC(loope1) | |
ba848785 | 113 | subcc %g0,%o4,%g0 ! restore cy |
/* Drain: add and store the pair that is still preloaded in %g4/%g1 and
   %g2/%g3.  The pointers are NOT advanced past this pair, which is why the
   optional last limb below is addressed at offset +8.  */
f41c8091 UD |
114 | LOC(end1): |
115 | addxcc %g4,%g2,%o4 | |
ba848785 | 116 | addxcc %g1,%g3,%o5 |
f41c8091 | 117 | std %o4,[RES_PTR+0] |
ba848785 RM |
118 | addx %g0,%g0,%o4 ! save cy in register |
119 | ||
/* SIZE differs from the true remaining count only by an even bias, so its
   bit 0 still tells whether one odd limb is left to add.  */
f41c8091 UD |
120 | andcc SIZE,1,%g0 |
121 | be LOC(ret1) | |
ba848785 RM |
122 | subcc %g0,%o4,%g0 ! restore cy |
123 | /* Add last limb */ | |
f41c8091 UD |
124 | ld [S1_PTR+8],%g4 |
125 | ld [S2_PTR+8],%g2 | |
ba848785 | 126 | addxcc %g4,%g2,%o4 |
f41c8091 | 127 | st %o4,[RES_PTR+8] |
ba848785 | 128 | |
f41c8091 UD |
129 | LOC(ret1): |
130 | retl | |
ba848785 RM |
131 | addx %g0,%g0,%o0 ! return carry-out from most sign. limb |
132 | ||
/* Reached when S2_PTR and RES_PTR differ in bit 2: check whether S1_PTR
   matches RES_PTR instead.  */
f41c8091 | 133 | LOC(1): xor S1_PTR,RES_PTR,%g1 |
ba848785 | 134 | andcc %g1,4,%g0 |
f41c8091 | 135 | bne LOC(2) |
ba848785 RM |
136 | nop |
137 | ! ** V1b ** | |
/* Exchange S1_PTR and S2_PTR through %g1 (the last move executes in the
   delay slot of the branch) so that the source sharing RES_PTR's alignment
   is the one V1a reads with ldd, then continue at V1a.  */
f41c8091 UD |
138 | mov S2_PTR,%g1 |
139 | mov S1_PTR,S2_PTR | |
140 | b LOC(0) | |
141 | mov %g1,S1_PTR | |
ba848785 RM |
142 | |
143 | ! ** V2 ** | |
f41c8091 UD |
144 | /* If we come here, the alignment of S1_PTR and RES_PTR as well as the |
145 | alignment of S2_PTR and RES_PTR differ. Since there are only two ways | |
ba848785 | 146 | things can be aligned (that we care about) we now know that the alignment |
f41c8091 | 147 | of S1_PTR and S2_PTR are the same. */ |
ba848785 | 148 | |
/* A one-limb operand goes straight to the single-limb tail.  The compare
   of SIZE against 1 yields no borrow when they are equal, so the addxcc at
   jone starts with carry clear.  */
f41c8091 UD |
149 | LOC(2): cmp SIZE,1 |
150 | be LOC(jone) | |
ba848785 | 151 | nop |
f41c8091 UD |
152 | andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0 |
153 | be LOC(v2) ! if no, branch | |
6b628d36 | 154 | nop |
f41c8091 UD |
155 | /* Add least significant limb separately to align S1_PTR and S2_PTR */ |
156 | ld [S1_PTR],%g4 | |
157 | add S1_PTR,4,S1_PTR | |
158 | ld [S2_PTR],%g2 | |
159 | add S2_PTR,4,S2_PTR | |
160 | add SIZE,-1,SIZE | |
ba848785 | 161 | addcc %g4,%g2,%o4 |
f41c8091 UD |
162 | st %o4,[RES_PTR] |
163 | add RES_PTR,4,RES_PTR | |
ba848785 | 164 | |
/* Both sources now have bit 2 clear and are read with ldd into %g2/%g3
   and %o4/%o5; RES_PTR does not share that alignment, so every result limb
   is written with its own single-word st.  No preloading is done in this
   variant, so the loop bias on SIZE is just the block size (-8).  */
f41c8091 UD |
165 | LOC(v2): |
166 | addx %g0,%g0,%o4 ! save cy in register | |
167 | addcc SIZE,-8,SIZE | |
168 | blt LOC(fin2) | |
ba848785 RM |
169 | subcc %g0,%o4,%g0 ! restore cy |
170 | /* Add blocks of 8 limbs until less than 8 limbs remain */ | |
f41c8091 UD |
171 | LOC(loop2): |
172 | ldd [S1_PTR+0],%g2 | |
173 | ldd [S2_PTR+0],%o4 | |
ba848785 | 174 | addxcc %g2,%o4,%g2 |
f41c8091 | 175 | st %g2,[RES_PTR+0] |
ba848785 | 176 | addxcc %g3,%o5,%g3 |
f41c8091 UD |
177 | st %g3,[RES_PTR+4] |
178 | ldd [S1_PTR+8],%g2 | |
179 | ldd [S2_PTR+8],%o4 | |
ba848785 | 180 | addxcc %g2,%o4,%g2 |
f41c8091 | 181 | st %g2,[RES_PTR+8] |
ba848785 | 182 | addxcc %g3,%o5,%g3 |
f41c8091 UD |
183 | st %g3,[RES_PTR+12] |
184 | ldd [S1_PTR+16],%g2 | |
185 | ldd [S2_PTR+16],%o4 | |
ba848785 | 186 | addxcc %g2,%o4,%g2 |
f41c8091 | 187 | st %g2,[RES_PTR+16] |
ba848785 | 188 | addxcc %g3,%o5,%g3 |
f41c8091 UD |
189 | st %g3,[RES_PTR+20] |
190 | ldd [S1_PTR+24],%g2 | |
191 | ldd [S2_PTR+24],%o4 | |
ba848785 | 192 | addxcc %g2,%o4,%g2 |
f41c8091 | 193 | st %g2,[RES_PTR+24] |
ba848785 | 194 | addxcc %g3,%o5,%g3 |
f41c8091 | 195 | st %g3,[RES_PTR+28] |
ba848785 | 196 | addx %g0,%g0,%o4 ! save cy in register |
f41c8091 UD |
197 | addcc SIZE,-8,SIZE |
198 | add S1_PTR,32,S1_PTR | |
199 | add S2_PTR,32,S2_PTR | |
200 | add RES_PTR,32,RES_PTR | |
201 | bge LOC(loop2) | |
ba848785 RM |
202 | subcc %g0,%o4,%g0 ! restore cy |
203 | ||
/* Change the bias from -8 to -2: SIZE is now (limbs remaining) - 2, so the
   2-limb loop runs while SIZE >= 0.  */
f41c8091 UD |
204 | LOC(fin2): |
205 | addcc SIZE,8-2,SIZE | |
206 | blt LOC(end2) | |
ba848785 | 207 | subcc %g0,%o4,%g0 ! restore cy |
f41c8091 UD |
208 | LOC(loope2): |
209 | ldd [S1_PTR+0],%g2 | |
210 | ldd [S2_PTR+0],%o4 | |
ba848785 | 211 | addxcc %g2,%o4,%g2 |
f41c8091 | 212 | st %g2,[RES_PTR+0] |
ba848785 | 213 | addxcc %g3,%o5,%g3 |
f41c8091 | 214 | st %g3,[RES_PTR+4] |
ba848785 | 215 | addx %g0,%g0,%o4 ! save cy in register |
f41c8091 UD |
216 | addcc SIZE,-2,SIZE |
217 | add S1_PTR,8,S1_PTR | |
218 | add S2_PTR,8,S2_PTR | |
219 | add RES_PTR,8,RES_PTR | |
220 | bge LOC(loope2) | |
ba848785 | 221 | subcc %g0,%o4,%g0 ! restore cy |
/* Shared tail.  Entered from V2 with SIZE = (limbs remaining) - 2, and from
   the SIZE < 2 check at v1 with SIZE = limbs remaining; in both cases at
   most one limb is left, bit 0 of SIZE says whether there is one, and %o4
   holds the saved carry that the delay-slot subcc puts back.  */
f41c8091 UD |
222 | LOC(end2): |
223 | andcc SIZE,1,%g0 | |
224 | be LOC(ret2) | |
ba848785 RM |
225 | subcc %g0,%o4,%g0 ! restore cy |
226 | /* Add last limb */ | |
f41c8091 UD |
227 | LOC(jone): |
228 | ld [S1_PTR],%g4 | |
229 | ld [S2_PTR],%g2 | |
ba848785 | 230 | addxcc %g4,%g2,%o4 |
f41c8091 | 231 | st %o4,[RES_PTR] |
ba848785 | 232 | |
f41c8091 UD |
233 | LOC(ret2): |
234 | retl | |
ba848785 | 235 | addx %g0,%g0,%o0 ! return carry-out from most sign. limb |
f41c8091 UD |
236 | |
237 | END(__mpn_add_n)