! sparc __mpn_add_n -- Add two limb vectors of the same length > 0 and store
! sum in a third limb vector.

! Copyright (C) 1995 Free Software Foundation, Inc.

! This file is part of the GNU MP Library.

! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.

! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.

! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.


! INPUT PARAMETERS
#define res_ptr	%o0
#define s1_ptr	%o1
#define s2_ptr	%o2
#define size	%o3

#include "sysdep.h"

/* __mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)

   Add the limb vectors at s1_ptr and s2_ptr, both size limbs long, and
   store the size-limb sum at res_ptr.  A limb is one 32-bit word.

   In:     %o0 = res_ptr, %o1 = s1_ptr, %o2 = s2_ptr, %o3 = size
   Out:    %o0 = carry-out from the most significant limb (0 or 1)
   Uses:   %g1-%g4, %o4, %o5 and the condition codes; the argument
	   registers are advanced/consumed.  Leaf routine (returns with
	   retl, no register window is allocated).

   Strategy.  ldd/std move two limbs at once but need a doubleword
   (8-byte) aligned address, so the code picks a variant by comparing
   bit 2 of the three pointers:

     V1a  res_ptr and s2_ptr agree: ldd from s2_ptr, std to res_ptr,
	  word loads from s1_ptr.
     V1b  res_ptr and s1_ptr agree: swap s1_ptr and s2_ptr (addition is
	  commutative) and continue as V1a.
     V2   otherwise s1_ptr and s2_ptr agree: ldd from both sources, word
	  stores to res_ptr.

   Vectors shorter than 8 limbs skip the alignment analysis and go
   straight to the 2-limb tail loop, which therefore must only use
   word-sized memory accesses.

   The carry is parked in %o4 ("addx %g0,%g0,%o4") whenever the condition
   codes are needed for loop control, and put back with
   "subcc %g0,%o4,%g0", which produces a borrow exactly when %o4 is 1.  */

	.text
	.align	4
	.global	C_SYMBOL_NAME(__mpn_add_n)
C_SYMBOL_NAME(__mpn_add_n):
	cmp	size,8
	mov	0,%o4			! clear cy-save register
	blt,a	Ltriv			! short vector: go to the simple tail code
	addcc	size,-2,size		! annulled slot, executed only if taken
	xor	s2_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L1			! branch if alignment differs
	nop
L0:	andcc	res_ptr,4,%g0		! res_ptr unaligned?  Side effect: cy=0
	beq	L_v1			! if no, branch
	nop
! **  V1a  **
/* Add least significant limb separately to align res_ptr and s2_ptr */
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	size,-1,size
	addcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr

/* Software pipelining: the two limbs added in one step were loaded in the
   previous step.  %g4,%g1 hold the pending s1 limbs and the pair %g2,%g3
   the pending s2 limbs; the sum is built in the pair %o4,%o5 for std.  */
L_v1:	ld	[s1_ptr+0],%g4
	ld	[s1_ptr+4],%g1
	ldd	[s2_ptr+0],%g2
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-10,size		! 8 for a full block + 2 limbs preloaded
	blt	Lfin1
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
Loop1:	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+16],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+20],%g1
	ldd	[s2_ptr+16],%g2
	std	%o4,[res_ptr+8]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+24],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+28],%g1
	ldd	[s2_ptr+24],%g2
	std	%o4,[res_ptr+16]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+32],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+36],%g1
	ldd	[s2_ptr+32],%g2
	std	%o4,[res_ptr+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	Loop1
	subcc	%g0,%o4,%g0		! restore cy

Lfin1:	addcc	size,8-2,size		! undo the 8-limb bias, keep a 2-limb bias
	blt	Lend1
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loop1b:	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-2,size
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	Loop1b
	subcc	%g0,%o4,%g0		! restore cy
/* Drain the pipeline: add the two limbs that are already in registers */
Lend1:	addxcc	%g4,%g2,%o4
	addxcc	%g1,%g3,%o5
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	size,1,%g0		! one odd limb left over?
	be	Lret1
	subcc	%g0,%o4,%g0		! restore cy
/* Add last limb; the pointers were not advanced past the drained pair,
   hence the offset of 8 */
	ld	[s1_ptr+8],%g4
	ld	[s2_ptr+8],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[res_ptr+8]

Lret1:	retl
	addx	%g0,%g0,%o0	! return carry-out from most sign. limb

L1:	xor	s1_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L2
	nop
! **  V1b  **
/* res_ptr and s1_ptr have the same alignment: exchange the two source
   pointers and reuse the V1a code */
	mov	s2_ptr,%g1
	mov	s1_ptr,s2_ptr
	b	L0
	mov	%g1,s1_ptr

! **  V2  **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of s1_ptr and s2_ptr are the same.  */

L2:	andcc	s1_ptr,4,%g0		! s1_ptr unaligned?  Side effect: cy=0
	beq	L_v2			! if no, branch
	nop
/* Add least significant limb separately to align s1_ptr and s2_ptr */
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	size,-1,size
	addcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr

L_v2:	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size
	blt	Lfin2
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain.  Both sources are
   doubleword aligned here, res_ptr is not, so: ldd in, st out.  */
Loop2:	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	ldd	[s1_ptr+8],%g2
	ldd	[s2_ptr+8],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+8]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+12]
	ldd	[s1_ptr+16],%g2
	ldd	[s2_ptr+16],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+16]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+20]
	ldd	[s1_ptr+24],%g2
	ldd	[s2_ptr+24],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+24]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+28]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	Loop2
	subcc	%g0,%o4,%g0		! restore cy

Lfin2:	addcc	size,8-2,size		! undo the 8-limb bias, keep a 2-limb bias
/* Ltriv is entered from the top of the routine for short vectors, with
   size already reduced by 2, %o4 = 0 and no alignment check performed.  */
Ltriv:	blt	Lend2
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain.  Because the
   short-vector path reaches this loop with arbitrarily (word) aligned
   pointers, it must not use ldd/std: a doubleword access to an address
   that is only word aligned traps.  Use single-word loads and stores.  */
Loop2b:	ld	[s1_ptr+0],%g2
	ld	[s1_ptr+4],%g3
	ld	[s2_ptr+0],%o4		! %o4 is free: cy lives in the cc now
	ld	[s2_ptr+4],%o5
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-2,size
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	Loop2b
	subcc	%g0,%o4,%g0		! restore cy
Lend2:	andcc	size,1,%g0		! one odd limb left over?
	be	Lret2
	subcc	%g0,%o4,%g0		! restore cy
/* Add last limb */
	ld	[s1_ptr],%g4
	ld	[s2_ptr],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[res_ptr]

Lret2:	retl
	addx	%g0,%g0,%o0	! return carry-out from most sign. limb