! Source: sysdeps/sparc/add_n.S from the GNU C Library (GNU MP routine),
! recovered from a git blame view (git.ipfire.org mirror, thirdparty/glibc.git,
! commits 28f540f4 / ba848785).
! sparc __mpn_add_n -- Add two limb vectors of the same length > 0 and store
! sum in a third limb vector.

! Copyright (C) 1995 Free Software Foundation, Inc.

! This file is part of the GNU MP Library.

! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.

! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
! License for more details.

! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.


! INPUT PARAMETERS
ba848785
RM
24#define res_ptr %o0
25#define s1_ptr %o1
26#define s2_ptr %o2
27#define size %o3
28f540f4
RM
28
29#include "sysdep.h"
30
31 .text
32 .align 4
33 .global C_SYMBOL_NAME(__mpn_add_n)
34C_SYMBOL_NAME(__mpn_add_n):
ba848785
RM
35 cmp size,8
36 mov 0,%o4 ! clear cy-save register
37 blt,a Ltriv
38 addcc size,-2,size
39 xor s2_ptr,res_ptr,%g1
40 andcc %g1,4,%g0
41 bne L1 ! branch if alignment differs
42 nop
43L0: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
44 beq L_v1 ! if no, branch
45 nop
46! ** V1a **
47/* Add least significant limb separately to align res_ptr and s2_ptr */
48 ld [s1_ptr],%g4
49 add s1_ptr,4,s1_ptr
50 ld [s2_ptr],%g2
51 add s2_ptr,4,s2_ptr
52 add size,-1,size
53 addcc %g4,%g2,%o4
54 st %o4,[res_ptr]
55 add res_ptr,4,res_ptr
56
57L_v1: ld [s1_ptr+0],%g4
58 ld [s1_ptr+4],%g1
59 ldd [s2_ptr+0],%g2
60 addx %g0,%g0,%o4 ! save cy in register
61 addcc size,-10,size
62 blt Lfin1
63 subcc %g0,%o4,%g0 ! restore cy
64/* Add blocks of 8 limbs until less than 8 limbs remain */
65Loop1: addxcc %g4,%g2,%o4
66 ld [s1_ptr+8],%g4
67 addxcc %g1,%g3,%o5
68 ld [s1_ptr+12],%g1
69 ldd [s2_ptr+8],%g2
70 std %o4,[res_ptr+0]
71 addxcc %g4,%g2,%o4
72 ld [s1_ptr+16],%g4
73 addxcc %g1,%g3,%o5
74 ld [s1_ptr+20],%g1
75 ldd [s2_ptr+16],%g2
76 std %o4,[res_ptr+8]
77 addxcc %g4,%g2,%o4
78 ld [s1_ptr+24],%g4
79 addxcc %g1,%g3,%o5
80 ld [s1_ptr+28],%g1
81 ldd [s2_ptr+24],%g2
82 std %o4,[res_ptr+16]
83 addxcc %g4,%g2,%o4
84 ld [s1_ptr+32],%g4
85 addxcc %g1,%g3,%o5
86 ld [s1_ptr+36],%g1
87 ldd [s2_ptr+32],%g2
88 std %o4,[res_ptr+24]
89 addx %g0,%g0,%o4 ! save cy in register
90 addcc size,-8,size
91 add s1_ptr,32,s1_ptr
92 add s2_ptr,32,s2_ptr
93 add res_ptr,32,res_ptr
94 bge Loop1
95 subcc %g0,%o4,%g0 ! restore cy
96
97Lfin1: addcc size,8-2,size
98 blt Lend1
99 subcc %g0,%o4,%g0 ! restore cy
100/* Add blocks of 2 limbs until less than 2 limbs remain */
101Loop1b: addxcc %g4,%g2,%o4
102 ld [s1_ptr+8],%g4
103 addxcc %g1,%g3,%o5
104 ld [s1_ptr+12],%g1
105 ldd [s2_ptr+8],%g2
106 std %o4,[res_ptr+0]
107 addx %g0,%g0,%o4 ! save cy in register
108 addcc size,-2,size
109 add s1_ptr,8,s1_ptr
110 add s2_ptr,8,s2_ptr
111 add res_ptr,8,res_ptr
112 bge Loop1b
113 subcc %g0,%o4,%g0 ! restore cy
114Lend1: addxcc %g4,%g2,%o4
115 addxcc %g1,%g3,%o5
116 std %o4,[res_ptr+0]
117 addx %g0,%g0,%o4 ! save cy in register
118
119 andcc size,1,%g0
120 be Lret1
121 subcc %g0,%o4,%g0 ! restore cy
122/* Add last limb */
123 ld [s1_ptr+8],%g4
124 ld [s2_ptr+8],%g2
125 addxcc %g4,%g2,%o4
126 st %o4,[res_ptr+8]
127
128Lret1: retl
129 addx %g0,%g0,%o0 ! return carry-out from most sign. limb
130
131L1: xor s1_ptr,res_ptr,%g1
132 andcc %g1,4,%g0
133 bne L2
134 nop
135! ** V1b **
136 mov s2_ptr,%g1
137 mov s1_ptr,s2_ptr
138 b L0
139 mov %g1,s1_ptr
140
141! ** V2 **
142/* If we come here, the alignment of s1_ptr and res_ptr as well as the
143 alignment of s2_ptr and res_ptr differ. Since there are only two ways
144 things can be aligned (that we care about) we now know that the alignment
145 of s1_ptr and s2_ptr are the same. */
146
147L2: andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
148 beq L_v2 ! if no, branch
149 nop
150/* Add least significant limb separately to align res_ptr and s2_ptr */
151 ld [s1_ptr],%g4
152 add s1_ptr,4,s1_ptr
153 ld [s2_ptr],%g2
154 add s2_ptr,4,s2_ptr
155 add size,-1,size
156 addcc %g4,%g2,%o4
157 st %o4,[res_ptr]
158 add res_ptr,4,res_ptr
159
160L_v2: addx %g0,%g0,%o4 ! save cy in register
161 addcc size,-8,size
162 blt Lfin2
163 subcc %g0,%o4,%g0 ! restore cy
164/* Add blocks of 8 limbs until less than 8 limbs remain */
165Loop2: ldd [s1_ptr+0],%g2
166 ldd [s2_ptr+0],%o4
167 addxcc %g2,%o4,%g2
168 st %g2,[res_ptr+0]
169 addxcc %g3,%o5,%g3
170 st %g3,[res_ptr+4]
171 ldd [s1_ptr+8],%g2
172 ldd [s2_ptr+8],%o4
173 addxcc %g2,%o4,%g2
174 st %g2,[res_ptr+8]
175 addxcc %g3,%o5,%g3
176 st %g3,[res_ptr+12]
177 ldd [s1_ptr+16],%g2
178 ldd [s2_ptr+16],%o4
179 addxcc %g2,%o4,%g2
180 st %g2,[res_ptr+16]
181 addxcc %g3,%o5,%g3
182 st %g3,[res_ptr+20]
183 ldd [s1_ptr+24],%g2
184 ldd [s2_ptr+24],%o4
185 addxcc %g2,%o4,%g2
186 st %g2,[res_ptr+24]
187 addxcc %g3,%o5,%g3
188 st %g3,[res_ptr+28]
189 addx %g0,%g0,%o4 ! save cy in register
190 addcc size,-8,size
191 add s1_ptr,32,s1_ptr
192 add s2_ptr,32,s2_ptr
193 add res_ptr,32,res_ptr
194 bge Loop2
195 subcc %g0,%o4,%g0 ! restore cy
196
197Lfin2: addcc size,8-2,size
198Ltriv: blt Lend2
199 subcc %g0,%o4,%g0 ! restore cy
200Loop2b: ldd [s1_ptr+0],%g2
201 ldd [s2_ptr+0],%o4
202 addxcc %g2,%o4,%g2
203 st %g2,[res_ptr+0]
204 addxcc %g3,%o5,%g3
205 st %g3,[res_ptr+4]
206 addx %g0,%g0,%o4 ! save cy in register
207 addcc size,-2,size
208 add s1_ptr,8,s1_ptr
209 add s2_ptr,8,s2_ptr
210 add res_ptr,8,res_ptr
211 bge Loop2b
212 subcc %g0,%o4,%g0 ! restore cy
213Lend2: andcc size,1,%g0
214 be Lret2
215 subcc %g0,%o4,%g0 ! restore cy
216/* Add last limb */
217 ld [s1_ptr],%g4
218 ld [s2_ptr],%g2
219 addxcc %g4,%g2,%o4
220 st %o4,[res_ptr]
221
222Lret2: retl
223 addx %g0,%g0,%o0 ! return carry-out from most sign. limb