]>
Commit | Line | Data |
---|---|---|
28f540f4 RM |
1 | ! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store |
2 | ! the result in a second limb vector. | |
f41c8091 | 3 | ! |
b168057a | 4 | ! Copyright (C) 1992-2015 Free Software Foundation, Inc. |
f41c8091 | 5 | ! |
28f540f4 | 6 | ! This file is part of the GNU MP Library. |
f41c8091 | 7 | ! |
28f540f4 | 8 | ! The GNU MP Library is free software; you can redistribute it and/or modify |
6d84f89a AJ |
9 | ! it under the terms of the GNU Lesser General Public License as published by |
10 | ! the Free Software Foundation; either version 2.1 of the License, or (at your | |
28f540f4 | 11 | ! option) any later version. |
f41c8091 | 12 | ! |
28f540f4 RM |
13 | ! The GNU MP Library is distributed in the hope that it will be useful, but |
14 | ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
6d84f89a | 15 | ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
28f540f4 | 16 | ! License for more details. |
f41c8091 | 17 | ! |
6d84f89a | 18 | ! You should have received a copy of the GNU Lesser General Public License |
59ba27a6 PE |
19 | ! along with the GNU MP Library; see the file COPYING.LIB. If not, |
20 | ! see <http://www.gnu.org/licenses/>. | |
28f540f4 RM |
21 | |
22 | ||
23 | ! INPUT PARAMETERS | |
f41c8091 UD |
24 | ! RES_PTR o0 |
25 | ! S1_PTR o1 | |
26 | ! SIZE o2 | |
27 | ! S2_LIMB o3 | |
28f540f4 RM |
28 | |
29 | ! ADD CODE FOR SMALL MULTIPLIERS! | |
30 | !1: ld | |
31 | ! st | |
32 | ! | |
33 | !2: ld ,a | |
34 | ! addxcc a,a,x | |
35 | ! st x, | |
36 | ! | |
37 | !3_unrolled: | |
38 | ! ld ,a | |
39 | ! addxcc a,a,x1 ! 2a + cy | |
40 | ! addx %g0,%g0,x2 | |
41 | ! addcc a,x1,x ! 3a + c | |
42 | ! st x, | |
43 | ! | |
44 | ! ld ,a | |
45 | ! addxcc a,a,y1 | |
46 | ! addx %g0,%g0,y2 | |
47 | ! addcc a,y1,x | |
48 | ! st x, | |
49 | ! | |
50 | !4_unrolled: | |
51 | ! ld ,a | |
52 | ! srl a,2,x1 ! 4a | |
53 | ! addxcc y2,x1,x | |
54 | ! sll a,30,x2 | |
55 | ! st x, | |
56 | ! | |
57 | ! ld ,a | |
58 | ! srl a,2,y1 | |
59 | ! addxcc x2,y1,y | |
60 | ! sll a,30,y2 | |
61 | ! st x, | |
62 | ! | |
63 | !5_unrolled: | |
64 | ! ld ,a | |
65 | ! srl a,2,x1 ! 4a | |
66 | ! addxcc a,x1,x ! 5a + c | |
67 | ! sll a,30,x2 | |
68 | ! addx %g0,x2,x2 | |
69 | ! st x, | |
70 | ! | |
71 | ! ld ,a | |
72 | ! srl a,2,y1 | |
73 | ! addxcc a,y1,x | |
74 | ! sll a,30,y2 | |
75 | ! addx %g0,y2,y2 | |
76 | ! st x, | |
77 | ! | |
78 | !8_unrolled: | |
79 | ! ld ,a | |
80 | ! srl a,3,x1 ! 8a | |
81 | ! addxcc y2,x1,x | |
82 | ! sll a,29,x2 | |
83 | ! st x, | |
84 | ! | |
85 | ! ld ,a | |
86 | ! srl a,3,y1 | |
87 | ! addxcc x2,y1,y | |
88 | ! sll a,29,y2 | |
89 | ! st x, | |
90 | ||
f41c8091 | 91 | #include <sysdep.h> |
28f540f4 | 92 | |
! __mpn_mul_1: multiply each limb of the vector at S1_PTR by S2_LIMB, store
! the low limb of every product (plus the carry limb from the previous step)
! into the vector at RES_PTR, and return the final carry limb in %o0.
! The routine returns with retl and only touches %o0-%o5, %g1-%g4, %y and
! the integer condition codes.
!
! Register roles after the setup code below:
!   %o0  carry limb carried between iterations (also the return value)
!   %o1  one past the end of the S1 vector
!   %o2  negative byte index, stepped by 4 until it reaches zero
!   %o3  S2_LIMB
!   %o4  end of the RES vector (later biased by -4, see below)
!   %o5  S1 limb currently being multiplied
f41c8091 | 93 | ENTRY(__mpn_mul_1) |
28f540f4 RM |
94 | ! Make S1_PTR and RES_PTR point at the end of their blocks |
95 | ! and put (- 4 x SIZE) in index/loop counter. | |
96 | sll %o2,2,%o2 | |
97 | add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval | |
98 | add %o1,%o2,%o1 | |
99 | sub %g0,%o2,%o2 | |
100 | ||
! Dispatch on the multiplier: if S2_LIMB is above 0xfff (unsigned compare)
! take the full 32-step path at LOC(large); otherwise fall through to the
! short path, which needs only 12 multiply steps per limb.
101 | cmp %o3,0xfff | |
f41c8091 | 102 | bgu LOC(large) |
28f540f4 RM |
103 | nop |
104 | ||
! ---- Short path: S2_LIMB <= 0xfff -------------------------------------
! Load the first S1 limb, clear the carry limb, and enter the loop at
! LOC(0) so that the store at LOC(loop0) is skipped on the first pass.
! The branch delay slot biases %o4 by -4 because each store is issued
! after %o2 has already been advanced by 4.
105 | ld [%o1+%o2],%o5 | |
106 | mov 0,%o0 | |
f41c8091 | 107 | b LOC(0) |
28f540f4 | 108 | add %o4,-4,%o4 |
! Loop top: store the low limb computed by the previous iteration.
f41c8091 | 109 | LOC(loop0): |
28f540f4 | 110 | st %g1,[%o4+%o2] |
! Y = S2_LIMB is the multiplier consumed by mulscc; the S1 limb in %o5 is
! the multiplicand.  %g2 = S2_LIMB iff bit 31 of the S1 limb is set, else 0;
! it is added to the high limb further down as sign compensation.
! andcc with 0 zeroes the partial product in %g1 and clears the condition
! codes before the first multiply step.
f41c8091 | 111 | LOC(0): wr %g0,%o3,%y |
28f540f4 RM |
112 | sra %o5,31,%g2 |
113 | and %o3,%g2,%g2 | |
114 | andcc %g1,0,%g1 | |
! 12 multiply steps with the S1 limb, followed by one step with a zero
! operand.
115 | mulscc %g1,%o5,%g1 | |
116 | mulscc %g1,%o5,%g1 | |
117 | mulscc %g1,%o5,%g1 | |
118 | mulscc %g1,%o5,%g1 | |
119 | mulscc %g1,%o5,%g1 | |
120 | mulscc %g1,%o5,%g1 | |
121 | mulscc %g1,%o5,%g1 | |
122 | mulscc %g1,%o5,%g1 | |
123 | mulscc %g1,%o5,%g1 | |
124 | mulscc %g1,%o5,%g1 | |
125 | mulscc %g1,%o5,%g1 | |
126 | mulscc %g1,%o5,%g1 | |
127 | mulscc %g1,0,%g1 | |
! Reassemble the product, which is split between %g1 and the top 12 bits
! of Y:  %g4 = %g1 >> 20 (arithmetic) is the high part,
!        %g1 = (%g1 << 12) | (Y >> 20) is the low limb.
128 | sra %g1,20,%g4 | |
129 | sll %g1,12,%g1 | |
130 | rd %y,%g3 | |
131 | srl %g3,20,%g3 | |
132 | or %g1,%g3,%g1 | |
133 | ||
! Add the incoming carry limb to the low limb; addx then folds the carry
! flag from that addition into the new carry limb.
134 | addcc %g1,%o0,%g1 | |
135 | addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb | |
136 | addcc %o2,4,%o2 ! loop counter | |
! Annulled branch: the load of the next S1 limb in the delay slot is
! executed only when the branch back to LOC(loop0) is taken.
f41c8091 | 137 | bne,a LOC(loop0) |
28f540f4 RM |
138 | ld [%o1+%o2],%o5 |
139 | ||
! Return; the delay slot stores the last low limb (%o2 is zero here).
140 | retl | |
141 | st %g1,[%o4+%o2] | |
142 | ||
143 | ||
! ---- Large path: S2_LIMB > 0xfff --------------------------------------
! Same loop structure as the short path, but the operand roles are swapped
! (Y holds the S1 limb, mulscc is fed S2_LIMB) and all 32 multiply steps
! are performed.
f41c8091 UD |
144 | LOC(large): |
145 | ld [%o1+%o2],%o5 | |
28f540f4 RM |
146 | mov 0,%o0 |
147 | sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0 | |
! Enter at LOC(1), skipping the first store; the delay slot applies the
! same -4 bias to %o4 as on the short path.
f41c8091 | 148 | b LOC(1) |
28f540f4 | 149 | add %o4,-4,%o4 |
! Loop top: store the low limb computed by the previous iteration.
f41c8091 | 150 | LOC(loop): |
28f540f4 | 151 | st %g3,[%o4+%o2] |
! Y = current S1 limb; andcc %g0,%g0 zeroes the partial product in %g1 and
! clears the condition codes before the first multiply step.
! NOTE(review): only two instructions separate this wr from the first
! mulscc, whereas the short path has three.  SPARC V8 describes WRY as a
! delayed-write instruction -- confirm against the architecture manual
! that this spacing is sufficient on all intended targets.
f41c8091 | 152 | LOC(1): wr %g0,%o5,%y |
28f540f4 RM |
153 | and %o5,%g4,%g2 ! g2 = S1_LIMB iff S2_LIMB < 0, else 0 |
154 | andcc %g0,%g0,%g1 | |
! 32 multiply steps with S2_LIMB, followed by one step with a zero operand.
155 | mulscc %g1,%o3,%g1 | |
156 | mulscc %g1,%o3,%g1 | |
157 | mulscc %g1,%o3,%g1 | |
158 | mulscc %g1,%o3,%g1 | |
159 | mulscc %g1,%o3,%g1 | |
160 | mulscc %g1,%o3,%g1 | |
161 | mulscc %g1,%o3,%g1 | |
162 | mulscc %g1,%o3,%g1 | |
163 | mulscc %g1,%o3,%g1 | |
164 | mulscc %g1,%o3,%g1 | |
165 | mulscc %g1,%o3,%g1 | |
166 | mulscc %g1,%o3,%g1 | |
167 | mulscc %g1,%o3,%g1 | |
168 | mulscc %g1,%o3,%g1 | |
169 | mulscc %g1,%o3,%g1 | |
170 | mulscc %g1,%o3,%g1 | |
171 | mulscc %g1,%o3,%g1 | |
172 | mulscc %g1,%o3,%g1 | |
173 | mulscc %g1,%o3,%g1 | |
174 | mulscc %g1,%o3,%g1 | |
175 | mulscc %g1,%o3,%g1 | |
176 | mulscc %g1,%o3,%g1 | |
177 | mulscc %g1,%o3,%g1 | |
178 | mulscc %g1,%o3,%g1 | |
179 | mulscc %g1,%o3,%g1 | |
180 | mulscc %g1,%o3,%g1 | |
181 | mulscc %g1,%o3,%g1 | |
182 | mulscc %g1,%o3,%g1 | |
183 | mulscc %g1,%o3,%g1 | |
184 | mulscc %g1,%o3,%g1 | |
185 | mulscc %g1,%o3,%g1 | |
186 | mulscc %g1,%o3,%g1 | |
187 | mulscc %g1,%g0,%g1 | |
! Low limb of the product is read from Y, the high part is in %g1.  Add the
! incoming carry limb to the low limb; addx folds the resulting carry flag
! and the sign compensation into the new carry limb.
188 | rd %y,%g3 | |
189 | addcc %g3,%o0,%g3 | |
190 | addx %g2,%g1,%o0 ! add sign-compensation and cy to hi limb | |
191 | addcc %o2,4,%o2 ! loop counter | |
! Annulled branch: the next S1 limb is loaded only when looping again.
f41c8091 | 192 | bne,a LOC(loop) |
28f540f4 RM |
193 | ld [%o1+%o2],%o5 |
194 | ||
! Return; the delay slot stores the last low limb (%o2 is zero here).
195 | retl | |
196 | st %g3,[%o4+%o2] | |
f41c8091 UD |
197 | |
198 | END(__mpn_mul_1) |