! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
! add the result to a second limb vector.

! Copyright (C) 1992-2015 Free Software Foundation, Inc.

! This file is part of the GNU MP Library.

! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.

! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! License for more details.

! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not,
! see <http://www.gnu.org/licenses/>.


! INPUT PARAMETERS
! res_ptr	o0
! s1_ptr	o1
! size		o2
! s2_limb	o3
!
! RESULT
! o0 holds the final carry limb on return.
!
! REGISTER USE
! o4	current s1 limb (multiplicand)
! g1	dispatch offset at entry, afterwards the res limb being updated
! g2	carry limb handed from one limb to the next
! g3	dispatch table base at entry, afterwards low word of the product
! g4	copy of o7 while a call is used to read the PC
! y	high word of each umul product
! o0, o1 and o2 are modified; the integer condition codes are clobbered.
!
! NOTE(review): size is presumably required to be at least 1.  A size of 0
! selects the multiple-of-4 entry below and would process four limbs --
! confirm against callers.

/* ENTRY, END and LOC are not defined in this file; they are expected to
   come from sysdep.h.  */
#include <sysdep.h>

ENTRY(__mpn_addmul_1)
	ld	[%o1+0],%o4	! 1
	sll	%o2,4,%g1		! g1 = size * 16
	orcc	%g0,%g0,%g2		! carry limb = 0, and clear the C flag
	mov	%o7,%g4			! Save return address register
	and	%g1,(4-1)<<4,%g1	! g1 = (size mod 4) * 16 = table offset

	! Position-independent dispatch: call leaves the address of label 1
	! in o7, so the delay slot can form the address of label 3 (the start
	! of the table).  Each table entry is four instructions (16 bytes).
1:	call	2f
	 add	%o7,3f-1b,%g3
2:	jmp	%g3+%g1
	 mov	%g4,%o7			! Restore return address register

	! Dispatch table.  Each entry biases o0 and o1 so that the fixed
	! offsets used at its entry point in the unrolled loop address
	! the first res limb and the second s1 limb (the first s1 limb is
	! already in o4).  The trailing nops pad every entry to 16 bytes.
	.align	4
3:
LOC(00):
	add	%o0,-4,%o0
	b	LOC(loop00)		/* 4, 8, 12, ... */
	 add	%o1,-4,%o1
	nop
LOC(01):
	b	LOC(loop01)		/* 1, 5, 9, ... */
	 nop
	nop
	nop
LOC(10):
	add	%o0,-12,%o0	/* 2, 6, 10, ... */
	b	LOC(loop10)
	 add	%o1,4,%o1
	nop
LOC(11):
	add	%o0,-8,%o0	/* 3, 7, 11, ... */
	b	LOC(loop11)
	 add	%o1,-8,%o1
	nop

	! Main loop, unrolled four times.  Per limb:
	!   umul          low product word in g3, high word in y
	!   addcc/addxcc  add the incoming carry limb g2 (addxcc also adds
	!                 the C flag left by the previous limb)
	!   rd + addx     next carry limb = high word + carry of that add
	!   ld + addcc    add the res limb; the carry of this add stays in
	!                 the C flag for the next limb
	!   st            write the updated res limb
	! The loads and the plain add do not touch the condition codes, so
	! the C flag survives between limbs.

	! First limb of a group.  Its umul was issued in the delay slot of
	! the loop branch, and the pending carry was already folded into g2,
	! hence addcc rather than addxcc.
LOC(loop):
	addcc	%g3,%g2,%g3	! 1
	ld	[%o1+4],%o4	! 2
	rd	%y,%g2		! 1
	addx	%g0,%g2,%g2
	ld	[%o0+0],%g1	! 2
	addcc	%g1,%g3,%g3
	st	%g3,[%o0+0]	! 1
LOC(loop00):
	umul	%o4,%o3,%g3	! 2
	ld	[%o0+4],%g1	! 2
	addxcc	%g3,%g2,%g3	! 2
	ld	[%o1+8],%o4	! 3
	rd	%y,%g2		! 2
	addx	%g0,%g2,%g2
	nop
	addcc	%g1,%g3,%g3
	st	%g3,[%o0+4]	! 2
LOC(loop11):
	umul	%o4,%o3,%g3	! 3
	addxcc	%g3,%g2,%g3	! 3
	ld	[%o1+12],%o4	! 4
	rd	%y,%g2		! 3
	add	%o1,16,%o1		! advance s1 by four limbs
	addx	%g0,%g2,%g2
	ld	[%o0+8],%g1	! 2
	addcc	%g1,%g3,%g3
	st	%g3,[%o0+8]	! 3
LOC(loop10):
	umul	%o4,%o3,%g3	! 4
	addxcc	%g3,%g2,%g3	! 4
	ld	[%o1+0],%o4	! 1
	rd	%y,%g2		! 4
	addx	%g0,%g2,%g2
	ld	[%o0+12],%g1	! 2
	addcc	%g1,%g3,%g3
	st	%g3,[%o0+12]	! 4
	add	%o0,16,%o0		! advance res by four limbs
	! Fold the pending carry into g2 now: the counter update below
	! overwrites the condition codes.
	addx	%g0,%g2,%g2
LOC(loop01):
	addcc	%o2,-4,%o2		! size -= 4
	bg	LOC(loop)		! loop while limbs remain after this one
	 umul	%o4,%o3,%g3	! 1

	! Tail: finish the last limb (its umul ran in the delay slot above)
	! and return the final carry limb in o0.
	addcc	%g3,%g2,%g3	! 4
	rd	%y,%g2		! 4
	addx	%g0,%g2,%g2
	ld	[%o0+0],%g1	! 2
	addcc	%g1,%g3,%g3
	st	%g3,[%o0+0]	! 4
	retl
	 addx	%g0,%g2,%o0

END(__mpn_addmul_1)