]>
Commit | Line | Data |
---|---|---|
4a2c0fd4 AZ |
1 | /* PowerPC64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add |
2 | the result to a second limb vector. | |
d614a753 | 3 | Copyright (C) 1999-2020 Free Software Foundation, Inc. |
4a2c0fd4 AZ |
4 | This file is part of the GNU C Library. |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
17 | License along with the GNU C Library; if not, see | |
5a82c748 | 18 | <https://www.gnu.org/licenses/>. */ |
4a2c0fd4 AZ |
19 | |
20 | #include <sysdep.h> | |
21 | ||
/* Select the entry-point name and the carry-chain opcodes.  The default
   build adds the product into the destination vector; defining
   USE_AS_SUBMUL switches to the subtracting variant.
   ADDSUB starts a carry chain, ADDSUBC continues one.  */
#ifndef USE_AS_SUBMUL
# define FUNC		__mpn_addmul_1
# define ADDSUB		addc
# define ADDSUBC	adde
#else
# define FUNC		__mpn_submul_1
# define ADDSUB		subfc
# define ADDSUBC	subfe
#endif

/* Incoming argument registers.  */
#define RP	r3	/* Vector that is read, updated and written back.  */
#define UP	r4	/* Vector that is only read.  */
#define N	r5	/* Number of limbs to process.  */
#define VL	r6	/* Single limb that multiplies every limb of UP.  */

/* Offsets from r1 of the doubleword slots in which r27..r31 are
   saved and from which they are reloaded.  */
#define R31SAVE	(-8)
#define R30SAVE	(-16)
#define R29SAVE	(-24)
#define R28SAVE	(-32)
#define R27SAVE	(-40)
42 | ||
/* FUNC (RP, UP, N, VL)

   For each of the N doubleword limbs at UP, form the 128-bit product with
   VL and combine it with the limb at the same index of RP using the
   ADDSUB/ADDSUBC carry chain (addc/adde, or subfc/subfe when
   USE_AS_SUBMUL is defined), storing the result back to RP.  The final
   carry (or borrow) limb is left in r3.

   Structure: N mod 4 selects one of four entry stubs that consume 1, 2,
   3 or 0 leading limbs, L(top) then consumes 4 limbs per iteration, and
   L(end) always consumes the last 2 limbs.  The single-limb case is
   handled entirely inside L(b01).

   Register roles inside the loop:
     r9, r27	 next two limbs of UP, loaded one step ahead
     r28..r31	 limbs of RP
     r12	 high product limb carried into the next step
     N (r5)	 after mtctr the count lives in CTR; N is scratch
     r11	 (USE_AS_SUBMUL only) scratch used to turn the borrow
		 left by subfe into a carry for the next adde chain

   r27..r31 are saved below r1; r1 itself is never adjusted here.
   NOTE(review): this relies on the ABI permitting a leaf routine to
   store below the stack pointer -- confirm against the target ABI.  */
ENTRY_TOCLESS (FUNC, 5)
	std	r31, R31SAVE(r1)
	rldicl.	r0, N, 0, 62		/* r0 = N mod 4; cr0 = (r0 ? 0).  */
	std	r30, R30SAVE(r1)
	/* Compare into cr6 explicitly: the blt/beq below test cr6.  */
	cmpdi	cr6, r0, 2
	std	r29, R29SAVE(r1)
	addi	N, N, 3
	std	r28, R28SAVE(r1)
	srdi	N, N, 2			/* N = (N + 3) / 4.  */
	std	r27, R27SAVE(r1)
	cfi_offset(r31, R31SAVE)
	cfi_offset(r30, R30SAVE)
	cfi_offset(r29, R29SAVE)
	cfi_offset(r28, R28SAVE)
	cfi_offset(r27, R27SAVE)
	mtctr	N			/* CTR = number of 4-limb groups, rounded up.  */
	beq	cr0, L(b00)		/* N mod 4 == 0.  */
	blt	cr6, L(b01)		/* N mod 4 == 1.  */
	beq	cr6, L(b10)		/* N mod 4 == 2.  */

	/* N mod 4 == 3: consume one limb, preload the next two.  */
L(b11):	ld	r9, 0(UP)
	ld	r28, 0(RP)
	mulld	r0, r9, VL
	mulhdu	r12, r9, VL
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	addi	RP, RP, 8
	ld	r9, 8(UP)
	ld	r27, 16(UP)
	addi	UP, UP, 24
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* r11 = CA - 1; completed by addic at L(bot).  */
#endif
	b	L(bot)

	/* N mod 4 == 0: consume two limbs, preload the next two.  */
	.align	4
L(b00):	ld	r9, 0(UP)
	ld	r27, 8(UP)
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	addc	r7, r7, N
	addze	r12, r8
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	addi	RP, RP, 16
	ld	r9, 16(UP)
	ld	r27, 24(UP)
	addi	UP, UP, 32
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11
#endif
	b	L(bot)

	/* N mod 4 == 1.  bdnz decrements CTR; when it reaches zero there is
	   exactly one limb, handled here.  This path touches only r0, r8,
	   r9 and r11, so it returns without reloading r27..r31.  */
	.align	4
L(b01):	bdnz	L(gt1)
	ld	r9, 0(UP)
	ld	r11, 0(RP)
	mulld	r0, r9, VL
	mulhdu	r8, r9, VL
	ADDSUB	r0, r0, r11
	std	r0, 0(RP)
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* r11 = CA - 1 ...  */
	addic	r11, r11, 1		/* ... so CA is now set iff there was a borrow.  */
#endif
	addze	RP, r8			/* r3 = high product limb + CA.  */
	blr

	/* N mod 4 == 1 with more than one limb: consume three limbs,
	   preload the next two.  */
L(gt1):	ld	r9, 0(UP)
	ld	r27, 8(UP)
	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r9, 16(UP)
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	ld	r30, 16(RP)
	mulld	r11, r9, VL
	mulhdu	r10, r9, VL
	addc	r7, r7, N
	adde	r11, r11, r8
	addze	r12, r10
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	ADDSUBC	r11, r11, r30
	std	r11, 16(RP)
	addi	RP, RP, 24
	ld	r9, 24(UP)
	ld	r27, 32(UP)
	addi	UP, UP, 40
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11
#endif
	b	L(bot)

	/* N mod 4 == 2: nothing to consume up front.  Adding the immediate
	   0 leaves r0 unchanged and is done only to clear CA for the first
	   adde below.  bdz decrements CTR and goes straight to L(end) when
	   only two limbs exist.  */
L(b10):	addic	r0, r0, 0
	li	r12, 0			/* No carry limb yet.  */
	ld	r9, 0(UP)
	ld	r27, 8(UP)
	bdz	L(end)
	addi	UP, UP, 16

	/* Main loop: four limbs per iteration.  On entry r9/r27 hold the
	   first two limbs of UP for this iteration, r12 the carry limb and
	   CA the pending carry.  */
	.align	4
L(top):	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r9, 0(UP)
	ld	r28, 0(RP)
	ld	r27, 8(UP)
	ld	r29, 8(RP)
	adde	r0, r0, r12		/* Fold in the carry limb and pending CA.  */
	adde	r7, r7, N
	mulld	N, r9, VL
	mulhdu	r10, r9, VL
	mulld	r11, r27, VL
	mulhdu	r12, r27, VL
	ld	r9, 16(UP)		/* Preload for the next step.  */
	ld	r30, 16(RP)
	ld	r27, 24(UP)
	ld	r31, 24(RP)
	adde	N, N, r8
	adde	r11, r11, r10
	addze	r12, r12		/* New carry limb.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	ADDSUBC	N, N, r30
	std	N, 16(RP)
	ADDSUBC	r11, r11, r31
	std	r11, 24(RP)
	addi	UP, UP, 32
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11
#endif
	addi	RP, RP, 32
L(bot):
#ifdef USE_AS_SUBMUL
	addic	r11, r11, 1		/* Turn the recorded borrow into CA.  */
#endif
	bdnz	L(top)

	/* Tail: the last two limbs, whose UP values are already in r9/r27.  */
L(end):	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	adde	r0, r0, r12
	adde	r7, r7, N
	addze	r8, r8
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11
	addic	r11, r11, 1
#endif
	addze	RP, r8			/* r3 = final carry/borrow limb.  */
	ld	r31, R31SAVE(r1)
	ld	r30, R30SAVE(r1)
	ld	r29, R29SAVE(r1)
	ld	r28, R28SAVE(r1)
	ld	r27, R27SAVE(r1)
	blr
END(FUNC)