git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/powerpc/powerpc64/addmul_1.S
87ff0245bfc0de3a53ba4be6ea06968b9d052242
[thirdparty/glibc.git] / sysdeps / powerpc / powerpc64 / addmul_1.S
1 /* PowerPC64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
2 the result to a second limb vector.
3 Copyright (C) 1999-2014 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #include <sysdep.h>
21
/* Operation selection.  When USE_AS_SUBMUL is defined this file builds
   __mpn_submul_1 and the limb-combining macros expand to the subtract-from
   forms; otherwise it builds __mpn_addmul_1 with the add forms.
   ADDSUB (addc / subfc) sets CA without consuming it and is used on the
   first limb of each group in the code below; ADDSUBC (adde / subfe) also
   consumes CA and is used on the remaining limbs of the group.  */
22 #ifdef USE_AS_SUBMUL
23 # define FUNC __mpn_submul_1
24 # define ADDSUBC subfe
25 # define ADDSUB subfc
26 #else
27 # define FUNC __mpn_addmul_1
28 # define ADDSUBC adde
29 # define ADDSUB addc
30 #endif
31
/* Register roles as used by the code below: RP is the limb vector that is
   loaded, combined with the product and stored back; UP is the limb vector
   that is multiplied; N is the limb count; VL is the single multiplier limb.
   N (r5) is reused as a scratch register once the count has been moved to
   CTR, and RP (r3) receives the carry limb just before the function
   returns.  */
32 #define RP r3
33 #define UP r4
34 #define N r5
35 #define VL r6
36
/* FUNC (RP, UP, N, VL)

   For each of the N 64-bit limbs: form the 128-bit product UP[i] * VL, add
   the high limb carried over from the previous product, combine the low
   limb with RP[i] through ADDSUB/ADDSUBC (add for __mpn_addmul_1,
   subtract-from for __mpn_submul_1) and store it back to RP[i].  The last
   high limb plus the final carry is left in r3 (RP) at return.

   Layout:
     - entry: dispatch on N & 3; CTR = (N + 3) >> 2.
     - L(b11) / L(b00) / L(b01)+L(gt1) / L(b10): handle 1 / 2 / 3 / 0 leading
       limbs and preload the next two UP limbs into r9 and r27.
     - L(top): software-pipelined main loop, four limbs per iteration; it
       consumes the preloaded r9/r27 and preloads the next pair.
     - L(end): always handles the final two limbs (r9/r27), then returns.
     - N == 1 is handled entirely inside L(b01).

   Between groups, r12 holds the pending high product limb and CA holds the
   pending carry (for the subtract variant: the pending borrow, see the
   subfe/addic pairs).

   r27-r31 are stored below r1 on entry and reloaded before the final blr;
   r1 itself is never adjusted.

   NOTE(review): N == 0 is not special-cased; it would take the L(b00) path
   with CTR = 0.  Presumably callers guarantee N >= 1 -- confirm.
   NOTE(review): EALIGN and END come from sysdep.h, which is not visible
   here; presumably they emit the aligned entry label and the function-end
   bookkeeping -- confirm.  */
37 EALIGN(FUNC, 5, 0)
38 std r31, -8(r1) /* save r31 (used by L(top)) */
39 rldicl. r0, N, 0, 62 /* r0 = N & 3; record form also sets cr0 */
40 std r30, -16(r1)
/* Compare (N & 3) with 2; the blt/beq below read this result from cr6.
   NOTE(review): the CR-field operand is spelled VL (i.e. r6) rather than
   cr6.  Presumably this works because sysdep.h maps both r6 and cr6 to the
   number 6 -- confirm before building with symbolic register names.  */
41 cmpdi VL, r0, 2
42 std r29, -24(r1)
43 addi N, N, 3
44 std r28, -32(r1)
45 srdi N, N, 2 /* N = (N + 3) >> 2 */
46 std r27, -40(r1)
47 mtctr N /* CTR = group count; N (r5) is free as scratch from here on */
48 beq cr0, L(b00) /* N & 3 == 0 */
49 blt cr6, L(b01) /* N & 3 == 1 */
50 beq cr6, L(b10) /* N & 3 == 2 */
51
/* N & 3 == 3: do one limb here, then fall into the common tail.  */
52 L(b11): ld r9, 0(UP)
53 ld r28, 0(RP)
54 mulld r0, r9, VL /* low 64 bits of UP[0] * VL */
55 mulhdu r12, r9, VL /* high 64 bits -> pending carry limb */
56 ADDSUB r0, r0, r28 /* combine with RP[0]; sets CA */
57 std r0, 0(RP)
58 addi RP, RP, 8
59 ld r9, 8(UP) /* preload the next two source limbs */
60 ld r27, 16(UP)
61 addi UP, UP, 24 /* UP now points past the preloaded pair */
62 #ifdef USE_AS_SUBMUL
63 subfe r11, r11, r11 /* r11 = CA - 1; first half of the CA inversion finished at L(bot) */
64 #endif
65 b L(bot)
66
67 .align 4
/* N & 3 == 0: do two limbs here.  */
68 L(b00): ld r9, 0(UP)
69 ld r27, 8(UP)
70 ld r28, 0(RP)
71 ld r29, 8(RP)
72 mulld r0, r9, VL
73 mulhdu N, r9, VL
74 mulld r7, r27, VL
75 mulhdu r8, r27, VL
76 addc r7, r7, N /* low(limb 1) += high(limb 0) */
77 addze r12, r8 /* pending carry limb = high(limb 1) + CA */
78 ADDSUB r0, r0, r28 /* starts a fresh CA chain against RP */
79 std r0, 0(RP)
80 ADDSUBC r7, r7, r29
81 std r7, 8(RP)
82 addi RP, RP, 16
83 ld r9, 16(UP) /* preload the next two source limbs */
84 ld r27, 24(UP)
85 addi UP, UP, 32
86 #ifdef USE_AS_SUBMUL
87 subfe r11, r11, r11 /* r11 = CA - 1; inversion finished at L(bot) */
88 #endif
89 b L(bot)
90
91 .align 4
/* N & 3 == 1.  bdnz decrements CTR; it stays non-zero only when N > 1.
   For N == 1 the single limb is handled right here and the function
   returns without reloading r27-r31, since this path only writes r0, r8,
   r9, r11 and r3.  */
92 L(b01): bdnz L(gt1)
93 ld r9, 0(UP)
94 ld r11, 0(RP)
95 mulld r0, r9, VL
96 mulhdu r8, r9, VL
97 ADDSUB r0, r0, r11
98 std r0, 0(RP)
99 #ifdef USE_AS_SUBMUL
100 subfe r11, r11, r11 /* r11 = CA - 1 (0 if no borrow, -1 if borrow) */
101 addic r11, r11, 1 /* CA = 1 exactly when a borrow occurred */
102 #endif
103 addze RP, r8 /* r3 = high limb + CA: value left in r3 at return */
104 blr
105
/* N & 3 == 1 and N > 1: do three limbs here.  */
106 L(gt1): ld r9, 0(UP)
107 ld r27, 8(UP)
108 mulld r0, r9, VL
109 mulhdu N, r9, VL
110 mulld r7, r27, VL
111 mulhdu r8, r27, VL
112 ld r9, 16(UP)
113 ld r28, 0(RP)
114 ld r29, 8(RP)
115 ld r30, 16(RP)
116 mulld r11, r9, VL
117 mulhdu r10, r9, VL
118 addc r7, r7, N /* propagate high limbs into the next low limbs */
119 adde r11, r11, r8
120 addze r12, r10 /* pending carry limb */
121 ADDSUB r0, r0, r28 /* fresh CA chain against RP[0..2] */
122 std r0, 0(RP)
123 ADDSUBC r7, r7, r29
124 std r7, 8(RP)
125 ADDSUBC r11, r11, r30
126 std r11, 16(RP)
127 addi RP, RP, 24
128 ld r9, 24(UP) /* preload the next two source limbs */
129 ld r27, 32(UP)
130 addi UP, UP, 40
131 #ifdef USE_AS_SUBMUL
132 subfe r11, r11, r11 /* r11 = CA - 1; inversion finished at L(bot) */
133 #endif
134 b L(bot)
135
/* N & 3 == 2: no leading limbs.  Clear CA and the pending carry limb,
   preload the first pair, and go straight to L(end) when CTR reaches zero
   (N == 2); otherwise fall into the main loop.  */
136 L(b10): addic r0, r0, r0 /* third operand is an immediate; with r0 expanding to 0 this adds 0 and so clears CA -- TODO confirm r0 expands to 0 in sysdep.h */
137 li r12, 0 /* no pending carry limb */
138 ld r9, 0(UP)
139 ld r27, 8(UP)
140 bdz L(end)
141 addi UP, UP, 16
142
143 .align 4
/* Main loop: four limbs per iteration.  On entry r9/r27 hold the first two
   source limbs, r12 the pending carry limb, CA the pending carry, and UP
   points at the third source limb of this iteration.  */
144 L(top): mulld r0, r9, VL
145 mulhdu N, r9, VL
146 mulld r7, r27, VL
147 mulhdu r8, r27, VL
148 ld r9, 0(UP)
149 ld r28, 0(RP)
150 ld r27, 8(UP)
151 ld r29, 8(RP)
152 adde r0, r0, r12 /* add pending carry limb and CA */
153 adde r7, r7, N
154 mulld N, r9, VL
155 mulhdu r10, r9, VL
156 mulld r11, r27, VL
157 mulhdu r12, r27, VL
158 ld r9, 16(UP) /* preload the pair for the next iteration or L(end) */
159 ld r30, 16(RP)
160 ld r27, 24(UP)
161 ld r31, 24(RP)
162 adde N, N, r8
163 adde r11, r11, r10
164 addze r12, r12 /* new pending carry limb */
165 ADDSUB r0, r0, r28 /* fresh CA chain against RP[0..3] */
166 std r0, 0(RP)
167 ADDSUBC r7, r7, r29
168 std r7, 8(RP)
169 ADDSUBC N, N, r30
170 std N, 16(RP)
171 ADDSUBC r11, r11, r31
172 std r11, 24(RP)
173 addi UP, UP, 32
174 #ifdef USE_AS_SUBMUL
175 subfe r11, r11, r11 /* r11 = CA - 1 (0 if no borrow, -1 if borrow) */
176 #endif
177 addi RP, RP, 32
/* Common join point for the prologue paths and the loop back-edge.  */
178 L(bot):
179 #ifdef USE_AS_SUBMUL
180 addic r11, r11, 1 /* completes the inversion: CA = 1 exactly when a borrow occurred */
181 #endif
182 bdnz L(top)
183
/* Tail: the last two limbs, already loaded in r9/r27.  */
184 L(end): mulld r0, r9, VL
185 mulhdu N, r9, VL
186 mulld r7, r27, VL
187 mulhdu r8, r27, VL
188 ld r28, 0(RP)
189 ld r29, 8(RP)
190 adde r0, r0, r12 /* add pending carry limb and CA */
191 adde r7, r7, N
192 addze r8, r8 /* r8 = final high limb + CA */
193 ADDSUB r0, r0, r28
194 std r0, 0(RP)
195 ADDSUBC r7, r7, r29
196 std r7, 8(RP)
197 #ifdef USE_AS_SUBMUL
198 subfe r11, r11, r11 /* invert CA so that it is 1 exactly when a borrow occurred */
199 addic r11, r11, 1
200 #endif
201 addze RP, r8 /* r3 = r8 + CA: value left in r3 at return */
202 ld r31, -8(r1) /* reload the registers saved on entry */
203 ld r30, -16(r1)
204 ld r29, -24(r1)
205 ld r28, -32(r1)
206 ld r27, -40(r1)
207 blr
208 END(FUNC)