]> git.ipfire.org Git - thirdparty/glibc.git/blame - stdlib/mul_n.c
Prefer https to http for gnu.org and fsf.org URLs
[thirdparty/glibc.git] / stdlib / mul_n.c
CommitLineData
6b628d36 1/* mpn_mul_n -- Multiply two natural numbers of length n.
28f540f4 2
04277e02 3Copyright (C) 1991-2019 Free Software Foundation, Inc.
28f540f4
RM
4
5This file is part of the GNU MP Library.
6
7The GNU MP Library is free software; you can redistribute it and/or modify
6d84f89a
AJ
8it under the terms of the GNU Lesser General Public License as published by
9the Free Software Foundation; either version 2.1 of the License, or (at your
28f540f4
RM
10option) any later version.
11
12The GNU MP Library is distributed in the hope that it will be useful, but
13WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
6d84f89a 14or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
28f540f4
RM
15License for more details.
16
6d84f89a 17You should have received a copy of the GNU Lesser General Public License
59ba27a6 18along with the GNU MP Library; see the file COPYING.LIB. If not, see
5a82c748 19<https://www.gnu.org/licenses/>. */
28f540f4 20
9d13fb24 21#include <gmp.h>
28f540f4
RM
22#include "gmp-impl.h"
23
24/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
25 both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
26 always stored. Return the most significant limb.
27
28 Argument constraints:
29 1. PRODP != UP and PRODP != VP, i.e. the destination
30 must be distinct from the multiplier and the multiplicand. */
31
32/* If KARATSUBA_THRESHOLD is not already defined, define it to a
33 value which is good on most machines. */
34#ifndef KARATSUBA_THRESHOLD
35#define KARATSUBA_THRESHOLD 32
36#endif
37
38/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */
39#if KARATSUBA_THRESHOLD < 2
40#undef KARATSUBA_THRESHOLD
41#define KARATSUBA_THRESHOLD 2
42#endif
43
28f540f4
RM
44/* Handle simple cases with traditional multiplication.
45
46 This is the most critical code of multiplication. All multiplies rely
47 on this, both small and huge. Small ones arrive here immediately. Huge
48 ones arrive here as this is the base case for Karatsuba's recursive
49 algorithm below. */
50
51void
6b628d36 52impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
28f540f4
RM
53{
54 mp_size_t i;
b928942e
RM
55 mp_limb_t cy_limb;
56 mp_limb_t v_limb;
28f540f4
RM
57
58 /* Multiply by the first limb in V separately, as the result can be
59 stored (not added) to PROD. We also avoid a loop for zeroing. */
60 v_limb = vp[0];
61 if (v_limb <= 1)
62 {
63 if (v_limb == 1)
64 MPN_COPY (prodp, up, size);
65 else
66 MPN_ZERO (prodp, size);
67 cy_limb = 0;
68 }
69 else
6b628d36 70 cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
28f540f4
RM
71
72 prodp[size] = cy_limb;
73 prodp++;
74
75 /* For each iteration in the outer loop, multiply one limb from
76 U with one limb from V, and add it to PROD. */
77 for (i = 1; i < size; i++)
78 {
79 v_limb = vp[i];
80 if (v_limb <= 1)
81 {
82 cy_limb = 0;
83 if (v_limb == 1)
6b628d36 84 cy_limb = mpn_add_n (prodp, prodp, up, size);
28f540f4
RM
85 }
86 else
6b628d36 87 cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
28f540f4
RM
88
89 prodp[size] = cy_limb;
90 prodp++;
91 }
92}
93
94void
6b628d36 95impn_mul_n (mp_ptr prodp,
28f540f4 96 mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
28f540f4
RM
97{
98 if ((size & 1) != 0)
99 {
100 /* The size is odd, the code code below doesn't handle that.
101 Multiply the least significant (size - 1) limbs with a recursive
102 call, and handle the most significant limb of S1 and S2
103 separately. */
104 /* A slightly faster way to do this would be to make the Karatsuba
105 code below behave as if the size were even, and let it check for
106 odd size in the end. I.e., in essence move this code to the end.
107 Doing so would save us a recursive call, and potentially make the
108 stack grow a lot less. */
109
110 mp_size_t esize = size - 1; /* even size */
b928942e 111 mp_limb_t cy_limb;
28f540f4
RM
112
113 MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
6b628d36 114 cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
28f540f4 115 prodp[esize + esize] = cy_limb;
6b628d36 116 cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
28f540f4
RM
117
118 prodp[esize + size] = cy_limb;
119 }
120 else
121 {
122 /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
123
124 Split U in two pieces, U1 and U0, such that
125 U = U0 + U1*(B**n),
126 and V in V1 and V0, such that
127 V = V0 + V1*(B**n).
128
129 UV is then computed recursively using the identity
130
131 2n n n n
132 UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V
133 1 1 1 0 0 1 0 0
134
135 Where B = 2**BITS_PER_MP_LIMB. */
136
137 mp_size_t hsize = size >> 1;
b928942e 138 mp_limb_t cy;
28f540f4
RM
139 int negflg;
140
141 /*** Product H. ________________ ________________
142 |_____U1 x V1____||____U0 x V0_____| */
143 /* Put result in upper part of PROD and pass low part of TSPACE
144 as new TSPACE. */
145 MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
146
147 /*** Product M. ________________
148 |_(U1-U0)(V0-V1)_| */
6b628d36 149 if (mpn_cmp (up + hsize, up, hsize) >= 0)
28f540f4 150 {
6b628d36 151 mpn_sub_n (prodp, up + hsize, up, hsize);
28f540f4
RM
152 negflg = 0;
153 }
154 else
155 {
6b628d36 156 mpn_sub_n (prodp, up, up + hsize, hsize);
28f540f4
RM
157 negflg = 1;
158 }
6b628d36 159 if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
28f540f4 160 {
6b628d36 161 mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
28f540f4
RM
162 negflg ^= 1;
163 }
164 else
165 {
6b628d36 166 mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
28f540f4
RM
167 /* No change of NEGFLG. */
168 }
169 /* Read temporary operands from low part of PROD.
170 Put result in low part of TSPACE using upper part of TSPACE
171 as new TSPACE. */
172 MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);
173
174 /*** Add/copy product H. */
175 MPN_COPY (prodp + hsize, prodp + size, hsize);
6b628d36 176 cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
28f540f4
RM
177
178 /*** Add product M (if NEGFLG M is a negative number). */
179 if (negflg)
6b628d36 180 cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
28f540f4 181 else
6b628d36 182 cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
28f540f4
RM
183
184 /*** Product L. ________________ ________________
185 |________________||____U0 x V0_____| */
186 /* Read temporary operands from low part of PROD.
187 Put result in low part of TSPACE using upper part of TSPACE
188 as new TSPACE. */
189 MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);
190
191 /*** Add/copy Product L (twice). */
192
6b628d36 193 cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
28f540f4 194 if (cy)
6b628d36 195 mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
28f540f4
RM
196
197 MPN_COPY (prodp, tspace, hsize);
6b628d36 198 cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
28f540f4 199 if (cy)
6b628d36 200 mpn_add_1 (prodp + size, prodp + size, size, 1);
28f540f4
RM
201 }
202}
203
204void
6b628d36 205impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
28f540f4
RM
206{
207 mp_size_t i;
b928942e
RM
208 mp_limb_t cy_limb;
209 mp_limb_t v_limb;
28f540f4
RM
210
211 /* Multiply by the first limb in V separately, as the result can be
212 stored (not added) to PROD. We also avoid a loop for zeroing. */
213 v_limb = up[0];
214 if (v_limb <= 1)
215 {
216 if (v_limb == 1)
217 MPN_COPY (prodp, up, size);
218 else
219 MPN_ZERO (prodp, size);
220 cy_limb = 0;
221 }
222 else
6b628d36 223 cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
28f540f4
RM
224
225 prodp[size] = cy_limb;
226 prodp++;
227
228 /* For each iteration in the outer loop, multiply one limb from
229 U with one limb from V, and add it to PROD. */
230 for (i = 1; i < size; i++)
231 {
232 v_limb = up[i];
233 if (v_limb <= 1)
234 {
235 cy_limb = 0;
236 if (v_limb == 1)
6b628d36 237 cy_limb = mpn_add_n (prodp, prodp, up, size);
28f540f4
RM
238 }
239 else
6b628d36 240 cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
28f540f4
RM
241
242 prodp[size] = cy_limb;
243 prodp++;
244 }
245}
246
247void
6b628d36 248impn_sqr_n (mp_ptr prodp,
28f540f4 249 mp_srcptr up, mp_size_t size, mp_ptr tspace)
28f540f4
RM
250{
251 if ((size & 1) != 0)
252 {
253 /* The size is odd, the code code below doesn't handle that.
254 Multiply the least significant (size - 1) limbs with a recursive
255 call, and handle the most significant limb of S1 and S2
256 separately. */
257 /* A slightly faster way to do this would be to make the Karatsuba
258 code below behave as if the size were even, and let it check for
259 odd size in the end. I.e., in essence move this code to the end.
260 Doing so would save us a recursive call, and potentially make the
261 stack grow a lot less. */
262
263 mp_size_t esize = size - 1; /* even size */
b928942e 264 mp_limb_t cy_limb;
28f540f4
RM
265
266 MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
6b628d36 267 cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
28f540f4 268 prodp[esize + esize] = cy_limb;
6b628d36 269 cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
28f540f4
RM
270
271 prodp[esize + size] = cy_limb;
272 }
273 else
274 {
275 mp_size_t hsize = size >> 1;
b928942e 276 mp_limb_t cy;
28f540f4
RM
277
278 /*** Product H. ________________ ________________
279 |_____U1 x U1____||____U0 x U0_____| */
280 /* Put result in upper part of PROD and pass low part of TSPACE
281 as new TSPACE. */
282 MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
283
284 /*** Product M. ________________
285 |_(U1-U0)(U0-U1)_| */
6b628d36 286 if (mpn_cmp (up + hsize, up, hsize) >= 0)
28f540f4 287 {
6b628d36 288 mpn_sub_n (prodp, up + hsize, up, hsize);
28f540f4
RM
289 }
290 else
291 {
6b628d36 292 mpn_sub_n (prodp, up, up + hsize, hsize);
28f540f4
RM
293 }
294
295 /* Read temporary operands from low part of PROD.
296 Put result in low part of TSPACE using upper part of TSPACE
297 as new TSPACE. */
298 MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);
299
300 /*** Add/copy product H. */
301 MPN_COPY (prodp + hsize, prodp + size, hsize);
6b628d36 302 cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
28f540f4
RM
303
304 /*** Add product M (if NEGFLG M is a negative number). */
6b628d36 305 cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
28f540f4
RM
306
307 /*** Product L. ________________ ________________
308 |________________||____U0 x U0_____| */
309 /* Read temporary operands from low part of PROD.
310 Put result in low part of TSPACE using upper part of TSPACE
311 as new TSPACE. */
312 MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
313
314 /*** Add/copy Product L (twice). */
315
6b628d36 316 cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
28f540f4 317 if (cy)
6b628d36 318 mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
28f540f4
RM
319
320 MPN_COPY (prodp, tspace, hsize);
6b628d36 321 cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
28f540f4 322 if (cy)
6b628d36 323 mpn_add_1 (prodp + size, prodp + size, size, 1);
28f540f4
RM
324 }
325}
326
327/* This should be made into an inline function in gmp.h. */
01c901a5 328void
6b628d36 329mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
28f540f4 330{
6b628d36
RM
331 TMP_DECL (marker);
332 TMP_MARK (marker);
28f540f4
RM
333 if (up == vp)
334 {
335 if (size < KARATSUBA_THRESHOLD)
336 {
6b628d36 337 impn_sqr_n_basecase (prodp, up, size);
28f540f4
RM
338 }
339 else
340 {
341 mp_ptr tspace;
6b628d36
RM
342 tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
343 impn_sqr_n (prodp, up, size, tspace);
28f540f4
RM
344 }
345 }
346 else
347 {
348 if (size < KARATSUBA_THRESHOLD)
349 {
6b628d36 350 impn_mul_n_basecase (prodp, up, vp, size);
28f540f4
RM
351 }
352 else
353 {
354 mp_ptr tspace;
6b628d36
RM
355 tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
356 impn_mul_n (prodp, up, vp, size, tspace);
28f540f4
RM
357 }
358 }
6b628d36 359 TMP_FREE (marker);
28f540f4 360}