/* __mpn_mul_n -- Multiply two natural numbers of length n.

Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
License for more details.

You should have received a copy of the GNU Library General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */

#include "gmp.h"
#include "gmp-impl.h"

/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
   both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
   always stored.

   Argument constraints:
   1. PRODP != UP and PRODP != VP, i.e. the destination
      must be distinct from the multiplier and the multiplicand.  */

/* If KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif

/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
#if KARATSUBA_THRESHOLD < 2
#undef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 2
#endif

void
#if __STDC__
____mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#else
____mpn_mul_n ();
#endif
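
/* Recursion helpers: fall back to the schoolbook basecase below the
   threshold, otherwise recurse with TSPACE as scratch.  These definitions
   are assumed here (they follow the usual GMP pattern for this file);
   without them the recursive code below would not compile.  */
#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
  do { \
    if ((size) < KARATSUBA_THRESHOLD) \
      ____mpn_mul_n_basecase (prodp, up, vp, size); \
    else \
      ____mpn_mul_n (prodp, up, vp, size, tspace); \
  } while (0)

#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
  do { \
    if ((size) < KARATSUBA_THRESHOLD) \
      ____mpn_sqr_n_basecase (prodp, up, size); \
    else \
      ____mpn_sqr_n (prodp, up, size, tspace); \
  } while (0)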

/* Handle simple cases with traditional multiplication.

   This is the most critical code of multiplication.  All multiplies rely
   on this, both small and huge.  Small ones arrive here immediately.  Huge
   ones arrive here as this is the base case for Karatsuba's recursive
   algorithm below.  */

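/* For example, with SIZE == 2 the code below first stores U * vp[0] in
   PROD[0..2], then adds U * vp[1] one limb higher: ordinary long
   multiplication in base 2**BITS_PER_MP_LIMB, costing SIZE**2 limb
   multiplies.  */
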
void
#if __STDC__
____mpn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
____mpn_mul_n_basecase (prodp, up, vp, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
#endif
{
  mp_size_t i;
  mp_limb cy_limb;
  mp_limb v_limb;

  /* Multiply U by the first limb of V separately, as the result can be
     stored (rather than added) into PROD.  This also avoids an explicit
     zeroing loop.  */
  v_limb = vp[0];
  if (v_limb <= 1)
    {
      if (v_limb == 1)
        MPN_COPY (prodp, up, size);
      else
        MPN_ZERO (prodp, size);
      cy_limb = 0;
    }
  else
    cy_limb = __mpn_mul_1 (prodp, up, size, v_limb);

  prodp[size] = cy_limb;
  prodp++;

  /* Each iteration of the loop multiplies U by the next limb of V and
     adds the product into PROD, one limb further up.  */
  for (i = 1; i < size; i++)
    {
      v_limb = vp[i];
      if (v_limb <= 1)
        {
          cy_limb = 0;
          if (v_limb == 1)
            cy_limb = __mpn_add_n (prodp, prodp, up, size);
        }
      else
        cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb);

      prodp[size] = cy_limb;
      prodp++;
    }
}

void
#if __STDC__
____mpn_mul_n (mp_ptr prodp,
               mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
#else
____mpn_mul_n (prodp, up, vp, size, tspace)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
     mp_ptr tspace;
#endif
{
  if ((size & 1) != 0)
    {
      /* The size is odd, and the code below doesn't handle that.
         Multiply the least significant (size - 1) limbs with a recursive
         call, and handle the most significant limb of U and V
         separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
         code below behave as if the size were even, and let it check for
         odd size in the end.  I.e., in essence move this code to the end.
         Doing so would save us a recursive call, and potentially make the
         stack grow a lot less.  */
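      /* Writing E = SIZE - 1 and B = 2**BITS_PER_MP_LIMB, with
         U = U' + up[E]*B**E and V = V' + vp[E]*B**E, we have
           UV = U'V' + vp[E]*U'*B**E + up[E]*V*B**E.
         The recursive call computes U'V'; the first __mpn_addmul_1 below
         adds vp[E]*U' at offset E, and the second adds up[E]*V; note that
         the latter spans all SIZE limbs of V, so it also picks up the
         vp[E] cross term.  */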

      mp_size_t esize = size - 1;	/* even size */
      mp_limb cy_limb;

      MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
      cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = __mpn_addmul_1 (prodp + esize, vp, size, up[esize]);

      prodp[esize + size] = cy_limb;
    }
  else
    {
      /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.

         Split U in two pieces, U1 and U0, such that
           U = U0 + U1*(B**n),
         and V in V1 and V0, such that
           V = V0 + V1*(B**n).

         UV is then computed recursively using the identity

           UV = (B**2n + B**n)*U1*V1 + B**n*(U1-U0)*(V0-V1) + (B**n + 1)*U0*V0,

         where B = 2**BITS_PER_MP_LIMB.  */
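
      /* For instance, with B = 10 and n = 1: U = 23 and V = 14 give
         U1 = 2, U0 = 3, V1 = 1, V0 = 4, and
           (100 + 10)*2*1 + 10*(2-3)*(4-1) + 11*3*4 = 220 - 30 + 132 = 322,
         which is indeed 23 * 14.  Only three half-size products appear,
         which is what makes the recursion take O(n**lg(3)) limb
         multiplies instead of O(n**2).  */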

      mp_size_t hsize = size >> 1;
      mp_limb cy;
      int negflg;

      /*** Product H.    ________________  ________________
                        |_____U1 x V1____||____U0 x V0_____| */
      /* Put result in upper part of PROD and pass low part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);

      /*** Product M.    ________________
                        |_(U1-U0)(V0-V1)_| */
      if (__mpn_cmp (up + hsize, up, hsize) >= 0)
        {
          __mpn_sub_n (prodp, up + hsize, up, hsize);
          negflg = 0;
        }
      else
        {
          __mpn_sub_n (prodp, up, up + hsize, hsize);
          negflg = 1;
        }
      if (__mpn_cmp (vp + hsize, vp, hsize) >= 0)
        {
          __mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
          negflg ^= 1;
        }
      else
        {
          __mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
          /* No change of NEGFLG.  */
        }
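
      /* TSPACE holds 2*SIZE limbs: the recursive product (SIZE limbs) goes
         into its lower half, while its upper half is passed down as scratch,
         which is exactly the 2*HSIZE limbs the recursive call requires.  */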
      /* Read temporary operands from low part of PROD.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);

      /*** Add/copy product H.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Add product M (if NEGFLG is set, M is negative).  */
      if (negflg)
        cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
      else
        cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);

      /*** Product L.    ________________  ________________
                        |________________||____U0 x V0_____| */
      /* Read operands directly from U and V.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);

      /*** Add/copy Product L (twice).  */

      cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
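      /* CY now holds the carry into the top HSIZE limbs.  Since CY is an
         unsigned mp_limb, `cy > 0' below is equivalent to `cy != 0', so
         the abort branch is unreachable defensive code; any intermediate
         borrow from the subtraction above has been repaid by this point.  */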
      if (cy)
        {
          if (cy > 0)
            __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
          else
            {
              __mpn_sub_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
              abort ();
            }
        }

      MPN_COPY (prodp, tspace, hsize);
      cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
        __mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}

void
#if __STDC__
____mpn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
#else
____mpn_sqr_n_basecase (prodp, up, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t size;
#endif
{
  mp_size_t i;
  mp_limb cy_limb;
  mp_limb v_limb;

  /* Multiply U by its first limb separately, as the result can be
     stored (rather than added) into PROD.  This also avoids an explicit
     zeroing loop.  */
  v_limb = up[0];
  if (v_limb <= 1)
    {
      if (v_limb == 1)
        MPN_COPY (prodp, up, size);
      else
        MPN_ZERO (prodp, size);
      cy_limb = 0;
    }
  else
    cy_limb = __mpn_mul_1 (prodp, up, size, v_limb);

  prodp[size] = cy_limb;
  prodp++;

  /* Each iteration of the loop multiplies U by its next limb and adds
     the product into PROD, one limb further up.  */
  for (i = 1; i < size; i++)
    {
      v_limb = up[i];
      if (v_limb <= 1)
        {
          cy_limb = 0;
          if (v_limb == 1)
            cy_limb = __mpn_add_n (prodp, prodp, up, size);
        }
      else
        cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb);

      prodp[size] = cy_limb;
      prodp++;
    }
}

void
#if __STDC__
____mpn_sqr_n (mp_ptr prodp,
               mp_srcptr up, mp_size_t size, mp_ptr tspace)
#else
____mpn_sqr_n (prodp, up, size, tspace)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t size;
     mp_ptr tspace;
#endif
{
  if ((size & 1) != 0)
    {
      /* The size is odd, and the code below doesn't handle that.
         Square the least significant (size - 1) limbs with a recursive
         call, and handle the most significant limb of U separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
         code below behave as if the size were even, and let it check for
         odd size in the end.  I.e., in essence move this code to the end.
         Doing so would save us a recursive call, and potentially make the
         stack grow a lot less.  */
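      /* As in ____mpn_mul_n above: with E = SIZE - 1 and
         U = U' + up[E]*B**E,
           U**2 = U'**2 + up[E]*U'*B**E + up[E]*U*B**E,
         so the first __mpn_addmul_1 below adds up[E]*U' and the second
         adds up[E]*U, spanning all SIZE limbs to cover the up[E]**2
         term.  */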

      mp_size_t esize = size - 1;	/* even size */
      mp_limb cy_limb;

      MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
      cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = __mpn_addmul_1 (prodp + esize, up, size, up[esize]);

      prodp[esize + size] = cy_limb;
    }
  else
    {
      mp_size_t hsize = size >> 1;
      mp_limb cy;

      /*** Product H.    ________________  ________________
                        |_____U1 x U1____||____U0 x U0_____| */
      /* Put result in upper part of PROD and pass low part of TSPACE
         as new TSPACE.  */
      MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);

      /*** Product M.    ________________
                        |_(U1-U0)(U0-U1)_| */
      if (__mpn_cmp (up + hsize, up, hsize) >= 0)
        {
          __mpn_sub_n (prodp, up + hsize, up, hsize);
        }
      else
        {
          __mpn_sub_n (prodp, up, up + hsize, hsize);
        }

      /* Read temporary operands from low part of PROD.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);

      /*** Add/copy product H.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Add product M.  In the squaring case M = -(U1-U0)**2 is never
           positive, so it is always subtracted.  */
      cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);

      /*** Product L.    ________________  ________________
                        |________________||____U0 x U0_____| */
      /* Read operands directly from U.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);

      /*** Add/copy Product L (twice).  */

      cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
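      /* As in ____mpn_mul_n: CY is unsigned, so `cy > 0' below is just
         `cy != 0' and the abort branch cannot be taken.  */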
      if (cy)
        {
          if (cy > 0)
            __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
          else
            {
              __mpn_sub_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
              abort ();
            }
        }

      MPN_COPY (prodp, tspace, hsize);
      cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
        __mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}

/* This should be made into an inline function in gmp.h.  */
inline void
#if __STDC__
__mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
__mpn_mul_n (prodp, up, vp, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
#endif
{
  if (up == vp)
    {
      if (size < KARATSUBA_THRESHOLD)
        {
          ____mpn_sqr_n_basecase (prodp, up, size);
        }
      else
        {
          mp_ptr tspace;
          tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB);
          ____mpn_sqr_n (prodp, up, size, tspace);
        }
    }
  else
    {
      if (size < KARATSUBA_THRESHOLD)
        {
          ____mpn_mul_n_basecase (prodp, up, vp, size);
        }
      else
        {
          mp_ptr tspace;
          tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB);
          ____mpn_mul_n (prodp, up, vp, size, tspace);
        }
    }
}
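
/* A minimal usage sketch, assuming the gmp-impl.h environment (mp_limb,
   BYTES_PER_MP_LIMB) and arbitrary example limb values; guarded out so it
   has no effect on the build.  The destination must be distinct from both
   sources and must provide 2 * SIZE limbs.  */
#if 0
static void
__mpn_mul_n_example (void)
{
  mp_limb u[4] = {1, 2, 3, 4};	/* least significant limb first */
  mp_limb v[4] = {5, 6, 7, 8};
  mp_limb prod[8];		/* 2 * SIZE limbs are always stored */

  __mpn_mul_n (prod, u, v, 4);	/* full 8-limb product of u and v */
  __mpn_mul_n (prod, u, u, 4);	/* up == vp dispatches to the squaring code */
}
#endif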