]>
Commit | Line | Data |
---|---|---|
6b628d36 | 1 | /* mpn_mul -- Multiply two natural numbers. |
28f540f4 | 2 | |
b168057a | 3 | Copyright (C) 1991-2015 Free Software Foundation, Inc. |
28f540f4 RM |
4 | |
5 | This file is part of the GNU MP Library. | |
6 | ||
7 | The GNU MP Library is free software; you can redistribute it and/or modify | |
6d84f89a AJ |
8 | it under the terms of the GNU Lesser General Public License as published by |
9 | the Free Software Foundation; either version 2.1 of the License, or (at your | |
28f540f4 RM |
10 | option) any later version. |
11 | ||
12 | The GNU MP Library is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
6d84f89a | 14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
28f540f4 RM |
15 | License for more details. |
16 | ||
6d84f89a | 17 | You should have received a copy of the GNU Lesser General Public License |
59ba27a6 PE |
18 | along with the GNU MP Library; see the file COPYING.LIB. If not, see |
19 | <http://www.gnu.org/licenses/>. */ | |
28f540f4 | 20 | |
9d13fb24 | 21 | #include <gmp.h> |
28f540f4 RM |
22 | #include "gmp-impl.h" |
23 | ||
24 | /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) | |
25 | and v (pointed to by VP, with VSIZE limbs), and store the result at | |
26 | PRODP. USIZE + VSIZE limbs are always stored, but if the input | |
27 | operands are normalized. Return the most significant limb of the | |
28 | result. | |
29 | ||
30 | NOTE: The space pointed to by PRODP is overwritten before finished | |
31 | with U and V, so overlap is an error. | |
32 | ||
33 | Argument constraints: | |
34 | 1. USIZE >= VSIZE. | |
35 | 2. PRODP != UP and PRODP != VP, i.e. the destination | |
36 | must be distinct from the multiplier and the multiplicand. */ | |
37 | ||
38 | /* If KARATSUBA_THRESHOLD is not already defined, define it to a | |
39 | value which is good on most machines. */ | |
40 | #ifndef KARATSUBA_THRESHOLD | |
41 | #define KARATSUBA_THRESHOLD 32 | |
42 | #endif | |
43 | ||
b928942e | 44 | mp_limb_t |
28f540f4 | 45 | #if __STDC__ |
6b628d36 RM |
46 | mpn_mul (mp_ptr prodp, |
47 | mp_srcptr up, mp_size_t usize, | |
48 | mp_srcptr vp, mp_size_t vsize) | |
28f540f4 | 49 | #else |
6b628d36 | 50 | mpn_mul (prodp, up, usize, vp, vsize) |
28f540f4 RM |
51 | mp_ptr prodp; |
52 | mp_srcptr up; | |
53 | mp_size_t usize; | |
54 | mp_srcptr vp; | |
55 | mp_size_t vsize; | |
56 | #endif | |
57 | { | |
58 | mp_ptr prod_endp = prodp + usize + vsize - 1; | |
b928942e | 59 | mp_limb_t cy; |
28f540f4 | 60 | mp_ptr tspace; |
6b628d36 | 61 | TMP_DECL (marker); |
28f540f4 RM |
62 | |
63 | if (vsize < KARATSUBA_THRESHOLD) | |
64 | { | |
65 | /* Handle simple cases with traditional multiplication. | |
66 | ||
67 | This is the most critical code of the entire function. All | |
68 | multiplies rely on this, both small and huge. Small ones arrive | |
69 | here immediately. Huge ones arrive here as this is the base case | |
70 | for Karatsuba's recursive algorithm below. */ | |
71 | mp_size_t i; | |
b928942e RM |
72 | mp_limb_t cy_limb; |
73 | mp_limb_t v_limb; | |
28f540f4 RM |
74 | |
75 | if (vsize == 0) | |
76 | return 0; | |
77 | ||
78 | /* Multiply by the first limb in V separately, as the result can be | |
79 | stored (not added) to PROD. We also avoid a loop for zeroing. */ | |
80 | v_limb = vp[0]; | |
81 | if (v_limb <= 1) | |
82 | { | |
83 | if (v_limb == 1) | |
84 | MPN_COPY (prodp, up, usize); | |
85 | else | |
86 | MPN_ZERO (prodp, usize); | |
87 | cy_limb = 0; | |
88 | } | |
89 | else | |
6b628d36 | 90 | cy_limb = mpn_mul_1 (prodp, up, usize, v_limb); |
28f540f4 RM |
91 | |
92 | prodp[usize] = cy_limb; | |
93 | prodp++; | |
94 | ||
95 | /* For each iteration in the outer loop, multiply one limb from | |
96 | U with one limb from V, and add it to PROD. */ | |
97 | for (i = 1; i < vsize; i++) | |
98 | { | |
99 | v_limb = vp[i]; | |
100 | if (v_limb <= 1) | |
101 | { | |
102 | cy_limb = 0; | |
103 | if (v_limb == 1) | |
6b628d36 | 104 | cy_limb = mpn_add_n (prodp, prodp, up, usize); |
28f540f4 RM |
105 | } |
106 | else | |
6b628d36 | 107 | cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb); |
28f540f4 RM |
108 | |
109 | prodp[usize] = cy_limb; | |
110 | prodp++; | |
111 | } | |
112 | return cy_limb; | |
113 | } | |
114 | ||
6b628d36 RM |
115 | TMP_MARK (marker); |
116 | ||
117 | tspace = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB); | |
28f540f4 RM |
118 | MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace); |
119 | ||
120 | prodp += vsize; | |
121 | up += vsize; | |
122 | usize -= vsize; | |
123 | if (usize >= vsize) | |
124 | { | |
6b628d36 | 125 | mp_ptr tp = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB); |
28f540f4 RM |
126 | do |
127 | { | |
128 | MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace); | |
6b628d36 RM |
129 | cy = mpn_add_n (prodp, prodp, tp, vsize); |
130 | mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy); | |
28f540f4 RM |
131 | prodp += vsize; |
132 | up += vsize; | |
133 | usize -= vsize; | |
134 | } | |
135 | while (usize >= vsize); | |
136 | } | |
137 | ||
138 | /* True: usize < vsize. */ | |
139 | ||
140 | /* Make life simple: Recurse. */ | |
141 | ||
142 | if (usize != 0) | |
143 | { | |
6b628d36 RM |
144 | mpn_mul (tspace, vp, vsize, up, usize); |
145 | cy = mpn_add_n (prodp, prodp, tspace, vsize); | |
146 | mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy); | |
28f540f4 RM |
147 | } |
148 | ||
6b628d36 | 149 | TMP_FREE (marker); |
28f540f4 RM |
150 | return *prod_endp; |
151 | } |