]>
Commit | Line | Data |
---|---|---|
1d92226b JJ |
1 | /* mpn_mul_n -- Multiply two natural numbers of length n. |
2 | ||
1a41c323 | 3 | Copyright (C) 1991-2013 Free Software Foundation, Inc. |
1d92226b JJ |
4 | |
5 | This file is part of the GNU MP Library. | |
6 | ||
7 | The GNU MP Library is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU Lesser General Public License as published by | |
9 | the Free Software Foundation; either version 2.1 of the License, or (at your | |
10 | option) any later version. | |
11 | ||
12 | The GNU MP Library is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public | |
15 | License for more details. | |
16 | ||
17 | You should have received a copy of the GNU Lesser General Public License | |
18 | along with the GNU MP Library; see the file COPYING.LIB. If not, write to | |
19 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, | |
20 | MA 02111-1307, USA. */ | |
21 | ||
a855debf | 22 | #include <config.h> |
1d92226b JJ |
23 | #include "gmp-impl.h" |
24 | ||
25 | /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), | |
26 | both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are | |
27 | always stored. Return the most significant limb. | |
28 | ||
29 | Argument constraints: | |
30 | 1. PRODP != UP and PRODP != VP, i.e. the destination | |
31 | must be distinct from the multiplier and the multiplicand. */ | |
32 | ||
33 | /* If KARATSUBA_THRESHOLD is not already defined, define it to a | |
34 | value which is good on most machines. */ | |
35 | #ifndef KARATSUBA_THRESHOLD | |
36 | #define KARATSUBA_THRESHOLD 32 | |
37 | #endif | |
38 | ||
39 | /* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */ | |
40 | #if KARATSUBA_THRESHOLD < 2 | |
41 | #undef KARATSUBA_THRESHOLD | |
42 | #define KARATSUBA_THRESHOLD 2 | |
43 | #endif | |
44 | ||
45 | /* Handle simple cases with traditional multiplication. | |
46 | ||
47 | This is the most critical code of multiplication. All multiplies rely | |
48 | on this, both small and huge. Small ones arrive here immediately. Huge | |
49 | ones arrive here as this is the base case for Karatsuba's recursive | |
50 | algorithm below. */ | |
51 | ||
52 | void | |
53 | #if __STDC__ | |
54 | impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) | |
55 | #else | |
56 | impn_mul_n_basecase (prodp, up, vp, size) | |
57 | mp_ptr prodp; | |
58 | mp_srcptr up; | |
59 | mp_srcptr vp; | |
60 | mp_size_t size; | |
61 | #endif | |
62 | { | |
63 | mp_size_t i; | |
64 | mp_limb_t cy_limb; | |
65 | mp_limb_t v_limb; | |
66 | ||
67 | /* Multiply by the first limb in V separately, as the result can be | |
68 | stored (not added) to PROD. We also avoid a loop for zeroing. */ | |
69 | v_limb = vp[0]; | |
70 | if (v_limb <= 1) | |
71 | { | |
72 | if (v_limb == 1) | |
73 | MPN_COPY (prodp, up, size); | |
74 | else | |
75 | MPN_ZERO (prodp, size); | |
76 | cy_limb = 0; | |
77 | } | |
78 | else | |
79 | cy_limb = mpn_mul_1 (prodp, up, size, v_limb); | |
80 | ||
81 | prodp[size] = cy_limb; | |
82 | prodp++; | |
83 | ||
84 | /* For each iteration in the outer loop, multiply one limb from | |
85 | U with one limb from V, and add it to PROD. */ | |
86 | for (i = 1; i < size; i++) | |
87 | { | |
88 | v_limb = vp[i]; | |
89 | if (v_limb <= 1) | |
90 | { | |
91 | cy_limb = 0; | |
92 | if (v_limb == 1) | |
93 | cy_limb = mpn_add_n (prodp, prodp, up, size); | |
94 | } | |
95 | else | |
96 | cy_limb = mpn_addmul_1 (prodp, up, size, v_limb); | |
97 | ||
98 | prodp[size] = cy_limb; | |
99 | prodp++; | |
100 | } | |
101 | } | |
102 | ||
103 | void | |
104 | #if __STDC__ | |
105 | impn_mul_n (mp_ptr prodp, | |
106 | mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace) | |
107 | #else | |
108 | impn_mul_n (prodp, up, vp, size, tspace) | |
109 | mp_ptr prodp; | |
110 | mp_srcptr up; | |
111 | mp_srcptr vp; | |
112 | mp_size_t size; | |
113 | mp_ptr tspace; | |
114 | #endif | |
115 | { | |
116 | if ((size & 1) != 0) | |
117 | { | |
118 | /* The size is odd, the code code below doesn't handle that. | |
119 | Multiply the least significant (size - 1) limbs with a recursive | |
120 | call, and handle the most significant limb of S1 and S2 | |
121 | separately. */ | |
122 | /* A slightly faster way to do this would be to make the Karatsuba | |
123 | code below behave as if the size were even, and let it check for | |
124 | odd size in the end. I.e., in essence move this code to the end. | |
125 | Doing so would save us a recursive call, and potentially make the | |
126 | stack grow a lot less. */ | |
127 | ||
128 | mp_size_t esize = size - 1; /* even size */ | |
129 | mp_limb_t cy_limb; | |
130 | ||
131 | MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace); | |
132 | cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]); | |
133 | prodp[esize + esize] = cy_limb; | |
134 | cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]); | |
135 | ||
136 | prodp[esize + size] = cy_limb; | |
137 | } | |
138 | else | |
139 | { | |
140 | /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. | |
141 | ||
142 | Split U in two pieces, U1 and U0, such that | |
143 | U = U0 + U1*(B**n), | |
144 | and V in V1 and V0, such that | |
145 | V = V0 + V1*(B**n). | |
146 | ||
147 | UV is then computed recursively using the identity | |
148 | ||
149 | 2n n n n | |
150 | UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V | |
151 | 1 1 1 0 0 1 0 0 | |
152 | ||
153 | Where B = 2**BITS_PER_MP_LIMB. */ | |
154 | ||
155 | mp_size_t hsize = size >> 1; | |
156 | mp_limb_t cy; | |
157 | int negflg; | |
158 | ||
159 | /*** Product H. ________________ ________________ | |
160 | |_____U1 x V1____||____U0 x V0_____| */ | |
161 | /* Put result in upper part of PROD and pass low part of TSPACE | |
162 | as new TSPACE. */ | |
163 | MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace); | |
164 | ||
165 | /*** Product M. ________________ | |
166 | |_(U1-U0)(V0-V1)_| */ | |
167 | if (mpn_cmp (up + hsize, up, hsize) >= 0) | |
168 | { | |
169 | mpn_sub_n (prodp, up + hsize, up, hsize); | |
170 | negflg = 0; | |
171 | } | |
172 | else | |
173 | { | |
174 | mpn_sub_n (prodp, up, up + hsize, hsize); | |
175 | negflg = 1; | |
176 | } | |
177 | if (mpn_cmp (vp + hsize, vp, hsize) >= 0) | |
178 | { | |
179 | mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize); | |
180 | negflg ^= 1; | |
181 | } | |
182 | else | |
183 | { | |
184 | mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize); | |
185 | /* No change of NEGFLG. */ | |
186 | } | |
187 | /* Read temporary operands from low part of PROD. | |
188 | Put result in low part of TSPACE using upper part of TSPACE | |
189 | as new TSPACE. */ | |
190 | MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size); | |
191 | ||
192 | /*** Add/copy product H. */ | |
193 | MPN_COPY (prodp + hsize, prodp + size, hsize); | |
194 | cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize); | |
195 | ||
196 | /*** Add product M (if NEGFLG M is a negative number). */ | |
197 | if (negflg) | |
198 | cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size); | |
199 | else | |
200 | cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); | |
201 | ||
202 | /*** Product L. ________________ ________________ | |
203 | |________________||____U0 x V0_____| */ | |
204 | /* Read temporary operands from low part of PROD. | |
205 | Put result in low part of TSPACE using upper part of TSPACE | |
206 | as new TSPACE. */ | |
207 | MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size); | |
208 | ||
209 | /*** Add/copy Product L (twice). */ | |
210 | ||
211 | cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); | |
212 | if (cy) | |
213 | mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy); | |
214 | ||
215 | MPN_COPY (prodp, tspace, hsize); | |
216 | cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); | |
217 | if (cy) | |
218 | mpn_add_1 (prodp + size, prodp + size, size, 1); | |
219 | } | |
220 | } |