]>
Commit | Line | Data |
---|---|---|
6b628d36 | 1 | /* mpn_mul -- Multiply two natural numbers. |
28f540f4 | 2 | |
6d7e8eda | 3 | Copyright (C) 1991-2023 Free Software Foundation, Inc. |
28f540f4 RM |
4 | |
5 | This file is part of the GNU MP Library. | |
6 | ||
7 | The GNU MP Library is free software; you can redistribute it and/or modify | |
6d84f89a AJ |
8 | it under the terms of the GNU Lesser General Public License as published by |
9 | the Free Software Foundation; either version 2.1 of the License, or (at your | |
28f540f4 RM |
10 | option) any later version. |
11 | ||
12 | The GNU MP Library is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
6d84f89a | 14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
28f540f4 RM |
15 | License for more details. |
16 | ||
6d84f89a | 17 | You should have received a copy of the GNU Lesser General Public License |
59ba27a6 | 18 | along with the GNU MP Library; see the file COPYING.LIB. If not, see |
5a82c748 | 19 | <https://www.gnu.org/licenses/>. */ |
28f540f4 | 20 | |
9d13fb24 | 21 | #include <gmp.h> |
28f540f4 RM |
22 | #include "gmp-impl.h" |
23 | ||
24 | /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) | |
25 | and v (pointed to by VP, with VSIZE limbs), and store the result at | |
26 | PRODP. USIZE + VSIZE limbs are always stored, but if the input | |
27 | operands are normalized. Return the most significant limb of the | |
28 | result. | |
29 | ||
30 | NOTE: The space pointed to by PRODP is overwritten before finished | |
31 | with U and V, so overlap is an error. | |
32 | ||
33 | Argument constraints: | |
34 | 1. USIZE >= VSIZE. | |
35 | 2. PRODP != UP and PRODP != VP, i.e. the destination | |
36 | must be distinct from the multiplier and the multiplicand. */ | |
37 | ||
38 | /* If KARATSUBA_THRESHOLD is not already defined, define it to a | |
39 | value which is good on most machines. */ | |
40 | #ifndef KARATSUBA_THRESHOLD | |
41 | #define KARATSUBA_THRESHOLD 32 | |
42 | #endif | |
43 | ||
b928942e | 44 | mp_limb_t |
6b628d36 RM |
45 | mpn_mul (mp_ptr prodp, |
46 | mp_srcptr up, mp_size_t usize, | |
47 | mp_srcptr vp, mp_size_t vsize) | |
28f540f4 RM |
48 | { |
49 | mp_ptr prod_endp = prodp + usize + vsize - 1; | |
b928942e | 50 | mp_limb_t cy; |
28f540f4 | 51 | mp_ptr tspace; |
6b628d36 | 52 | TMP_DECL (marker); |
28f540f4 RM |
53 | |
54 | if (vsize < KARATSUBA_THRESHOLD) | |
55 | { | |
56 | /* Handle simple cases with traditional multiplication. | |
57 | ||
58 | This is the most critical code of the entire function. All | |
59 | multiplies rely on this, both small and huge. Small ones arrive | |
60 | here immediately. Huge ones arrive here as this is the base case | |
61 | for Karatsuba's recursive algorithm below. */ | |
62 | mp_size_t i; | |
b928942e RM |
63 | mp_limb_t cy_limb; |
64 | mp_limb_t v_limb; | |
28f540f4 RM |
65 | |
66 | if (vsize == 0) | |
67 | return 0; | |
68 | ||
69 | /* Multiply by the first limb in V separately, as the result can be | |
70 | stored (not added) to PROD. We also avoid a loop for zeroing. */ | |
71 | v_limb = vp[0]; | |
72 | if (v_limb <= 1) | |
73 | { | |
74 | if (v_limb == 1) | |
75 | MPN_COPY (prodp, up, usize); | |
76 | else | |
77 | MPN_ZERO (prodp, usize); | |
78 | cy_limb = 0; | |
79 | } | |
80 | else | |
6b628d36 | 81 | cy_limb = mpn_mul_1 (prodp, up, usize, v_limb); |
28f540f4 RM |
82 | |
83 | prodp[usize] = cy_limb; | |
84 | prodp++; | |
85 | ||
86 | /* For each iteration in the outer loop, multiply one limb from | |
87 | U with one limb from V, and add it to PROD. */ | |
88 | for (i = 1; i < vsize; i++) | |
89 | { | |
90 | v_limb = vp[i]; | |
91 | if (v_limb <= 1) | |
92 | { | |
93 | cy_limb = 0; | |
94 | if (v_limb == 1) | |
6b628d36 | 95 | cy_limb = mpn_add_n (prodp, prodp, up, usize); |
28f540f4 RM |
96 | } |
97 | else | |
6b628d36 | 98 | cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb); |
28f540f4 RM |
99 | |
100 | prodp[usize] = cy_limb; | |
101 | prodp++; | |
102 | } | |
103 | return cy_limb; | |
104 | } | |
105 | ||
6b628d36 RM |
106 | TMP_MARK (marker); |
107 | ||
108 | tspace = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB); | |
28f540f4 RM |
109 | MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace); |
110 | ||
111 | prodp += vsize; | |
112 | up += vsize; | |
113 | usize -= vsize; | |
114 | if (usize >= vsize) | |
115 | { | |
6b628d36 | 116 | mp_ptr tp = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB); |
28f540f4 RM |
117 | do |
118 | { | |
119 | MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace); | |
6b628d36 RM |
120 | cy = mpn_add_n (prodp, prodp, tp, vsize); |
121 | mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy); | |
28f540f4 RM |
122 | prodp += vsize; |
123 | up += vsize; | |
124 | usize -= vsize; | |
125 | } | |
126 | while (usize >= vsize); | |
127 | } | |
128 | ||
129 | /* True: usize < vsize. */ | |
130 | ||
131 | /* Make life simple: Recurse. */ | |
132 | ||
133 | if (usize != 0) | |
134 | { | |
6b628d36 RM |
135 | mpn_mul (tspace, vp, vsize, up, usize); |
136 | cy = mpn_add_n (prodp, prodp, tspace, vsize); | |
137 | mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy); | |
28f540f4 RM |
138 | } |
139 | ||
6b628d36 | 140 | TMP_FREE (marker); |
28f540f4 RM |
141 | return *prod_endp; |
142 | } |