/*
 * Copyright 2017-2021 The OpenSSL Project Authors. All Rights Reserved.
 * Copyright 2014 Cryptography Research, Inc.
 *
 * Licensed under the OpenSSL license (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 *
 * Originally written by Mike Hamburg
 */
15 void gf_mul(gf_s
* RESTRICT cs
, const gf as
, const gf bs
)
17 const uint64_t *a
= as
->limb
, *b
= bs
->limb
;
18 uint64_t *c
= cs
->limb
;
19 uint128_t accum0
= 0, accum1
= 0, accum2
;
20 uint64_t mask
= (1ULL << 56) - 1;
21 uint64_t aa
[4], bb
[4], bbb
[4];
24 for (i
= 0; i
< 4; i
++) {
25 aa
[i
] = a
[i
] + a
[i
+ 4];
26 bb
[i
] = b
[i
] + b
[i
+ 4];
27 bbb
[i
] = bb
[i
] + b
[i
+ 4];
30 for (i
= 0; i
< 4; i
++) {
33 for (j
= 0; j
<= i
; j
++) {
34 accum2
+= widemul(a
[j
], b
[i
- j
]);
35 accum1
+= widemul(aa
[j
], bb
[i
- j
]);
36 accum0
+= widemul(a
[j
+ 4], b
[i
- j
+ 4]);
39 accum2
+= widemul(a
[j
], b
[i
- j
+ 8]);
40 accum1
+= widemul(aa
[j
], bbb
[i
- j
+ 4]);
41 accum0
+= widemul(a
[j
+ 4], bb
[i
- j
+ 4]);
47 c
[i
] = ((uint64_t)(accum0
)) & mask
;
48 c
[i
+ 4] = ((uint64_t)(accum1
)) & mask
;
57 c
[4] = ((uint64_t)(accum0
)) & mask
;
58 c
[0] = ((uint64_t)(accum1
)) & mask
;
63 c
[5] += ((uint64_t)(accum0
));
64 c
[1] += ((uint64_t)(accum1
));
67 void gf_mulw_unsigned(gf_s
* RESTRICT cs
, const gf as
, uint32_t b
)
69 const uint64_t *a
= as
->limb
;
70 uint64_t *c
= cs
->limb
;
71 uint128_t accum0
= 0, accum4
= 0;
72 uint64_t mask
= (1ULL << 56) - 1;
75 for (i
= 0; i
< 4; i
++) {
76 accum0
+= widemul(b
, a
[i
]);
77 accum4
+= widemul(b
, a
[i
+ 4]);
80 c
[i
+ 4] = accum4
& mask
;
84 accum0
+= accum4
+ c
[4];
93 void gf_sqr(gf_s
* RESTRICT cs
, const gf as
)
95 const uint64_t *a
= as
->limb
;
96 uint64_t *c
= cs
->limb
;
97 uint128_t accum0
= 0, accum1
= 0, accum2
;
98 uint64_t mask
= (1ULL << 56) - 1;
102 /* For some reason clang doesn't vectorize this without prompting? */
103 for (i
= 0; i
< 4; i
++)
104 aa
[i
] = a
[i
] + a
[i
+ 4];
106 accum2
= widemul(a
[0], a
[3]);
107 accum0
= widemul(aa
[0], aa
[3]);
108 accum1
= widemul(a
[4], a
[7]);
110 accum2
+= widemul(a
[1], a
[2]);
111 accum0
+= widemul(aa
[1], aa
[2]);
112 accum1
+= widemul(a
[5], a
[6]);
117 c
[3] = ((uint64_t)(accum1
)) << 1 & mask
;
118 c
[7] = ((uint64_t)(accum0
)) << 1 & mask
;
123 accum0
+= widemul(2 * aa
[1], aa
[3]);
124 accum1
+= widemul(2 * a
[5], a
[7]);
125 accum0
+= widemul(aa
[2], aa
[2]);
128 accum0
-= widemul(2 * a
[1], a
[3]);
129 accum1
+= widemul(a
[6], a
[6]);
131 accum2
= widemul(a
[0], a
[0]);
135 accum0
-= widemul(a
[2], a
[2]);
136 accum1
+= widemul(aa
[0], aa
[0]);
137 accum0
+= widemul(a
[4], a
[4]);
139 c
[0] = ((uint64_t)(accum0
)) & mask
;
140 c
[4] = ((uint64_t)(accum1
)) & mask
;
145 accum2
= widemul(2 * aa
[2], aa
[3]);
146 accum0
-= widemul(2 * a
[2], a
[3]);
147 accum1
+= widemul(2 * a
[6], a
[7]);
152 accum2
= widemul(2 * a
[0], a
[1]);
153 accum1
+= widemul(2 * aa
[0], aa
[1]);
154 accum0
+= widemul(2 * a
[4], a
[5]);
159 c
[1] = ((uint64_t)(accum0
)) & mask
;
160 c
[5] = ((uint64_t)(accum1
)) & mask
;
165 accum2
= widemul(aa
[3], aa
[3]);
166 accum0
-= widemul(a
[3], a
[3]);
167 accum1
+= widemul(a
[7], a
[7]);
172 accum2
= widemul(2 * a
[0], a
[2]);
173 accum1
+= widemul(2 * aa
[0], aa
[2]);
174 accum0
+= widemul(2 * a
[4], a
[6]);
176 accum2
+= widemul(a
[1], a
[1]);
177 accum1
+= widemul(aa
[1], aa
[1]);
178 accum0
+= widemul(a
[5], a
[5]);
183 c
[2] = ((uint64_t)(accum0
)) & mask
;
184 c
[6] = ((uint64_t)(accum1
)) & mask
;
191 c
[3] = ((uint64_t)(accum0
)) & mask
;
192 c
[7] = ((uint64_t)(accum1
)) & mask
;
194 /* we could almost stop here, but it wouldn't be stable, so... */
198 c
[4] += ((uint64_t)(accum0
)) + ((uint64_t)(accum1
));
199 c
[0] += ((uint64_t)(accum1
));