]>
Commit | Line | Data |
---|---|---|
a6b803b3 AB |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM | |
4 | * | |
5 | * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> | |
6 | */ | |
7 | ||
8 | #include <asm/hwcap.h> | |
9 | #include <asm/neon.h> | |
10 | #include <asm/simd.h> | |
11 | #include <asm/unaligned.h> | |
12 | #include <crypto/algapi.h> | |
13 | #include <crypto/internal/hash.h> | |
14 | #include <crypto/internal/poly1305.h> | |
15 | #include <crypto/internal/simd.h> | |
16 | #include <linux/cpufeature.h> | |
17 | #include <linux/crypto.h> | |
18 | #include <linux/jump_label.h> | |
19 | #include <linux/module.h> | |
20 | ||
21 | void poly1305_init_arm(void *state, const u8 *key); | |
22 | void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); | |
31899908 | 23 | void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce); |
a6b803b3 AB |
24 | |
25 | void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) | |
26 | { | |
27 | } | |
28 | ||
29 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); | |
30 | ||
31 | void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) | |
32 | { | |
33 | poly1305_init_arm(&dctx->h, key); | |
34 | dctx->s[0] = get_unaligned_le32(key + 16); | |
35 | dctx->s[1] = get_unaligned_le32(key + 20); | |
36 | dctx->s[2] = get_unaligned_le32(key + 24); | |
37 | dctx->s[3] = get_unaligned_le32(key + 28); | |
38 | dctx->buflen = 0; | |
39 | } | |
40 | EXPORT_SYMBOL(poly1305_init_arch); | |
41 | ||
42 | static int arm_poly1305_init(struct shash_desc *desc) | |
43 | { | |
44 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); | |
45 | ||
46 | dctx->buflen = 0; | |
47 | dctx->rset = 0; | |
48 | dctx->sset = false; | |
49 | ||
50 | return 0; | |
51 | } | |
52 | ||
53 | static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, | |
54 | u32 len, u32 hibit, bool do_neon) | |
55 | { | |
56 | if (unlikely(!dctx->sset)) { | |
57 | if (!dctx->rset) { | |
58 | poly1305_init_arm(&dctx->h, src); | |
59 | src += POLY1305_BLOCK_SIZE; | |
60 | len -= POLY1305_BLOCK_SIZE; | |
61 | dctx->rset = 1; | |
62 | } | |
63 | if (len >= POLY1305_BLOCK_SIZE) { | |
64 | dctx->s[0] = get_unaligned_le32(src + 0); | |
65 | dctx->s[1] = get_unaligned_le32(src + 4); | |
66 | dctx->s[2] = get_unaligned_le32(src + 8); | |
67 | dctx->s[3] = get_unaligned_le32(src + 12); | |
68 | src += POLY1305_BLOCK_SIZE; | |
69 | len -= POLY1305_BLOCK_SIZE; | |
70 | dctx->sset = true; | |
71 | } | |
72 | if (len < POLY1305_BLOCK_SIZE) | |
73 | return; | |
74 | } | |
75 | ||
76 | len &= ~(POLY1305_BLOCK_SIZE - 1); | |
77 | ||
78 | if (static_branch_likely(&have_neon) && likely(do_neon)) | |
79 | poly1305_blocks_neon(&dctx->h, src, len, hibit); | |
80 | else | |
81 | poly1305_blocks_arm(&dctx->h, src, len, hibit); | |
82 | } | |
83 | ||
84 | static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx, | |
85 | const u8 *src, u32 len, bool do_neon) | |
86 | { | |
87 | if (unlikely(dctx->buflen)) { | |
88 | u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); | |
89 | ||
90 | memcpy(dctx->buf + dctx->buflen, src, bytes); | |
91 | src += bytes; | |
92 | len -= bytes; | |
93 | dctx->buflen += bytes; | |
94 | ||
95 | if (dctx->buflen == POLY1305_BLOCK_SIZE) { | |
96 | arm_poly1305_blocks(dctx, dctx->buf, | |
97 | POLY1305_BLOCK_SIZE, 1, false); | |
98 | dctx->buflen = 0; | |
99 | } | |
100 | } | |
101 | ||
102 | if (likely(len >= POLY1305_BLOCK_SIZE)) { | |
103 | arm_poly1305_blocks(dctx, src, len, 1, do_neon); | |
104 | src += round_down(len, POLY1305_BLOCK_SIZE); | |
105 | len %= POLY1305_BLOCK_SIZE; | |
106 | } | |
107 | ||
108 | if (unlikely(len)) { | |
109 | dctx->buflen = len; | |
110 | memcpy(dctx->buf, src, len); | |
111 | } | |
112 | } | |
113 | ||
114 | static int arm_poly1305_update(struct shash_desc *desc, | |
115 | const u8 *src, unsigned int srclen) | |
116 | { | |
117 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); | |
118 | ||
119 | arm_poly1305_do_update(dctx, src, srclen, false); | |
120 | return 0; | |
121 | } | |
122 | ||
123 | static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc, | |
124 | const u8 *src, | |
125 | unsigned int srclen) | |
126 | { | |
127 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); | |
128 | bool do_neon = crypto_simd_usable() && srclen > 128; | |
129 | ||
130 | if (static_branch_likely(&have_neon) && do_neon) | |
131 | kernel_neon_begin(); | |
132 | arm_poly1305_do_update(dctx, src, srclen, do_neon); | |
133 | if (static_branch_likely(&have_neon) && do_neon) | |
134 | kernel_neon_end(); | |
135 | return 0; | |
136 | } | |
137 | ||
138 | void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, | |
139 | unsigned int nbytes) | |
140 | { | |
141 | bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && | |
142 | crypto_simd_usable(); | |
143 | ||
144 | if (unlikely(dctx->buflen)) { | |
145 | u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); | |
146 | ||
147 | memcpy(dctx->buf + dctx->buflen, src, bytes); | |
148 | src += bytes; | |
149 | nbytes -= bytes; | |
150 | dctx->buflen += bytes; | |
151 | ||
152 | if (dctx->buflen == POLY1305_BLOCK_SIZE) { | |
153 | poly1305_blocks_arm(&dctx->h, dctx->buf, | |
154 | POLY1305_BLOCK_SIZE, 1); | |
155 | dctx->buflen = 0; | |
156 | } | |
157 | } | |
158 | ||
159 | if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { | |
160 | unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); | |
161 | ||
162 | if (static_branch_likely(&have_neon) && do_neon) { | |
706024a5 JD |
163 | do { |
164 | unsigned int todo = min_t(unsigned int, len, SZ_4K); | |
165 | ||
166 | kernel_neon_begin(); | |
167 | poly1305_blocks_neon(&dctx->h, src, todo, 1); | |
168 | kernel_neon_end(); | |
169 | ||
170 | len -= todo; | |
171 | src += todo; | |
172 | } while (len); | |
a6b803b3 AB |
173 | } else { |
174 | poly1305_blocks_arm(&dctx->h, src, len, 1); | |
706024a5 | 175 | src += len; |
a6b803b3 | 176 | } |
a6b803b3 AB |
177 | nbytes %= POLY1305_BLOCK_SIZE; |
178 | } | |
179 | ||
180 | if (unlikely(nbytes)) { | |
181 | dctx->buflen = nbytes; | |
182 | memcpy(dctx->buf, src, nbytes); | |
183 | } | |
184 | } | |
185 | EXPORT_SYMBOL(poly1305_update_arch); | |
186 | ||
187 | void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) | |
188 | { | |
a6b803b3 AB |
189 | if (unlikely(dctx->buflen)) { |
190 | dctx->buf[dctx->buflen++] = 1; | |
191 | memset(dctx->buf + dctx->buflen, 0, | |
192 | POLY1305_BLOCK_SIZE - dctx->buflen); | |
193 | poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); | |
194 | } | |
195 | ||
31899908 | 196 | poly1305_emit_arm(&dctx->h, dst, dctx->s); |
a6b803b3 AB |
197 | *dctx = (struct poly1305_desc_ctx){}; |
198 | } | |
199 | EXPORT_SYMBOL(poly1305_final_arch); | |
200 | ||
201 | static int arm_poly1305_final(struct shash_desc *desc, u8 *dst) | |
202 | { | |
203 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); | |
204 | ||
205 | if (unlikely(!dctx->sset)) | |
206 | return -ENOKEY; | |
207 | ||
208 | poly1305_final_arch(dctx, dst); | |
209 | return 0; | |
210 | } | |
211 | ||
212 | static struct shash_alg arm_poly1305_algs[] = {{ | |
213 | .init = arm_poly1305_init, | |
214 | .update = arm_poly1305_update, | |
215 | .final = arm_poly1305_final, | |
216 | .digestsize = POLY1305_DIGEST_SIZE, | |
217 | .descsize = sizeof(struct poly1305_desc_ctx), | |
218 | ||
219 | .base.cra_name = "poly1305", | |
220 | .base.cra_driver_name = "poly1305-arm", | |
221 | .base.cra_priority = 150, | |
222 | .base.cra_blocksize = POLY1305_BLOCK_SIZE, | |
223 | .base.cra_module = THIS_MODULE, | |
224 | #ifdef CONFIG_KERNEL_MODE_NEON | |
225 | }, { | |
226 | .init = arm_poly1305_init, | |
227 | .update = arm_poly1305_update_neon, | |
228 | .final = arm_poly1305_final, | |
229 | .digestsize = POLY1305_DIGEST_SIZE, | |
230 | .descsize = sizeof(struct poly1305_desc_ctx), | |
231 | ||
232 | .base.cra_name = "poly1305", | |
233 | .base.cra_driver_name = "poly1305-neon", | |
234 | .base.cra_priority = 200, | |
235 | .base.cra_blocksize = POLY1305_BLOCK_SIZE, | |
236 | .base.cra_module = THIS_MODULE, | |
237 | #endif | |
238 | }}; | |
239 | ||
240 | static int __init arm_poly1305_mod_init(void) | |
241 | { | |
242 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && | |
243 | (elf_hwcap & HWCAP_NEON)) | |
244 | static_branch_enable(&have_neon); | |
8394bfec | 245 | else if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) |
a6b803b3 AB |
246 | /* register only the first entry */ |
247 | return crypto_register_shash(&arm_poly1305_algs[0]); | |
248 | ||
8394bfec JD |
249 | return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? |
250 | crypto_register_shashes(arm_poly1305_algs, | |
251 | ARRAY_SIZE(arm_poly1305_algs)) : 0; | |
a6b803b3 AB |
252 | } |
253 | ||
254 | static void __exit arm_poly1305_mod_exit(void) | |
255 | { | |
8394bfec JD |
256 | if (!IS_REACHABLE(CONFIG_CRYPTO_HASH)) |
257 | return; | |
a6b803b3 AB |
258 | if (!static_branch_likely(&have_neon)) { |
259 | crypto_unregister_shash(&arm_poly1305_algs[0]); | |
260 | return; | |
261 | } | |
262 | crypto_unregister_shashes(arm_poly1305_algs, | |
263 | ARRAY_SIZE(arm_poly1305_algs)); | |
264 | } | |
265 | ||
266 | module_init(arm_poly1305_mod_init); | |
267 | module_exit(arm_poly1305_mod_exit); | |
268 | ||
269 | MODULE_LICENSE("GPL v2"); | |
270 | MODULE_ALIAS_CRYPTO("poly1305"); | |
271 | MODULE_ALIAS_CRYPTO("poly1305-arm"); | |
272 | MODULE_ALIAS_CRYPTO("poly1305-neon"); |