]>
Commit | Line | Data |
---|---|---|
b36d8c09 AB |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * ARM NEON accelerated ChaCha and XChaCha stream ciphers, | |
4 | * including ChaCha20 (RFC7539) | |
5 | * | |
6 | * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> | |
7 | * Copyright (C) 2015 Martin Willi | |
8 | */ | |
9 | ||
10 | #include <crypto/algapi.h> | |
11 | #include <crypto/internal/chacha.h> | |
12 | #include <crypto/internal/simd.h> | |
13 | #include <crypto/internal/skcipher.h> | |
a44a3430 | 14 | #include <linux/jump_label.h> |
b36d8c09 AB |
15 | #include <linux/kernel.h> |
16 | #include <linux/module.h> | |
17 | ||
18 | #include <asm/cputype.h> | |
19 | #include <asm/hwcap.h> | |
20 | #include <asm/neon.h> | |
21 | #include <asm/simd.h> | |
22 | ||
23 | asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, | |
24 | int nrounds); | |
25 | asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, | |
26 | int nrounds); | |
27 | asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); | |
28 | asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); | |
29 | ||
30 | asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, | |
31 | const u32 *state, int nrounds); | |
32 | ||
a44a3430 AB |
33 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); |
34 | ||
b36d8c09 AB |
35 | static inline bool neon_usable(void) |
36 | { | |
a44a3430 | 37 | return static_branch_likely(&use_neon) && crypto_simd_usable(); |
b36d8c09 AB |
38 | } |
39 | ||
40 | static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, | |
41 | unsigned int bytes, int nrounds) | |
42 | { | |
43 | u8 buf[CHACHA_BLOCK_SIZE]; | |
44 | ||
45 | while (bytes >= CHACHA_BLOCK_SIZE * 4) { | |
46 | chacha_4block_xor_neon(state, dst, src, nrounds); | |
47 | bytes -= CHACHA_BLOCK_SIZE * 4; | |
48 | src += CHACHA_BLOCK_SIZE * 4; | |
49 | dst += CHACHA_BLOCK_SIZE * 4; | |
50 | state[12] += 4; | |
51 | } | |
52 | while (bytes >= CHACHA_BLOCK_SIZE) { | |
53 | chacha_block_xor_neon(state, dst, src, nrounds); | |
54 | bytes -= CHACHA_BLOCK_SIZE; | |
55 | src += CHACHA_BLOCK_SIZE; | |
56 | dst += CHACHA_BLOCK_SIZE; | |
57 | state[12]++; | |
58 | } | |
59 | if (bytes) { | |
60 | memcpy(buf, src, bytes); | |
61 | chacha_block_xor_neon(state, buf, buf, nrounds); | |
62 | memcpy(dst, buf, bytes); | |
63 | } | |
64 | } | |
65 | ||
a44a3430 AB |
66 | void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) |
67 | { | |
68 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { | |
69 | hchacha_block_arm(state, stream, nrounds); | |
70 | } else { | |
71 | kernel_neon_begin(); | |
72 | hchacha_block_neon(state, stream, nrounds); | |
73 | kernel_neon_end(); | |
74 | } | |
75 | } | |
76 | EXPORT_SYMBOL(hchacha_block_arch); | |
77 | ||
78 | void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) | |
79 | { | |
80 | chacha_init_generic(state, key, iv); | |
81 | } | |
82 | EXPORT_SYMBOL(chacha_init_arch); | |
83 | ||
84 | void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, | |
85 | int nrounds) | |
86 | { | |
87 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || | |
88 | bytes <= CHACHA_BLOCK_SIZE) { | |
89 | chacha_doarm(dst, src, bytes, state, nrounds); | |
90 | state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); | |
91 | return; | |
92 | } | |
93 | ||
706024a5 JD |
94 | do { |
95 | unsigned int todo = min_t(unsigned int, bytes, SZ_4K); | |
96 | ||
97 | kernel_neon_begin(); | |
98 | chacha_doneon(state, dst, src, todo, nrounds); | |
99 | kernel_neon_end(); | |
100 | ||
101 | bytes -= todo; | |
102 | src += todo; | |
103 | dst += todo; | |
104 | } while (bytes); | |
a44a3430 AB |
105 | } |
106 | EXPORT_SYMBOL(chacha_crypt_arch); | |
107 | ||
b36d8c09 AB |
108 | static int chacha_stream_xor(struct skcipher_request *req, |
109 | const struct chacha_ctx *ctx, const u8 *iv, | |
110 | bool neon) | |
111 | { | |
112 | struct skcipher_walk walk; | |
113 | u32 state[16]; | |
114 | int err; | |
115 | ||
116 | err = skcipher_walk_virt(&walk, req, false); | |
117 | ||
118 | chacha_init_generic(state, ctx->key, iv); | |
119 | ||
120 | while (walk.nbytes > 0) { | |
121 | unsigned int nbytes = walk.nbytes; | |
122 | ||
123 | if (nbytes < walk.total) | |
124 | nbytes = round_down(nbytes, walk.stride); | |
125 | ||
0bc81767 | 126 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { |
b36d8c09 AB |
127 | chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, |
128 | nbytes, state, ctx->nrounds); | |
129 | state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); | |
130 | } else { | |
131 | kernel_neon_begin(); | |
132 | chacha_doneon(state, walk.dst.virt.addr, | |
133 | walk.src.virt.addr, nbytes, ctx->nrounds); | |
134 | kernel_neon_end(); | |
135 | } | |
136 | err = skcipher_walk_done(&walk, walk.nbytes - nbytes); | |
137 | } | |
138 | ||
139 | return err; | |
140 | } | |
141 | ||
142 | static int do_chacha(struct skcipher_request *req, bool neon) | |
143 | { | |
144 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); | |
145 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); | |
146 | ||
147 | return chacha_stream_xor(req, ctx, req->iv, neon); | |
148 | } | |
149 | ||
150 | static int chacha_arm(struct skcipher_request *req) | |
151 | { | |
152 | return do_chacha(req, false); | |
153 | } | |
154 | ||
155 | static int chacha_neon(struct skcipher_request *req) | |
156 | { | |
157 | return do_chacha(req, neon_usable()); | |
158 | } | |
159 | ||
160 | static int do_xchacha(struct skcipher_request *req, bool neon) | |
161 | { | |
162 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); | |
163 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); | |
164 | struct chacha_ctx subctx; | |
165 | u32 state[16]; | |
166 | u8 real_iv[16]; | |
167 | ||
168 | chacha_init_generic(state, ctx->key, req->iv); | |
169 | ||
0bc81767 | 170 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { |
b36d8c09 AB |
171 | hchacha_block_arm(state, subctx.key, ctx->nrounds); |
172 | } else { | |
173 | kernel_neon_begin(); | |
174 | hchacha_block_neon(state, subctx.key, ctx->nrounds); | |
175 | kernel_neon_end(); | |
176 | } | |
177 | subctx.nrounds = ctx->nrounds; | |
178 | ||
179 | memcpy(&real_iv[0], req->iv + 24, 8); | |
180 | memcpy(&real_iv[8], req->iv + 16, 8); | |
181 | return chacha_stream_xor(req, &subctx, real_iv, neon); | |
182 | } | |
183 | ||
184 | static int xchacha_arm(struct skcipher_request *req) | |
185 | { | |
186 | return do_xchacha(req, false); | |
187 | } | |
188 | ||
189 | static int xchacha_neon(struct skcipher_request *req) | |
190 | { | |
191 | return do_xchacha(req, neon_usable()); | |
192 | } | |
193 | ||
194 | static struct skcipher_alg arm_algs[] = { | |
195 | { | |
196 | .base.cra_name = "chacha20", | |
197 | .base.cra_driver_name = "chacha20-arm", | |
198 | .base.cra_priority = 200, | |
199 | .base.cra_blocksize = 1, | |
200 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
201 | .base.cra_module = THIS_MODULE, | |
202 | ||
203 | .min_keysize = CHACHA_KEY_SIZE, | |
204 | .max_keysize = CHACHA_KEY_SIZE, | |
205 | .ivsize = CHACHA_IV_SIZE, | |
206 | .chunksize = CHACHA_BLOCK_SIZE, | |
207 | .setkey = chacha20_setkey, | |
208 | .encrypt = chacha_arm, | |
209 | .decrypt = chacha_arm, | |
210 | }, { | |
211 | .base.cra_name = "xchacha20", | |
212 | .base.cra_driver_name = "xchacha20-arm", | |
213 | .base.cra_priority = 200, | |
214 | .base.cra_blocksize = 1, | |
215 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
216 | .base.cra_module = THIS_MODULE, | |
217 | ||
218 | .min_keysize = CHACHA_KEY_SIZE, | |
219 | .max_keysize = CHACHA_KEY_SIZE, | |
220 | .ivsize = XCHACHA_IV_SIZE, | |
221 | .chunksize = CHACHA_BLOCK_SIZE, | |
222 | .setkey = chacha20_setkey, | |
223 | .encrypt = xchacha_arm, | |
224 | .decrypt = xchacha_arm, | |
225 | }, { | |
226 | .base.cra_name = "xchacha12", | |
227 | .base.cra_driver_name = "xchacha12-arm", | |
228 | .base.cra_priority = 200, | |
229 | .base.cra_blocksize = 1, | |
230 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
231 | .base.cra_module = THIS_MODULE, | |
232 | ||
233 | .min_keysize = CHACHA_KEY_SIZE, | |
234 | .max_keysize = CHACHA_KEY_SIZE, | |
235 | .ivsize = XCHACHA_IV_SIZE, | |
236 | .chunksize = CHACHA_BLOCK_SIZE, | |
237 | .setkey = chacha12_setkey, | |
238 | .encrypt = xchacha_arm, | |
239 | .decrypt = xchacha_arm, | |
240 | }, | |
241 | }; | |
242 | ||
243 | static struct skcipher_alg neon_algs[] = { | |
244 | { | |
245 | .base.cra_name = "chacha20", | |
246 | .base.cra_driver_name = "chacha20-neon", | |
247 | .base.cra_priority = 300, | |
248 | .base.cra_blocksize = 1, | |
249 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
250 | .base.cra_module = THIS_MODULE, | |
251 | ||
252 | .min_keysize = CHACHA_KEY_SIZE, | |
253 | .max_keysize = CHACHA_KEY_SIZE, | |
254 | .ivsize = CHACHA_IV_SIZE, | |
255 | .chunksize = CHACHA_BLOCK_SIZE, | |
256 | .walksize = 4 * CHACHA_BLOCK_SIZE, | |
257 | .setkey = chacha20_setkey, | |
258 | .encrypt = chacha_neon, | |
259 | .decrypt = chacha_neon, | |
260 | }, { | |
261 | .base.cra_name = "xchacha20", | |
262 | .base.cra_driver_name = "xchacha20-neon", | |
263 | .base.cra_priority = 300, | |
264 | .base.cra_blocksize = 1, | |
265 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
266 | .base.cra_module = THIS_MODULE, | |
267 | ||
268 | .min_keysize = CHACHA_KEY_SIZE, | |
269 | .max_keysize = CHACHA_KEY_SIZE, | |
270 | .ivsize = XCHACHA_IV_SIZE, | |
271 | .chunksize = CHACHA_BLOCK_SIZE, | |
272 | .walksize = 4 * CHACHA_BLOCK_SIZE, | |
273 | .setkey = chacha20_setkey, | |
274 | .encrypt = xchacha_neon, | |
275 | .decrypt = xchacha_neon, | |
276 | }, { | |
277 | .base.cra_name = "xchacha12", | |
278 | .base.cra_driver_name = "xchacha12-neon", | |
279 | .base.cra_priority = 300, | |
280 | .base.cra_blocksize = 1, | |
281 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
282 | .base.cra_module = THIS_MODULE, | |
283 | ||
284 | .min_keysize = CHACHA_KEY_SIZE, | |
285 | .max_keysize = CHACHA_KEY_SIZE, | |
286 | .ivsize = XCHACHA_IV_SIZE, | |
287 | .chunksize = CHACHA_BLOCK_SIZE, | |
288 | .walksize = 4 * CHACHA_BLOCK_SIZE, | |
289 | .setkey = chacha12_setkey, | |
290 | .encrypt = xchacha_neon, | |
291 | .decrypt = xchacha_neon, | |
292 | } | |
293 | }; | |
294 | ||
295 | static int __init chacha_simd_mod_init(void) | |
296 | { | |
8394bfec | 297 | int err = 0; |
b36d8c09 | 298 | |
8394bfec JD |
299 | if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { |
300 | err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); | |
301 | if (err) | |
302 | return err; | |
303 | } | |
b36d8c09 AB |
304 | |
305 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { | |
306 | int i; | |
307 | ||
308 | switch (read_cpuid_part()) { | |
309 | case ARM_CPU_PART_CORTEX_A7: | |
310 | case ARM_CPU_PART_CORTEX_A5: | |
311 | /* | |
312 | * The Cortex-A7 and Cortex-A5 do not perform well with | |
313 | * the NEON implementation but do incredibly with the | |
314 | * scalar one and use less power. | |
315 | */ | |
316 | for (i = 0; i < ARRAY_SIZE(neon_algs); i++) | |
317 | neon_algs[i].base.cra_priority = 0; | |
318 | break; | |
a44a3430 AB |
319 | default: |
320 | static_branch_enable(&use_neon); | |
b36d8c09 AB |
321 | } |
322 | ||
8394bfec JD |
323 | if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { |
324 | err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); | |
325 | if (err) | |
326 | crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); | |
327 | } | |
b36d8c09 AB |
328 | } |
329 | return err; | |
330 | } | |
331 | ||
332 | static void __exit chacha_simd_mod_fini(void) | |
333 | { | |
8394bfec JD |
334 | if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { |
335 | crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); | |
336 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) | |
337 | crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); | |
338 | } | |
b36d8c09 AB |
339 | } |
340 | ||
341 | module_init(chacha_simd_mod_init); | |
342 | module_exit(chacha_simd_mod_fini); | |
343 | ||
344 | MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)"); | |
345 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | |
346 | MODULE_LICENSE("GPL v2"); | |
347 | MODULE_ALIAS_CRYPTO("chacha20"); | |
348 | MODULE_ALIAS_CRYPTO("chacha20-arm"); | |
349 | MODULE_ALIAS_CRYPTO("xchacha20"); | |
350 | MODULE_ALIAS_CRYPTO("xchacha20-arm"); | |
351 | MODULE_ALIAS_CRYPTO("xchacha12"); | |
352 | MODULE_ALIAS_CRYPTO("xchacha12-arm"); | |
353 | #ifdef CONFIG_KERNEL_MODE_NEON | |
354 | MODULE_ALIAS_CRYPTO("chacha20-neon"); | |
355 | MODULE_ALIAS_CRYPTO("xchacha20-neon"); | |
356 | MODULE_ALIAS_CRYPTO("xchacha12-neon"); | |
357 | #endif |