]>
Commit | Line | Data |
---|---|---|
f569ca16 AB |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 | |
4 | * | |
5 | * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> | |
6 | */ | |
7 | ||
8 | #include <asm/hwcap.h> | |
9 | #include <asm/neon.h> | |
10 | #include <asm/simd.h> | |
11 | #include <asm/unaligned.h> | |
12 | #include <crypto/algapi.h> | |
13 | #include <crypto/internal/hash.h> | |
14 | #include <crypto/internal/poly1305.h> | |
15 | #include <crypto/internal/simd.h> | |
16 | #include <linux/cpufeature.h> | |
17 | #include <linux/crypto.h> | |
18 | #include <linux/jump_label.h> | |
19 | #include <linux/module.h> | |
20 | ||
/*
 * Low-level Poly1305 primitives. They are declared asmlinkage and defined
 * outside this file (presumably by the OpenSSL/Cryptogams assembly named in
 * the file header -- confirm against the build).
 *
 * Every call site in this file passes &dctx->h as 'state'. 'hibit' is passed
 * as 1 for full blocks taken from the input and as 0 for the final block that
 * poly1305_final_arch() pads itself. poly1305_emit() is called with dctx->s
 * as 'nonce'.
 */
asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
f569ca16 AB |
25 | |
/*
 * Static key that gates every NEON code path in this file. It starts out
 * false and is enabled by neon_poly1305_mod_init() when the CPU reports the
 * ASIMD feature.
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
27 | ||
28 | void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) | |
29 | { | |
30 | poly1305_init_arm64(&dctx->h, key); | |
31 | dctx->s[0] = get_unaligned_le32(key + 16); | |
32 | dctx->s[1] = get_unaligned_le32(key + 20); | |
33 | dctx->s[2] = get_unaligned_le32(key + 24); | |
34 | dctx->s[3] = get_unaligned_le32(key + 28); | |
35 | dctx->buflen = 0; | |
36 | } | |
37 | EXPORT_SYMBOL(poly1305_init_arch); | |
38 | ||
39 | static int neon_poly1305_init(struct shash_desc *desc) | |
40 | { | |
41 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); | |
42 | ||
43 | dctx->buflen = 0; | |
44 | dctx->rset = 0; | |
45 | dctx->sset = false; | |
46 | ||
47 | return 0; | |
48 | } | |
49 | ||
50 | static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, | |
51 | u32 len, u32 hibit, bool do_neon) | |
52 | { | |
53 | if (unlikely(!dctx->sset)) { | |
54 | if (!dctx->rset) { | |
55 | poly1305_init_arch(dctx, src); | |
56 | src += POLY1305_BLOCK_SIZE; | |
57 | len -= POLY1305_BLOCK_SIZE; | |
58 | dctx->rset = 1; | |
59 | } | |
60 | if (len >= POLY1305_BLOCK_SIZE) { | |
61 | dctx->s[0] = get_unaligned_le32(src + 0); | |
62 | dctx->s[1] = get_unaligned_le32(src + 4); | |
63 | dctx->s[2] = get_unaligned_le32(src + 8); | |
64 | dctx->s[3] = get_unaligned_le32(src + 12); | |
65 | src += POLY1305_BLOCK_SIZE; | |
66 | len -= POLY1305_BLOCK_SIZE; | |
67 | dctx->sset = true; | |
68 | } | |
69 | if (len < POLY1305_BLOCK_SIZE) | |
70 | return; | |
71 | } | |
72 | ||
73 | len &= ~(POLY1305_BLOCK_SIZE - 1); | |
74 | ||
75 | if (static_branch_likely(&have_neon) && likely(do_neon)) | |
76 | poly1305_blocks_neon(&dctx->h, src, len, hibit); | |
77 | else | |
78 | poly1305_blocks(&dctx->h, src, len, hibit); | |
79 | } | |
80 | ||
81 | static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx, | |
82 | const u8 *src, u32 len, bool do_neon) | |
83 | { | |
84 | if (unlikely(dctx->buflen)) { | |
85 | u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); | |
86 | ||
87 | memcpy(dctx->buf + dctx->buflen, src, bytes); | |
88 | src += bytes; | |
89 | len -= bytes; | |
90 | dctx->buflen += bytes; | |
91 | ||
92 | if (dctx->buflen == POLY1305_BLOCK_SIZE) { | |
93 | neon_poly1305_blocks(dctx, dctx->buf, | |
94 | POLY1305_BLOCK_SIZE, 1, false); | |
95 | dctx->buflen = 0; | |
96 | } | |
97 | } | |
98 | ||
99 | if (likely(len >= POLY1305_BLOCK_SIZE)) { | |
100 | neon_poly1305_blocks(dctx, src, len, 1, do_neon); | |
101 | src += round_down(len, POLY1305_BLOCK_SIZE); | |
102 | len %= POLY1305_BLOCK_SIZE; | |
103 | } | |
104 | ||
105 | if (unlikely(len)) { | |
106 | dctx->buflen = len; | |
107 | memcpy(dctx->buf, src, len); | |
108 | } | |
109 | } | |
110 | ||
111 | static int neon_poly1305_update(struct shash_desc *desc, | |
112 | const u8 *src, unsigned int srclen) | |
113 | { | |
114 | bool do_neon = crypto_simd_usable() && srclen > 128; | |
115 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); | |
116 | ||
117 | if (static_branch_likely(&have_neon) && do_neon) | |
118 | kernel_neon_begin(); | |
119 | neon_poly1305_do_update(dctx, src, srclen, do_neon); | |
120 | if (static_branch_likely(&have_neon) && do_neon) | |
121 | kernel_neon_end(); | |
122 | return 0; | |
123 | } | |
124 | ||
125 | void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, | |
126 | unsigned int nbytes) | |
127 | { | |
128 | if (unlikely(dctx->buflen)) { | |
129 | u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); | |
130 | ||
131 | memcpy(dctx->buf + dctx->buflen, src, bytes); | |
132 | src += bytes; | |
133 | nbytes -= bytes; | |
134 | dctx->buflen += bytes; | |
135 | ||
136 | if (dctx->buflen == POLY1305_BLOCK_SIZE) { | |
137 | poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); | |
138 | dctx->buflen = 0; | |
139 | } | |
140 | } | |
141 | ||
142 | if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { | |
143 | unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); | |
144 | ||
145 | if (static_branch_likely(&have_neon) && crypto_simd_usable()) { | |
706024a5 JD |
146 | do { |
147 | unsigned int todo = min_t(unsigned int, len, SZ_4K); | |
148 | ||
149 | kernel_neon_begin(); | |
150 | poly1305_blocks_neon(&dctx->h, src, todo, 1); | |
151 | kernel_neon_end(); | |
152 | ||
153 | len -= todo; | |
154 | src += todo; | |
155 | } while (len); | |
f569ca16 AB |
156 | } else { |
157 | poly1305_blocks(&dctx->h, src, len, 1); | |
706024a5 | 158 | src += len; |
f569ca16 | 159 | } |
f569ca16 AB |
160 | nbytes %= POLY1305_BLOCK_SIZE; |
161 | } | |
162 | ||
163 | if (unlikely(nbytes)) { | |
164 | dctx->buflen = nbytes; | |
165 | memcpy(dctx->buf, src, nbytes); | |
166 | } | |
167 | } | |
168 | EXPORT_SYMBOL(poly1305_update_arch); | |
169 | ||
170 | void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) | |
171 | { | |
f569ca16 AB |
172 | if (unlikely(dctx->buflen)) { |
173 | dctx->buf[dctx->buflen++] = 1; | |
174 | memset(dctx->buf + dctx->buflen, 0, | |
175 | POLY1305_BLOCK_SIZE - dctx->buflen); | |
176 | poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); | |
177 | } | |
178 | ||
31899908 | 179 | poly1305_emit(&dctx->h, dst, dctx->s); |
f569ca16 AB |
180 | *dctx = (struct poly1305_desc_ctx){}; |
181 | } | |
182 | EXPORT_SYMBOL(poly1305_final_arch); | |
183 | ||
184 | static int neon_poly1305_final(struct shash_desc *desc, u8 *dst) | |
185 | { | |
186 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); | |
187 | ||
188 | if (unlikely(!dctx->sset)) | |
189 | return -ENOKEY; | |
190 | ||
191 | poly1305_final_arch(dctx, dst); | |
192 | return 0; | |
193 | } | |
194 | ||
195 | static struct shash_alg neon_poly1305_alg = { | |
196 | .init = neon_poly1305_init, | |
197 | .update = neon_poly1305_update, | |
198 | .final = neon_poly1305_final, | |
199 | .digestsize = POLY1305_DIGEST_SIZE, | |
200 | .descsize = sizeof(struct poly1305_desc_ctx), | |
201 | ||
202 | .base.cra_name = "poly1305", | |
203 | .base.cra_driver_name = "poly1305-neon", | |
204 | .base.cra_priority = 200, | |
205 | .base.cra_blocksize = POLY1305_BLOCK_SIZE, | |
206 | .base.cra_module = THIS_MODULE, | |
207 | }; | |
208 | ||
209 | static int __init neon_poly1305_mod_init(void) | |
210 | { | |
211 | if (!cpu_have_named_feature(ASIMD)) | |
212 | return 0; | |
213 | ||
214 | static_branch_enable(&have_neon); | |
215 | ||
8394bfec JD |
216 | return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? |
217 | crypto_register_shash(&neon_poly1305_alg) : 0; | |
f569ca16 AB |
218 | } |
219 | ||
220 | static void __exit neon_poly1305_mod_exit(void) | |
221 | { | |
8394bfec | 222 | if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD)) |
f569ca16 AB |
223 | crypto_unregister_shash(&neon_poly1305_alg); |
224 | } | |
225 | ||
/* Module entry and exit points. */
module_init(neon_poly1305_mod_init);
module_exit(neon_poly1305_mod_exit);

MODULE_LICENSE("GPL v2");
/* Aliases for the cra_name and cra_driver_name set in neon_poly1305_alg. */
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-neon");