]>
Commit | Line | Data |
---|---|---|
eb24af5d JS |
1 | /* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ |
2 | // | |
3 | // This file is dual-licensed, meaning that you can use it under your | |
4 | // choice of either of the following two licenses: | |
5 | // | |
6 | // Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. | |
7 | // | |
8 | // Licensed under the Apache License 2.0 (the "License"). You can obtain | |
9 | // a copy in the file LICENSE in the source distribution or at | |
10 | // https://www.openssl.org/source/license.html | |
11 | // | |
12 | // or | |
13 | // | |
14 | // Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu> | |
15 | // Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com> | |
16 | // Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com> | |
17 | // Copyright 2024 Google LLC | |
18 | // All rights reserved. | |
19 | // | |
20 | // Redistribution and use in source and binary forms, with or without | |
21 | // modification, are permitted provided that the following conditions | |
22 | // are met: | |
23 | // 1. Redistributions of source code must retain the above copyright | |
24 | // notice, this list of conditions and the following disclaimer. | |
25 | // 2. Redistributions in binary form must reproduce the above copyright | |
26 | // notice, this list of conditions and the following disclaimer in the | |
27 | // documentation and/or other materials provided with the distribution. | |
28 | // | |
29 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
30 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
31 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
32 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
33 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
34 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
35 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
36 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
37 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
38 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
39 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
40 | ||
41 | // The generated code of this file depends on the following RISC-V extensions: | |
42 | // - RV64I | |
43 | // - RISC-V Vector ('V') with VLEN >= 128 | |
44 | // - RISC-V Vector AES block cipher extension ('Zvkned') | |
45 | ||
46 | #include <linux/linkage.h> | |
47 | ||
48 | .text | |
49 | .option arch, +zvkned | |
50 | ||
51 | #include "aes-macros.S" | |
52 | ||
// Symbolic names for the argument registers.  The C prototypes documented on
// each function in this file pass their arguments in this order.
#define KEYP		a0	// const struct crypto_aes_ctx *key
#define INP		a1	// Input pointer
#define OUTP		a2	// Output pointer
#define LEN		a3	// Length in bytes (ECB, CBC and CBC-CTS routines)
#define IVP		a4	// Pointer to the 16-byte IV (CBC and CBC-CTS routines)
58 | ||
// Single-block body for one key size: load the block at INP into v16, apply
// aes_crypt (from aes-macros.S) with the given direction and key length, store
// v16 to OUTP, and return to the caller.
.macro	__aes_crypt_zvkned	enc, keylen
	vle32.v		v16, (INP)		// v16 = input block
	aes_crypt	v16, \enc, \keylen
	vse32.v		v16, (OUTP)		// Write the result block
	ret
.endm
65 | ||
// Emits the single-block routine once per key size.  aes_begin (defined in
// aes-macros.S) receives KEYP and the 128/192 labels; presumably it branches
// to them for those key sizes and falls through for 256-bit keys.  Each
// variant ends in its own ret, so none falls through into the next.
.macro	aes_crypt_zvkned	enc
	aes_begin	KEYP, 128f, 192f
	__aes_crypt_zvkned	\enc, 256	// Code right after aes_begin
128:
	__aes_crypt_zvkned	\enc, 128
192:
	__aes_crypt_zvkned	\enc, 192
.endm
74 | ||
// void aes_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			   const u8 in[16], u8 out[16]);
//
// Encrypts the single block at |in| and writes the result to |out|.
SYM_FUNC_START(aes_encrypt_zvkned)
	aes_crypt_zvkned	1		// enc = 1
SYM_FUNC_END(aes_encrypt_zvkned)
80 | ||
// void aes_decrypt_zvkned(const struct crypto_aes_ctx *key,
//			   const u8 in[16], u8 out[16]);
//
// Single-block decryption.  Prototype and calling convention are the same as
// for aes_encrypt_zvkned.
SYM_FUNC_START(aes_decrypt_zvkned)
	aes_crypt_zvkned	0		// enc = 0
SYM_FUNC_END(aes_decrypt_zvkned)
85 | ||
// ECB body for one key size.  Walks LEN bytes from INP to OUTP, letting
// vsetvli decide how many 32-bit words (e32, LMUL=8) are handled per pass,
// then returns.  Modifies t0, t1, INP, OUTP and the v16 register group.
.macro	__aes_ecb_crypt	enc, keylen
	srli		t0, LEN, 2
	// t0 counts the 32-bit words still to be processed.  It stays a
	// multiple of 4 because LEN is a multiple of 16.
.Lecb_chunk\@:
	vsetvli		t1, t0, e32, m8, ta, ma	// t1 = words granted for this pass
	sub		t0, t0, t1		// Words left after this pass
	slli		t1, t1, 2		// t1 = bytes handled in this pass
	vle32.v		v16, (INP)
	aes_crypt	v16, \enc, \keylen
	vse32.v		v16, (OUTP)
	add		OUTP, OUTP, t1
	add		INP, INP, t1
	bnez		t0, .Lecb_chunk\@

	ret
.endm
102 | ||
// Emits the ECB routine once per key size.  The variant placed directly after
// aes_begin is the 256-bit one; the 128: and 192: labels are handed to
// aes_begin as targets for the other two key sizes.  Every variant returns
// on its own.
.macro	aes_ecb_crypt	enc
	aes_begin	KEYP, 128f, 192f
	__aes_ecb_crypt	\enc, 256
128:
	__aes_ecb_crypt	\enc, 128
192:
	__aes_ecb_crypt	\enc, 192
.endm
111 | ||
// void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len);
//
// ECB-encrypts |len| bytes from |in| to |out|.
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_ecb_encrypt_zvkned)
	aes_ecb_crypt	1			// enc = 1
SYM_FUNC_END(aes_ecb_encrypt_zvkned)
119 | ||
// void aes_ecb_decrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len);
//
// ECB decryption.  Prototype, calling convention and the requirements on
// |len| are the same as for aes_ecb_encrypt_zvkned.
SYM_FUNC_START(aes_ecb_decrypt_zvkned)
	aes_ecb_crypt	0			// enc = 0
SYM_FUNC_END(aes_ecb_decrypt_zvkned)
124 | ||
// CBC encryption body for one key size.  Handles one 16-byte block per
// iteration: each block is XORed with the previous ciphertext block before it
// is encrypted, so v16 carries the chaining value from one iteration to the
// next.  On exit the last ciphertext block is written back to IVP.
// Modifies INP, OUTP, LEN, v16 and v17.
.macro	aes_cbc_encrypt	keylen
	vle32.v		v16, (IVP)	// v16 = IV (initial chaining value)
.Lcbc_encrypt_block\@:
	vle32.v		v17, (INP)	// v17 = next plaintext block
	vxor.vv		v16, v16, v17	// Mix in the IV or previous ciphertext block
	aes_encrypt	v16, \keylen	// v16 = ciphertext block
	vse32.v		v16, (OUTP)	// Write ciphertext block
	addi		LEN, LEN, -16	// One block fewer to go
	addi		INP, INP, 16
	addi		OUTP, OUTP, 16
	bnez		LEN, .Lcbc_encrypt_block\@

	vse32.v		v16, (IVP)	// Hand the next IV back to the caller
	ret
.endm
140 | ||
// CBC decryption body for one key size.  Unlike encryption, several blocks
// are handled per iteration (e32, LMUL=4; vsetvli picks the count).  Register
// groups used inside the loop:
//	v16: previous ciphertext blocks (IV or carried-over block in words 0..3)
//	v20: ciphertext blocks of this pass, then their plaintext
//	v24: last ciphertext block of this pass, saved for the next pass
// Modifies t0, t1, INP, OUTP and LEN.  The next IV is written back to IVP.
.macro	aes_cbc_decrypt	keylen
	srli		LEN, LEN, 2	// LEN now counts 32-bit words, not bytes
	vle32.v		v16, (IVP)	// v16 = IV
.Lcbc_decrypt_chunk\@:
	vsetvli		t0, LEN, e32, m4, ta, ma	// t0 = words in this pass
	addi		t1, t0, -4	// Word index of the last block in this pass
	vle32.v		v20, (INP)	// v20 = ciphertext blocks
	vslideup.vi	v16, v20, 4	// v16 = ciphertext shifted up by one block
	vslidedown.vx	v24, v20, t1	// v24 = last ciphertext block
	aes_decrypt	v20, \keylen	// Decrypt every block of this pass
	vxor.vv		v20, v20, v16	// XOR with the previous ciphertext blocks
	vse32.v		v20, (OUTP)	// Write plaintext blocks
	vmv.v.v		v16, v24	// Last ciphertext block is the next "IV"
	slli		t1, t0, 2	// t1 = bytes handled in this pass
	sub		LEN, LEN, t0
	add		INP, INP, t1
	add		OUTP, OUTP, t1
	bnez		LEN, .Lcbc_decrypt_chunk\@

	vsetivli	zero, 4, e32, m1, ta, ma	// Back to a single block
	vse32.v		v16, (IVP)	// Hand the next IV back to the caller
	ret
.endm
164 | ||
// void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len, u8 iv[16]);
//
// CBC-encrypts |len| bytes from |in| to |out| and updates |iv| in place with
// the last ciphertext block.
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_cbc_encrypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_encrypt	256		// Code right after aes_begin
128:
	aes_cbc_encrypt	128
192:
	aes_cbc_encrypt	192
SYM_FUNC_END(aes_cbc_encrypt_zvkned)
177 | ||
// void aes_cbc_decrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len, u8 iv[16]);
//
// CBC decryption.  Prototype, calling convention and the requirements on
// |len| are the same as for aes_cbc_encrypt_zvkned; |iv| is updated in place
// with the last ciphertext block.
SYM_FUNC_START(aes_cbc_decrypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_decrypt	256		// Code right after aes_begin
128:
	aes_cbc_decrypt	128
192:
	aes_cbc_decrypt	192
SYM_FUNC_END(aes_cbc_decrypt_zvkned)
c70dfa4a EB |
187 | |
// CBC-CTS encryption body for one key size.  Expects the IV in v16 on entry.
// Modifies t0, INP, OUTP, LEN, v16 and v17, and returns to the caller.
.macro	aes_cbc_cts_encrypt	keylen

	// Run plain CBC over every block but the last.  The store of each
	// ciphertext block is deferred to the top of the following iteration,
	// so the second-to-last block is still only in v16 when the loop
	// exits; the ciphertext stealing step below needs it there.  A
	// single-block message still gets its one block encrypted here.
	li		t0, 16
	j		.Lcts_encrypt_block\@
.Lcts_encrypt_store\@:
	vse32.v		v16, (OUTP)	// Write the block encrypted last iteration
	addi		OUTP, OUTP, 16
.Lcts_encrypt_block\@:
	vle32.v		v17, (INP)	// v17 = next plaintext block
	vxor.vv		v16, v16, v17	// Mix in the IV or previous ciphertext block
	aes_encrypt	v16, \keylen	// v16 = ciphertext block
	addi		LEN, LEN, -16
	addi		INP, INP, 16
	bgt		LEN, t0, .Lcts_encrypt_store\@	// More than one block left?

	// LEN == 0 here means the message was exactly one block: plain CBC.
	beqz		LEN, .Lcts_encrypt_done\@

	// Ciphertext stealing for the final two blocks:
	//	C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n])
	//	C[n] = Encrypt(P[n-1] ^ C[n-2])[0..LEN]
	//
	// P[i] and C[i] are the i'th plaintext and ciphertext blocks.  The
	// last block, n, has 1 <= LEN <= 16 bytes.  For a two-block message
	// C[n-2] is the IV.
	//
	// State at this point:
	//	v16  = Encrypt(P[n-1] ^ C[n-2])
	//	INP  = address of P[n]
	//	OUTP = address where C[n-1] belongs
	// P[n] is read before C[n] is written so that in-place operation works.
	addi		t0, OUTP, 16	// t0 = address where C[n] belongs
	vsetvli		zero, LEN, e8, m1, tu, ma	// LEN bytes, tail undisturbed
	vle8.v		v17, (INP)	// v17 = P[n]
	vse8.v		v16, (t0)	// Write C[n]
	vxor.vv		v16, v16, v17	// v16 = Encrypt(P[n-1] ^ C[n-2]) ^ P[n]
	vsetivli	zero, 4, e32, m1, ta, ma	// Back to one full block
	aes_encrypt	v16, \keylen
.Lcts_encrypt_done\@:
	vse32.v		v16, (OUTP)	// Write C[n-1] (or C[n] for a single block)
	ret
.endm
232 | ||
#define LEN32		t4	// Length of remaining full blocks in 32-bit words
#define LEN_MOD16	t5	// Length of message in bytes mod 16

// CBC-CTS decryption body for one key size.  Expects the IV in v16 on entry.
// Modifies t0, t1, LEN32, LEN_MOD16, INP, OUTP and the v16, v20, v24 and v28
// register groups, and returns to the caller.
.macro	aes_cbc_cts_decrypt	keylen
	andi		LEN_MOD16, LEN, 15
	andi		LEN32, LEN, ~15
	srli		LEN32, LEN32, 2

	// Keep C[n-2] in v28 for the ciphertext stealing step at the end.
	// With fewer than three blocks (LEN < 33) C[n-2] is the IV; otherwise
	// it is the third-to-last ciphertext block, which is loaded from
	// INP + ((LEN - 33) & ~15).
	vmv.v.v		v28, v16	// Start with the IV
	addi		t0, LEN, -33
	bltz		t0, .Lcts_decrypt_loop\@
	andi		t0, t0, ~15
	add		t0, t0, INP
	vle32.v		v28, (t0)

	// Plain CBC decryption over all full blocks.  The output written here
	// is wrong for the last full block (or for the last two full blocks
	// when the message is block-aligned), except in the single-block
	// case, because those blocks get XORed with the wrong values.  That
	// is repaired after the loop without repeating any AES decryption,
	// which lets more of the decryptions run in parallel.
.Lcts_decrypt_loop\@:
	vsetvli		t0, LEN32, e32, m4, ta, ma	// t0 = words in this pass
	addi		t1, t0, -4	// Word index of the last block in this pass
	vle32.v		v20, (INP)	// v20 = ciphertext blocks of this pass
	vmv.v.v		v24, v16	// IV, or last ciphertext block of prev pass
	vslideup.vi	v24, v20, 4	// v24 = previous ciphertext blocks
	vslidedown.vx	v16, v20, t1	// v16 = last ciphertext block of this pass
	aes_decrypt	v20, \keylen	// v20 = raw AES plaintexts
	vxor.vv		v24, v24, v20	// Apply the CBC chaining XOR
	vse32.v		v24, (OUTP)	// Write this pass's plaintext blocks
	sub		LEN32, LEN32, t0
	slli		t0, t0, 2	// t0 = bytes handled in this pass
	add		OUTP, OUTP, t0
	add		INP, INP, t0
	bnez		LEN32, .Lcts_decrypt_loop\@

	addi		t0, OUTP, -16	// t0 = address of last full plaintext block
	vsetivli	zero, 4, e32, m4, ta, ma
	vslidedown.vx	v20, v20, t1	// v20 = raw plaintext of last full block
	bnez		LEN_MOD16, .Lcts_decrypt_non_block_aligned\@

	// A single-block message is plain CBC; the loop output is final.
	li		t1, 16
	beq		LEN, t1, .Lcts_decrypt_done\@

	// Block-aligned message: only the last two blocks need repair.
	//
	//	P[n-1] = Decrypt(C[n]) ^ C[n-2]
	//	P[n] = Decrypt(C[n-1]) ^ C[n]
	//
	// Available: C[n] in v16, Decrypt(C[n]) in v20, C[n-2] in v28, and
	// Decrypt(C[n-1]) ^ C[n-2] in the output buffer.  No block has to be
	// decrypted a second time.
	addi		t1, OUTP, -32	// t1 = address where P[n-1] belongs
	vxor.vv		v20, v20, v28	// Decrypt(C[n]) ^ C[n-2] == P[n-1]
	vle32.v		v24, (t1)	// v24 = Decrypt(C[n-1]) ^ C[n-2]
	vse32.v		v20, (t1)	// Write P[n-1]
	vxor.vv		v20, v24, v16	// Decrypt(C[n-1]) ^ C[n-2] ^ C[n] == P[n] ^ C[n-2]
	j		.Lcts_decrypt_finish\@

.Lcts_decrypt_non_block_aligned\@:
	// Ciphertext stealing for a partial final block:
	//
	//	P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) ^ C[n-2]
	//	P[n] = (Decrypt(C[n-1]) ^ C[n])[0..LEN_MOD16]
	//
	// Decrypt(C[n-1]) is in v20 and C[n-2] is in v28.
	vmv.v.v		v16, v20	// v16 = Decrypt(C[n-1])
	vsetvli		zero, LEN_MOD16, e8, m1, tu, ma	// Tail undisturbed
	vle8.v		v20, (INP)	// v20 = C[n] || Decrypt(C[n-1])[LEN_MOD16..16]
	vxor.vv		v16, v16, v20	// v16 = Decrypt(C[n-1]) ^ C[n]
	vse8.v		v16, (OUTP)	// Write P[n]
	vsetivli	zero, 4, e32, m1, ta, ma	// Back to one full block
	aes_decrypt	v20, \keylen	// v20 = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16])
.Lcts_decrypt_finish\@:
	vxor.vv		v20, v20, v28	// Final XOR with C[n-2]
	vse32.v		v20, (t0)	// Write the last full plaintext block
.Lcts_decrypt_done\@:
	ret
.endm
317 | ||
// CBC-CTS body for one key size.  Loads the IV into v16 and then picks the
// direction from a5, the sixth argument (|enc| in the C prototype): nonzero
// runs the encryption body, zero the decryption body.  Both bodies end in ret,
// so the encryption path never runs on into the decryption code.
.macro	aes_cbc_cts_crypt	keylen
	vle32.v		v16, (IVP)	// v16 = IV
	beqz		a5, .Lcts_decrypt\@	// enc == false?
	aes_cbc_cts_encrypt	\keylen
.Lcts_decrypt\@:
	aes_cbc_cts_decrypt	\keylen
.endm
325 | ||
// void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key,
//				 const u8 *in, u8 *out, size_t len,
//				 const u8 iv[16], bool enc);
//
// Encrypts (enc != 0) or decrypts (enc == 0) a message using AES-CBC-CTS in
// its CS3 variant, i.e. the one that always swaps the last two blocks.  |iv|
// is only read; no updated IV is written back.
// NOTE(review): the code paths assume len >= 16; confirm the caller enforces it.
SYM_FUNC_START(aes_cbc_cts_crypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_cts_crypt	256	// Code right after aes_begin
128:
	aes_cbc_cts_crypt	128
192:
	aes_cbc_cts_crypt	192
SYM_FUNC_END(aes_cbc_cts_crypt_zvkned)