]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/chacha/chacha_enc.c
riscv: Provide a vector implementation of CHACHA20 cipher.
[thirdparty/openssl.git] / crypto / chacha / chacha_enc.c
1 /*
2 * Copyright 2015-2020 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 /* Adapted from the public domain code by D. Bernstein from SUPERCOP. */
11
12 #include <string.h>
13
14 #include "internal/endian.h"
15 #include "crypto/chacha.h"
16 #include "crypto/ctype.h"
17
/* Shorthand for the byte and word types used by the ChaCha code below. */
typedef unsigned char u8;
/* NOTE(review): the code below treats u32 as exactly 32 bits wide - confirm
 * for every supported target. */
typedef unsigned int u32;

/*
 * One 64-byte ChaCha block. The same storage can be accessed either as
 * sixteen words (|u|) or as sixty-four individual bytes (|c|).
 */
typedef union {
    u32 u[16];
    u8 c[64];
} chacha_buf;
24
/*
 * ROTATE(v, n) rotates the 32-bit value |v| left by |n| bits.
 *
 * The portable form below evaluates |v| twice and shifts right by 32 - n,
 * so |n| is expected to be strictly between 0 and 32.
 */
# define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

/*
 * On RISC-V targets that advertise Zbb or Zbkb, and unless PEDANTIC or one of
 * the no-asm switches is defined, replace the portable form with a single
 * rotate-right-immediate instruction. Rotating right by 32 - n is the same
 * as rotating left by n. The "i" constraint means |n| must be a compile-time
 * constant in these variants.
 */
# if !defined(PEDANTIC) && \
     defined(__GNUC__) && __GNUC__>=2 && \
     !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && \
     (defined(__riscv_zbb) || defined(__riscv_zbkb))
#  if __riscv_xlen == 64
#   undef ROTATE
#   define ROTATE(v, n) ({ u32 rotated;                         \
                           asm ("roriw %0, %1, %2"              \
                                : "=r"(rotated)                 \
                                : "r"(v), "i"(32 - (n)));       \
                           rotated; })
#  elif __riscv_xlen == 32
#   undef ROTATE
#   define ROTATE(v, n) ({ u32 rotated;                         \
                           asm ("rori %0, %1, %2"               \
                                : "=r"(rotated)                 \
                                : "r"(v), "i"(32 - (n)));       \
                           rotated; })
#  endif
# endif
48
/*
 * U32TO8_LITTLE stores the 32-bit value |v| into the four bytes starting at
 * |p|, least significant byte first.
 *
 * |v| is parenthesized in every use so that an argument such as `a | b` is
 * shifted as a whole rather than having `>>` bind to its last operand only.
 * Both |p| and |v| are still evaluated four times, so neither argument may
 * have side effects.
 */
# define U32TO8_LITTLE(p, v) do {           \
        (p)[0] = (u8)((v) >>  0);           \
        (p)[1] = (u8)((v) >>  8);           \
        (p)[2] = (u8)((v) >> 16);           \
        (p)[3] = (u8)((v) >> 24);           \
    } while(0)
55
/*
 * QUARTERROUND(a, b, c, d) applies one ChaCha quarter round in place to the
 * four words x[a], x[b], x[c] and x[d]. An array named |x| must be in scope
 * where the macro is used. The expansion is a single comma expression, so
 * the steps run strictly in the order written.
 */
# define QUARTERROUND(a,b,c,d) (                                \
        x[a] += x[b], x[d] ^= x[a], x[d] = ROTATE(x[d], 16),    \
        x[c] += x[d], x[b] ^= x[c], x[b] = ROTATE(x[b], 12),    \
        x[a] += x[b], x[d] ^= x[a], x[d] = ROTATE(x[d],  8),    \
        x[c] += x[d], x[b] ^= x[c], x[b] = ROTATE(x[b],  7) )
62
63 /* chacha_core performs 20 rounds of ChaCha on the input words in
64 * |input| and writes the 64 output bytes to |output|. */
65 static void chacha20_core(chacha_buf *output, const u32 input[16])
66 {
67 u32 x[16];
68 int i;
69 DECLARE_IS_ENDIAN;
70
71 memcpy(x, input, sizeof(x));
72
73 for (i = 20; i > 0; i -= 2) {
74 QUARTERROUND(0, 4, 8, 12);
75 QUARTERROUND(1, 5, 9, 13);
76 QUARTERROUND(2, 6, 10, 14);
77 QUARTERROUND(3, 7, 11, 15);
78 QUARTERROUND(0, 5, 10, 15);
79 QUARTERROUND(1, 6, 11, 12);
80 QUARTERROUND(2, 7, 8, 13);
81 QUARTERROUND(3, 4, 9, 14);
82 }
83
84 if (IS_LITTLE_ENDIAN) {
85 for (i = 0; i < 16; ++i)
86 output->u[i] = x[i] + input[i];
87 } else {
88 for (i = 0; i < 16; ++i)
89 U32TO8_LITTLE(output->c + 4 * i, (x[i] + input[i]));
90 }
91 }
92
93 #ifdef INCLUDE_C_CHACHA20
94 void ChaCha20_ctr32_c(unsigned char *out, const unsigned char *inp, size_t len,
95 const unsigned int key[8], const unsigned int counter[4])
96 #else
97 void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp, size_t len,
98 const unsigned int key[8], const unsigned int counter[4])
99 #endif
100 {
101 u32 input[16];
102 chacha_buf buf;
103 size_t todo, i;
104
105 /* sigma constant "expand 32-byte k" in little-endian encoding */
106 input[0] = ((u32)ossl_toascii('e')) | ((u32)ossl_toascii('x') << 8)
107 | ((u32)ossl_toascii('p') << 16)
108 | ((u32)ossl_toascii('a') << 24);
109 input[1] = ((u32)ossl_toascii('n')) | ((u32)ossl_toascii('d') << 8)
110 | ((u32)ossl_toascii(' ') << 16)
111 | ((u32)ossl_toascii('3') << 24);
112 input[2] = ((u32)ossl_toascii('2')) | ((u32)ossl_toascii('-') << 8)
113 | ((u32)ossl_toascii('b') << 16)
114 | ((u32)ossl_toascii('y') << 24);
115 input[3] = ((u32)ossl_toascii('t')) | ((u32)ossl_toascii('e') << 8)
116 | ((u32)ossl_toascii(' ') << 16)
117 | ((u32)ossl_toascii('k') << 24);
118
119 input[4] = key[0];
120 input[5] = key[1];
121 input[6] = key[2];
122 input[7] = key[3];
123 input[8] = key[4];
124 input[9] = key[5];
125 input[10] = key[6];
126 input[11] = key[7];
127
128 input[12] = counter[0];
129 input[13] = counter[1];
130 input[14] = counter[2];
131 input[15] = counter[3];
132
133 while (len > 0) {
134 todo = sizeof(buf);
135 if (len < todo)
136 todo = len;
137
138 chacha20_core(&buf, input);
139
140 for (i = 0; i < todo; i++)
141 out[i] = inp[i] ^ buf.c[i];
142 out += todo;
143 inp += todo;
144 len -= todo;
145
146 /*
147 * Advance 32-bit counter. Note that as subroutine is so to
148 * say nonce-agnostic, this limited counter width doesn't
149 * prevent caller from implementing wider counter. It would
150 * simply take two calls split on counter overflow...
151 */
152 input[12]++;
153 }
154 }