#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2023-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The generated code of this file depends on the following RISC-V extensions:
# - RV64I
# - RISC-V Vector ('V') with VLEN >= 128
# - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
# - RISC-V Zicclsm (main memory supports misaligned loads/stores)

use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

$output and open STDOUT, ">$output";

my $code = <<___;
.text
___

# void ChaCha20_ctr32_zvkb(unsigned char *out, const unsigned char *inp,
#                          size_t len, const unsigned int key[8],
#                          const unsigned int counter[4]);
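#
# `len` is given in bytes; only the len / 64 complete 64-byte blocks are
# processed, so any partial tail block is left to the caller. A minimal,
# hypothetical C caller (not part of this file) could look like:
#
#   unsigned int key[8];      /* 256-bit key as eight 32-bit words           */
#   unsigned int counter[4];  /* counter[0..1]: counter words (state 12-13), */
#                             /* counter[2..3]: nonce words (state 14-15)    */
#   ChaCha20_ctr32_zvkb(out, in, len, key, counter);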
################################################################################
my ( $OUTPUT, $INPUT, $LEN, $KEY, $COUNTER ) = ( "a0", "a1", "a2", "a3", "a4" );
my ( $T0 ) = ( "t0" );
my ( $CONST_DATA0, $CONST_DATA1, $CONST_DATA2, $CONST_DATA3 ) =
  ( "a5", "a6", "a7", "t1" );
my ( $KEY0, $KEY1, $KEY2, $KEY3, $KEY4, $KEY5, $KEY6, $KEY7,
     $COUNTER0, $COUNTER1, $NONCE0, $NONCE1
) = ( "s0", "s1", "s2", "s3", "s4", "s5", "s6",
      "s7", "s8", "s9", "s10", "s11" );
my ( $VL, $STRIDE, $CHACHA_LOOP_COUNT ) = ( "t2", "t3", "t4" );
my (
    $V0,  $V1,  $V2,  $V3,  $V4,  $V5,  $V6,  $V7,  $V8,  $V9,  $V10,
    $V11, $V12, $V13, $V14, $V15, $V16, $V17, $V18, $V19, $V20, $V21,
    $V22, $V23, $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31,
) = map( "v$_", ( 0 .. 31 ) );

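# Emit one ChaCha quarter round (add/xor/rotate by 16, 12, 8 and 7) for four
# independent (a, b, c, d) register groups, with the instructions of the four
# groups interleaved.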
sub chacha_quad_round_group {
    my (
        $A0, $B0, $C0, $D0, $A1, $B1, $C1, $D1,
        $A2, $B2, $C2, $D2, $A3, $B3, $C3, $D3
    ) = @_;

    my $code = <<___;
    # a += b; d ^= a; d <<<= 16;
    @{[vadd_vv $A0, $A0, $B0]}
    @{[vadd_vv $A1, $A1, $B1]}
    @{[vadd_vv $A2, $A2, $B2]}
    @{[vadd_vv $A3, $A3, $B3]}
    @{[vxor_vv $D0, $D0, $A0]}
    @{[vxor_vv $D1, $D1, $A1]}
    @{[vxor_vv $D2, $D2, $A2]}
    @{[vxor_vv $D3, $D3, $A3]}
    @{[vror_vi $D0, $D0, 32 - 16]}
    @{[vror_vi $D1, $D1, 32 - 16]}
    @{[vror_vi $D2, $D2, 32 - 16]}
    @{[vror_vi $D3, $D3, 32 - 16]}
    # c += d; b ^= c; b <<<= 12;
    @{[vadd_vv $C0, $C0, $D0]}
    @{[vadd_vv $C1, $C1, $D1]}
    @{[vadd_vv $C2, $C2, $D2]}
    @{[vadd_vv $C3, $C3, $D3]}
    @{[vxor_vv $B0, $B0, $C0]}
    @{[vxor_vv $B1, $B1, $C1]}
    @{[vxor_vv $B2, $B2, $C2]}
    @{[vxor_vv $B3, $B3, $C3]}
    @{[vror_vi $B0, $B0, 32 - 12]}
    @{[vror_vi $B1, $B1, 32 - 12]}
    @{[vror_vi $B2, $B2, 32 - 12]}
    @{[vror_vi $B3, $B3, 32 - 12]}
    # a += b; d ^= a; d <<<= 8;
    @{[vadd_vv $A0, $A0, $B0]}
    @{[vadd_vv $A1, $A1, $B1]}
    @{[vadd_vv $A2, $A2, $B2]}
    @{[vadd_vv $A3, $A3, $B3]}
    @{[vxor_vv $D0, $D0, $A0]}
    @{[vxor_vv $D1, $D1, $A1]}
    @{[vxor_vv $D2, $D2, $A2]}
    @{[vxor_vv $D3, $D3, $A3]}
    @{[vror_vi $D0, $D0, 32 - 8]}
    @{[vror_vi $D1, $D1, 32 - 8]}
    @{[vror_vi $D2, $D2, 32 - 8]}
    @{[vror_vi $D3, $D3, 32 - 8]}
    # c += d; b ^= c; b <<<= 7;
    @{[vadd_vv $C0, $C0, $D0]}
    @{[vadd_vv $C1, $C1, $D1]}
    @{[vadd_vv $C2, $C2, $D2]}
    @{[vadd_vv $C3, $C3, $D3]}
    @{[vxor_vv $B0, $B0, $C0]}
    @{[vxor_vv $B1, $B1, $C1]}
    @{[vxor_vv $B2, $B2, $C2]}
    @{[vxor_vv $B3, $B3, $C3]}
    @{[vror_vi $B0, $B0, 32 - 7]}
    @{[vror_vi $B1, $B1, 32 - 7]}
    @{[vror_vi $B2, $B2, 32 - 7]}
    @{[vror_vi $B3, $B3, 32 - 7]}
___

    return $code;
}

$code .= <<___;
.p2align 3
.globl ChaCha20_ctr32_zvkb
.type ChaCha20_ctr32_zvkb,\@function
ChaCha20_ctr32_zvkb:
    srli $LEN, $LEN, 6
    beqz $LEN, .Lend

    addi sp, sp, -96
    sd s0, 0(sp)
    sd s1, 8(sp)
    sd s2, 16(sp)
    sd s3, 24(sp)
    sd s4, 32(sp)
    sd s5, 40(sp)
    sd s6, 48(sp)
    sd s7, 56(sp)
    sd s8, 64(sp)
    sd s9, 72(sp)
    sd s10, 80(sp)
    sd s11, 88(sp)

    li $STRIDE, 64

    #### chacha block data
    # "expa" little endian
    li $CONST_DATA0, 0x61707865
    # "nd 3" little endian
    li $CONST_DATA1, 0x3320646e
    # "2-by" little endian
    li $CONST_DATA2, 0x79622d32
    # "te k" little endian
    li $CONST_DATA3, 0x6b206574
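    # (together these four words spell the ChaCha constant "expand 32-byte k")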

    lw $KEY0, 0($KEY)
    lw $KEY1, 4($KEY)
    lw $KEY2, 8($KEY)
    lw $KEY3, 12($KEY)
    lw $KEY4, 16($KEY)
    lw $KEY5, 20($KEY)
    lw $KEY6, 24($KEY)
    lw $KEY7, 28($KEY)

    lw $COUNTER0, 0($COUNTER)
    lw $COUNTER1, 4($COUNTER)
    lw $NONCE0, 8($COUNTER)
    lw $NONCE1, 12($COUNTER)

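    # Each pass of .Lblock_loop handles VL 64-byte blocks in parallel: the
    # vector registers v0..v15 each hold one 32-bit word of the ChaCha state
    # replicated across the VL lanes, and lane i works on block counter0 + i
    # (the lane index comes from vid below); vsetvli derives VL from the
    # remaining block count.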
.Lblock_loop:
    @{[vsetvli $VL, $LEN, "e32", "m1", "ta", "ma"]}

    # init chacha const states
    @{[vmv_v_x $V0, $CONST_DATA0]}
    @{[vmv_v_x $V1, $CONST_DATA1]}
    @{[vmv_v_x $V2, $CONST_DATA2]}
    @{[vmv_v_x $V3, $CONST_DATA3]}

    # init chacha key states
    @{[vmv_v_x $V4, $KEY0]}
    @{[vmv_v_x $V5, $KEY1]}
    @{[vmv_v_x $V6, $KEY2]}
    @{[vmv_v_x $V7, $KEY3]}
    @{[vmv_v_x $V8, $KEY4]}
    @{[vmv_v_x $V9, $KEY5]}
    @{[vmv_v_x $V10, $KEY6]}
    @{[vmv_v_x $V11, $KEY7]}

    # init chacha counter states
    @{[vid_v $V12]}
    @{[vadd_vx $V12, $V12, $COUNTER0]}
    @{[vmv_v_x $V13, $COUNTER1]}

    # init chacha nonce states
    @{[vmv_v_x $V14, $NONCE0]}
    @{[vmv_v_x $V15, $NONCE1]}

    # load the top-half of input data
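    # (the eight-field strided segment load reads the first 32 bytes of each
    # 64-byte block and de-interleaves them: v16 collects word 0 of every
    # block, v17 word 1, ..., v23 word 7)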
    @{[vlsseg_nf_e32_v 8, $V16, $INPUT, $STRIDE]}

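    # 10 iterations of (column round + diagonal round) give ChaCha20's 20 rounds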
    li $CHACHA_LOOP_COUNT, 10
.Lround_loop:
    addi $CHACHA_LOOP_COUNT, $CHACHA_LOOP_COUNT, -1
    @{[chacha_quad_round_group
        $V0, $V4, $V8, $V12,
        $V1, $V5, $V9, $V13,
        $V2, $V6, $V10, $V14,
        $V3, $V7, $V11, $V15]}
    @{[chacha_quad_round_group
        $V0, $V5, $V10, $V15,
        $V1, $V6, $V11, $V12,
        $V2, $V7, $V8, $V13,
        $V3, $V4, $V9, $V14]}
    bnez $CHACHA_LOOP_COUNT, .Lround_loop

    # load the bottom-half of input data
    addi $T0, $INPUT, 32
    @{[vlsseg_nf_e32_v 8, $V24, $T0, $STRIDE]}

    # add chacha top-half initial block states
    @{[vadd_vx $V0, $V0, $CONST_DATA0]}
    @{[vadd_vx $V1, $V1, $CONST_DATA1]}
    @{[vadd_vx $V2, $V2, $CONST_DATA2]}
    @{[vadd_vx $V3, $V3, $CONST_DATA3]}
    @{[vadd_vx $V4, $V4, $KEY0]}
    @{[vadd_vx $V5, $V5, $KEY1]}
    @{[vadd_vx $V6, $V6, $KEY2]}
    @{[vadd_vx $V7, $V7, $KEY3]}
    # xor with the top-half input
    @{[vxor_vv $V16, $V16, $V0]}
    @{[vxor_vv $V17, $V17, $V1]}
    @{[vxor_vv $V18, $V18, $V2]}
    @{[vxor_vv $V19, $V19, $V3]}
    @{[vxor_vv $V20, $V20, $V4]}
    @{[vxor_vv $V21, $V21, $V5]}
    @{[vxor_vv $V22, $V22, $V6]}
    @{[vxor_vv $V23, $V23, $V7]}

    # save the top-half of output
    @{[vssseg_nf_e32_v 8, $V16, $OUTPUT, $STRIDE]}

    # add chacha bottom-half initial block states
    @{[vadd_vx $V8, $V8, $KEY4]}
    @{[vadd_vx $V9, $V9, $KEY5]}
    @{[vadd_vx $V10, $V10, $KEY6]}
    @{[vadd_vx $V11, $V11, $KEY7]}
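    # v0 holds keystream that was already consumed above, so reuse it for the
    # per-lane index: v12 started out as counter0 + lane id, and both parts
    # have to be added back in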
    @{[vid_v $V0]}
    @{[vadd_vx $V12, $V12, $COUNTER0]}
    @{[vadd_vx $V13, $V13, $COUNTER1]}
    @{[vadd_vx $V14, $V14, $NONCE0]}
    @{[vadd_vx $V15, $V15, $NONCE1]}
    @{[vadd_vv $V12, $V12, $V0]}
    # xor with the bottom-half input
    @{[vxor_vv $V24, $V24, $V8]}
    @{[vxor_vv $V25, $V25, $V9]}
    @{[vxor_vv $V26, $V26, $V10]}
    @{[vxor_vv $V27, $V27, $V11]}
    @{[vxor_vv $V29, $V29, $V13]}
    @{[vxor_vv $V28, $V28, $V12]}
    @{[vxor_vv $V30, $V30, $V14]}
    @{[vxor_vv $V31, $V31, $V15]}

    # save the bottom-half of output
    addi $T0, $OUTPUT, 32
    @{[vssseg_nf_e32_v 8, $V24, $T0, $STRIDE]}

    # update counter
    add $COUNTER0, $COUNTER0, $VL
    sub $LEN, $LEN, $VL
    # each block is 16 words * 4 bytes, so advance the offset by 64 * VL bytes
    slli $T0, $VL, 6
    add $INPUT, $INPUT, $T0
    add $OUTPUT, $OUTPUT, $T0
    bnez $LEN, .Lblock_loop

    ld s0, 0(sp)
    ld s1, 8(sp)
    ld s2, 16(sp)
    ld s3, 24(sp)
    ld s4, 32(sp)
    ld s5, 40(sp)
    ld s6, 48(sp)
    ld s7, 56(sp)
    ld s8, 64(sp)
    ld s9, 72(sp)
    ld s10, 80(sp)
    ld s11, 88(sp)
    addi sp, sp, 96

.Lend:
    ret
.size ChaCha20_ctr32_zvkb,.-ChaCha20_ctr32_zvkb
___

print $code;

close STDOUT or die "error closing STDOUT: $!";