#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2023-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The generated code of this file depends on the following RISC-V extensions:
# - RISC-V Vector ('V') with VLEN >= 128
# - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
# - RISC-V Zicclsm (main memory supports misaligned loads/stores)
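#
# (Editorial note on the requirements: Zvkb supplies the vector rotate
# instruction (vror.vi) used for the ChaCha quarter-round rotations, and
# Zicclsm is listed because the strided vector loads/stores below may touch
# misaligned input/output buffers.)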

use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

$output and open STDOUT, ">$output";

my $code = <<___;
.text
___

################################################################################
# void ChaCha20_ctr32_zvkb(unsigned char *out, const unsigned char *inp,
#                          size_t len, const unsigned int key[8],
#                          const unsigned int counter[4]);
################################################################################
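#
# A minimal usage sketch from C (illustrative only, not part of the generated
# code), assuming OpenSSL's usual ChaCha20_ctr32 convention that counter[0]
# holds the initial 32-bit block counter and counter[1..3] the IV words:
#
#     unsigned int key[8] = { /* 256-bit key as little-endian words */ };
#     unsigned int counter[4] = { 1, iv0, iv1, iv2 };  /* iv* hypothetical */
#     ChaCha20_ctr32_zvkb(out, inp, len, key, counter);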
my ( $OUTPUT, $INPUT, $LEN, $KEY, $COUNTER ) = ( "a0", "a1", "a2", "a3", "a4" );
my ( $T0 ) = ( "t0" );
my ( $CONST_DATA0, $CONST_DATA1, $CONST_DATA2, $CONST_DATA3 ) =
  ( "a5", "a6", "a7", "t1" );
my ( $KEY0, $KEY1, $KEY2, $KEY3, $KEY4, $KEY5, $KEY6, $KEY7,
    $COUNTER0, $COUNTER1, $NONCE0, $NONCE1
) = ( "s0", "s1", "s2", "s3", "s4", "s5", "s6",
    "s7", "s8", "s9", "s10", "s11" );
my ( $VL, $STRIDE, $CHACHA_LOOP_COUNT ) = ( "t2", "t3", "t4" );
my (
    $V0,  $V1,  $V2,  $V3,  $V4,  $V5,  $V6,  $V7,  $V8,  $V9,  $V10,
    $V11, $V12, $V13, $V14, $V15, $V16, $V17, $V18, $V19, $V20, $V21,
    $V22, $V23, $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31,
) = map( "v$_", ( 0 .. 31 ) );
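
# Vectorization scheme: each of v0..v15 holds one of the 16 ChaCha state words
# for VL independent 64-byte blocks (element i of every register belongs to
# block i); v16..v31 stage the input/output words for those blocks.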

sub chacha_quad_round_group {
    my (
        $A0, $B0, $C0, $D0, $A1, $B1, $C1, $D1,
        $A2, $B2, $C2, $D2, $A3, $B3, $C3, $D3
    ) = @_;

    my $code = <<___;
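    # Four quarter-rounds are interleaved, grouped by operation, so that no
    # vector instruction depends on the instruction immediately before it,
    # which helps hide instruction latency. Zvkb only provides a rotate-right
    # (vror.vi), so a left rotation by N is expressed as a rotation by 32 - N.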
    # a += b; d ^= a; d <<<= 16;
    @{[vadd_vv $A0, $A0, $B0]}
    @{[vadd_vv $A1, $A1, $B1]}
    @{[vadd_vv $A2, $A2, $B2]}
    @{[vadd_vv $A3, $A3, $B3]}
    @{[vxor_vv $D0, $D0, $A0]}
    @{[vxor_vv $D1, $D1, $A1]}
    @{[vxor_vv $D2, $D2, $A2]}
    @{[vxor_vv $D3, $D3, $A3]}
    @{[vror_vi $D0, $D0, 32 - 16]}
    @{[vror_vi $D1, $D1, 32 - 16]}
    @{[vror_vi $D2, $D2, 32 - 16]}
    @{[vror_vi $D3, $D3, 32 - 16]}
    # c += d; b ^= c; b <<<= 12;
    @{[vadd_vv $C0, $C0, $D0]}
    @{[vadd_vv $C1, $C1, $D1]}
    @{[vadd_vv $C2, $C2, $D2]}
    @{[vadd_vv $C3, $C3, $D3]}
    @{[vxor_vv $B0, $B0, $C0]}
    @{[vxor_vv $B1, $B1, $C1]}
    @{[vxor_vv $B2, $B2, $C2]}
    @{[vxor_vv $B3, $B3, $C3]}
    @{[vror_vi $B0, $B0, 32 - 12]}
    @{[vror_vi $B1, $B1, 32 - 12]}
    @{[vror_vi $B2, $B2, 32 - 12]}
    @{[vror_vi $B3, $B3, 32 - 12]}
    # a += b; d ^= a; d <<<= 8;
    @{[vadd_vv $A0, $A0, $B0]}
    @{[vadd_vv $A1, $A1, $B1]}
    @{[vadd_vv $A2, $A2, $B2]}
    @{[vadd_vv $A3, $A3, $B3]}
    @{[vxor_vv $D0, $D0, $A0]}
    @{[vxor_vv $D1, $D1, $A1]}
    @{[vxor_vv $D2, $D2, $A2]}
    @{[vxor_vv $D3, $D3, $A3]}
    @{[vror_vi $D0, $D0, 32 - 8]}
    @{[vror_vi $D1, $D1, 32 - 8]}
    @{[vror_vi $D2, $D2, 32 - 8]}
    @{[vror_vi $D3, $D3, 32 - 8]}
    # c += d; b ^= c; b <<<= 7;
    @{[vadd_vv $C0, $C0, $D0]}
    @{[vadd_vv $C1, $C1, $D1]}
    @{[vadd_vv $C2, $C2, $D2]}
    @{[vadd_vv $C3, $C3, $D3]}
    @{[vxor_vv $B0, $B0, $C0]}
    @{[vxor_vv $B1, $B1, $C1]}
    @{[vxor_vv $B2, $B2, $C2]}
    @{[vxor_vv $B3, $B3, $C3]}
    @{[vror_vi $B0, $B0, 32 - 7]}
    @{[vror_vi $B1, $B1, 32 - 7]}
    @{[vror_vi $B2, $B2, 32 - 7]}
    @{[vror_vi $B3, $B3, 32 - 7]}
___

    return $code;
}

$code .= <<___;
.p2align 3
.globl ChaCha20_ctr32_zvkb
.type ChaCha20_ctr32_zvkb,\@function
ChaCha20_ctr32_zvkb:
    # convert the length in bytes into the number of 64-byte ChaCha blocks
    srli $LEN, $LEN, 6
    beqz $LEN, .Lend

    addi sp, sp, -96
    sd s0, 0(sp)
    sd s1, 8(sp)
    sd s2, 16(sp)
    sd s3, 24(sp)
    sd s4, 32(sp)
    sd s5, 40(sp)
    sd s6, 48(sp)
    sd s7, 56(sp)
    sd s8, 64(sp)
    sd s9, 72(sp)
    sd s10, 80(sp)
    sd s11, 88(sp)

    # each block is 64 bytes; used as the stride of the segment loads/stores
    li $STRIDE, 64

    #### chacha block data
    # "expa" little endian
    li $CONST_DATA0, 0x61707865
    # "nd 3" little endian
    li $CONST_DATA1, 0x3320646e
    # "2-by" little endian
    li $CONST_DATA2, 0x79622d32
    # "te k" little endian
    li $CONST_DATA3, 0x6b206574

    lw $KEY0, 0($KEY)
    lw $KEY1, 4($KEY)
    lw $KEY2, 8($KEY)
    lw $KEY3, 12($KEY)
    lw $KEY4, 16($KEY)
    lw $KEY5, 20($KEY)
    lw $KEY6, 24($KEY)
    lw $KEY7, 28($KEY)

    lw $COUNTER0, 0($COUNTER)
    lw $COUNTER1, 4($COUNTER)
    lw $NONCE0, 8($COUNTER)
    lw $NONCE1, 12($COUNTER)

.Lblock_loop:
    @{[vsetvli $VL, $LEN, "e32", "m1", "ta", "ma"]}
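    # VL is now min(remaining blocks, VLMAX) 32-bit elements; each element
    # position computes one independent ChaCha block.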

    # init chacha const states
    @{[vmv_v_x $V0, $CONST_DATA0]}
    @{[vmv_v_x $V1, $CONST_DATA1]}
    @{[vmv_v_x $V2, $CONST_DATA2]}
    @{[vmv_v_x $V3, $CONST_DATA3]}

    # init chacha key states
    @{[vmv_v_x $V4, $KEY0]}
    @{[vmv_v_x $V5, $KEY1]}
    @{[vmv_v_x $V6, $KEY2]}
    @{[vmv_v_x $V7, $KEY3]}
    @{[vmv_v_x $V8, $KEY4]}
    @{[vmv_v_x $V9, $KEY5]}
    @{[vmv_v_x $V10, $KEY6]}
    @{[vmv_v_x $V11, $KEY7]}

    # init chacha counter states
    # (vid.v writes the element index into each element, so block i runs
    # with counter value COUNTER0 + i)
    @{[vid_v $V12]}
    @{[vadd_vx $V12, $V12, $COUNTER0]}
    @{[vmv_v_x $V13, $COUNTER1]}

    # init chacha nonce states
    @{[vmv_v_x $V14, $NONCE0]}
    @{[vmv_v_x $V15, $NONCE1]}

    # load the top-half of input data
    @{[vlsseg_nf_e32_v 8, $V16, $INPUT, $STRIDE]}
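    # The strided 8-field segment load gathers words 0..7 of every block:
    # field j of block i (byte offset 64 * i + 4 * j) lands in element i of
    # register v(16 + j).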

    li $CHACHA_LOOP_COUNT, 10
.Lround_loop:
    addi $CHACHA_LOOP_COUNT, $CHACHA_LOOP_COUNT, -1
    # column quarter-rounds
    @{[chacha_quad_round_group
        $V0, $V4, $V8, $V12,
        $V1, $V5, $V9, $V13,
        $V2, $V6, $V10, $V14,
        $V3, $V7, $V11, $V15]}
    # diagonal quarter-rounds
    @{[chacha_quad_round_group
        $V0, $V5, $V10, $V15,
        $V1, $V6, $V11, $V12,
        $V2, $V7, $V8, $V13,
        $V3, $V4, $V9, $V14]}
    bnez $CHACHA_LOOP_COUNT, .Lround_loop

    # load the bottom-half of input data
    addi $T0, $INPUT, 32
    @{[vlsseg_nf_e32_v 8, $V24, $T0, $STRIDE]}
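    # words 8..15 of every block (starting 32 bytes into each block) are
    # gathered into v24..v31 the same way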

    # add chacha top-half initial block states
    @{[vadd_vx $V0, $V0, $CONST_DATA0]}
    @{[vadd_vx $V1, $V1, $CONST_DATA1]}
    @{[vadd_vx $V2, $V2, $CONST_DATA2]}
    @{[vadd_vx $V3, $V3, $CONST_DATA3]}
    @{[vadd_vx $V4, $V4, $KEY0]}
    @{[vadd_vx $V5, $V5, $KEY1]}
    @{[vadd_vx $V6, $V6, $KEY2]}
    @{[vadd_vx $V7, $V7, $KEY3]}
    # xor with the top-half input
    @{[vxor_vv $V16, $V16, $V0]}
    @{[vxor_vv $V17, $V17, $V1]}
    @{[vxor_vv $V18, $V18, $V2]}
    @{[vxor_vv $V19, $V19, $V3]}
    @{[vxor_vv $V20, $V20, $V4]}
    @{[vxor_vv $V21, $V21, $V5]}
    @{[vxor_vv $V22, $V22, $V6]}
    @{[vxor_vv $V23, $V23, $V7]}

    # save the top-half of output
    @{[vssseg_nf_e32_v 8, $V16, $OUTPUT, $STRIDE]}

    # add chacha bottom-half initial block states
    @{[vadd_vx $V8, $V8, $KEY4]}
    @{[vadd_vx $V9, $V9, $KEY5]}
    @{[vadd_vx $V10, $V10, $KEY6]}
    @{[vadd_vx $V11, $V11, $KEY7]}
    # v0 is free again now that its keystream has been consumed; rebuild the
    # per-element counter increments in it
    @{[vid_v $V0]}
    @{[vadd_vx $V12, $V12, $COUNTER0]}
    @{[vadd_vx $V13, $V13, $COUNTER1]}
    @{[vadd_vx $V14, $V14, $NONCE0]}
    @{[vadd_vx $V15, $V15, $NONCE1]}
    @{[vadd_vv $V12, $V12, $V0]}
    # xor with the bottom-half input
    @{[vxor_vv $V24, $V24, $V8]}
    @{[vxor_vv $V25, $V25, $V9]}
    @{[vxor_vv $V26, $V26, $V10]}
    @{[vxor_vv $V27, $V27, $V11]}
    @{[vxor_vv $V28, $V28, $V12]}
    @{[vxor_vv $V29, $V29, $V13]}
    @{[vxor_vv $V30, $V30, $V14]}
    @{[vxor_vv $V31, $V31, $V15]}

    # save the bottom-half of output
    addi $T0, $OUTPUT, 32
    @{[vssseg_nf_e32_v 8, $V24, $T0, $STRIDE]}

    # update the counter by the number of blocks processed in this iteration
    add $COUNTER0, $COUNTER0, $VL
    sub $LEN, $LEN, $VL
    # increase the offset by `4 * 16 * VL = 64 * VL` bytes
    slli $T0, $VL, 6
    add $INPUT, $INPUT, $T0
    add $OUTPUT, $OUTPUT, $T0
    bnez $LEN, .Lblock_loop

    ld s0, 0(sp)
    ld s1, 8(sp)
    ld s2, 16(sp)
    ld s3, 24(sp)
    ld s4, 32(sp)
    ld s5, 40(sp)
    ld s6, 48(sp)
    ld s7, 56(sp)
    ld s8, 64(sp)
    ld s9, 72(sp)
    ld s10, 80(sp)
    ld s11, 88(sp)
    addi sp, sp, 96

.Lend:
    ret
.size ChaCha20_ctr32_zvkb,.-ChaCha20_ctr32_zvkb
___

print $code;

close STDOUT or die "error closing STDOUT: $!";