]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/sm3/asm/sm3-armv8.pl
Copyright year updates
[thirdparty/openssl.git] / crypto / sm3 / asm / sm3-armv8.pl
1 #! /usr/bin/env perl
2 # Copyright 2021-2023 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8 #
9 # This module implements support for Armv8 SM3 instructions
10
11 # $output is the last argument if it looks like a file (it has an extension)
12 # $flavour is the first argument if it doesn't look like a file
# Command line: [flavour] [output-file]
#   $output  - last argument, taken only if it ends in ".<ext>"
#   $flavour - first remaining argument, taken only if it contains no "."
# Either one is left undefined when it is not supplied.
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Directory part of this script's path, including the trailing separator.
# Fall back to the empty string (current directory) when $0 has no
# directory component, instead of relying on whatever $1 happens to hold.
$dir = $0 =~ m/(.*[\/\\])[^\/\\]+$/ ? $1 : "";

# The translator lives either next to this script or in ../../perlasm.
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

# Everything printed to STDOUT from here on is piped through arm-xlate.pl.
# $xlate is quoted like the interpreter and output paths so that a source
# tree located under a path containing whitespace still works.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;
24
# Message expansion:
#   Wj <- P1(W[j-16]^W[j-9]^(W[j-3]<<<15))^(W[j-13]<<<7)^W[j-6]
#
# Appends to $code the ext/sm3partw1/sm3partw2 sequence that computes
# register s4 from registers s0..s3.
#
# Input: s0, s1, s2, s3
#   s0 = w0  | w1  | w2  | w3
#   s1 = w4  | w5  | w6  | w7
#   s2 = w8  | w9  | w10 | w11
#   s3 = w12 | w13 | w14 | w15
# Output: s4
# Scratch: vtmp1, vtmp2 (both overwritten by the emitted code)
#
# All arguments are register names without the arrangement suffix
# (e.g. "v0"). The sub is declared without a prototype: it takes seven
# arguments, so an empty "()" prototype would reject every normal call and
# only work when bypassed with a leading "&".
sub msg_exp {
    my ($s0, $s1, $s2, $s3, $s4, $vtmp1, $vtmp2) = @_;

    $code.=<<___;
// s4 = w7 | w8 | w9 | w10
ext $s4.16b, $s1.16b, $s2.16b, #12
// vtmp1 = w3 | w4 | w5 | w6
ext $vtmp1.16b, $s0.16b, $s1.16b, #12
// vtmp2 = w10 | w11 | w12 | w13
ext $vtmp2.16b, $s2.16b, $s3.16b, #8
sm3partw1 $s4.4s, $s0.4s, $s3.4s
sm3partw2 $s4.4s, $vtmp2.4s, $vtmp1.4s
___
}
52
# One round of the compression function.
#
# Appends five instructions to $code: sm3ss1, a shl/sri pair and the
# sm3tt1<ab>/sm3tt2<ab> pair.
#
# Input:
#   ab      - "a" or "b"; selects sm3tt1a/sm3tt2a or sm3tt1b/sm3tt2b
#   vstate0 - vstate1, store digest status (A - H)
#   vconst0 - vconst1, used alternately to store Tj <<< j: this round reads
#             vconst0 and writes vconst0 rotated left by one bit
#             (shl #1 + sri #31) into vconst1 for the following round
#   vtmp    - temporary register, receives the sm3ss1 result
#   vw      - third operand of sm3tt1<ab>; the caller passes s0 eor s1
#   s0      - third operand of sm3tt2<ab>, the message register itself
#   i       - lane index (0..3) choosing wj' or wj from vw / s0
#
# Declared without a prototype: it takes nine arguments, so an empty "()"
# prototype would only work when bypassed with a leading "&".
sub round {
    my ($ab, $vstate0, $vstate1, $vconst0, $vconst1,
        $vtmp, $vw, $s0, $i) = @_;

    $code.=<<___;
sm3ss1 $vtmp.4s, $vstate0.4s, $vconst0.4s, $vstate1.4s
shl $vconst1.4s, $vconst0.4s, #1
sri $vconst1.4s, $vconst0.4s, #31
sm3tt1$ab $vstate0.4s, $vtmp.4s, $vw.4s[$i]
sm3tt2$ab $vstate1.4s, $vtmp.4s, $s0.4s[$i]
___
}
80
# Four consecutive rounds of the compression function, consuming the four
# words held in message register s0.
#
# Input:
#   ab               - "a" or "b", forwarded to round()
#   vstate0, vstate1 - digest state registers, forwarded to round()
#   vconst0, vconst1 - round-constant registers; they swap roles on every
#                      round, so after the four rounds vconst0 again holds
#                      the value the next quad-round has to start from
#   vtmp1, vtmp2     - scratch registers (vtmp1 ends up holding s0 eor s1,
#                      vtmp2 is round()'s temporary)
#   s0, s1           - current message register and the one following it
#   s2, s3, s4       - optional; when s4 is given, msg_exp() is emitted
#                      first to compute s4 from s0..s3. Callers omit them
#                      when no further message words are needed.
#
# Declared without a prototype: an empty "()" prototype contradicts the
# argument list and only works when bypassed with "&". The helper calls
# below keep the "&" form so they work whether or not those helpers carry
# a prototype.
sub qround {
    my ($ab, $vstate0, $vstate1, $vconst0, $vconst1, $vtmp1, $vtmp2,
        $s0, $s1, $s2, $s3, $s4) = @_;

    if ($s4) {
        &msg_exp($s0, $s1, $s2, $s3, $s4, $vtmp1, $vtmp2);
    }

    $code.=<<___;
eor $vtmp1.16b, $s0.16b, $s1.16b
___

    for my $lane (0 .. 3) {
        # Even lanes read vconst0 and write vconst1, odd lanes the reverse.
        my ($const_in, $const_out) = $lane % 2
            ? ($vconst1, $vconst0)
            : ($vconst0, $vconst1);
        &round($ab, $vstate0, $vstate1, $const_in, $const_out, $vtmp2,
               $vtmp1, $s0, $lane);
    }
}
109
# Start the generated assembly: include arm_arch.h and switch to the text
# section. Everything else is appended to $code below.
$code = join("\n", '#include "arm_arch.h"', '.text') . "\n";
114
{{{
# Register allocation for the generated function.
my ($pstate, $pdata, $num) = ("x0", "x1", "w2");    # arguments c, p, num
# NOTE(review): the prototype below declares num as size_t, but only w2
# (the low 32 bits) is used as the block counter - confirm this is intended.
my ($state1, $state2)           = ("v5", "v6");     # working digest state
my ($sconst1, $sconst2)         = ("s16", "s17");   # scalar views used to load .Tj
my ($vconst1, $vconst2)         = ("v16", "v17");   # vector views of the same registers
my @w                           = map { "v$_" } 0 .. 4;   # message registers
my ($s0, $s1, $s2, $s3, $s4)    = @w;
my ($bkstate1, $bkstate2)       = ("v18", "v19");   # state saved at block start
my ($vconst_tmp1, $vconst_tmp2) = ("v20", "v21");   # rotating round constants
my ($vtmp1, $vtmp2)             = ("v22", "v23");   # scratch
my $constaddr                   = "x8";             # address of .Tj

# void ossl_hwsm3_block_data_order(SM3_CTX *c, const void *p, size_t num)
#
# Prologue: load the state, load both .Tj words, then start the per-block
# loop: load 64 bytes of input, back up the state, byte-swap the input on
# little-endian builds and set up the constant for the "a" rounds.
$code.=<<___;
.globl ossl_hwsm3_block_data_order
.type ossl_hwsm3_block_data_order,%function
.align 5
ossl_hwsm3_block_data_order:
AARCH64_VALID_CALL_TARGET
// load state
ld1 {$state1.4s-$state2.4s}, [$pstate]
rev64 $state1.4s, $state1.4s
rev64 $state2.4s, $state2.4s
ext $state1.16b, $state1.16b, $state1.16b, #8
ext $state2.16b, $state2.16b, $state2.16b, #8

adr $constaddr, .Tj
ldp $sconst1, $sconst2, [$constaddr]

.Loop:
// load input
ld1 {$s0.16b-$s3.16b}, [$pdata], #64
sub $num, $num, #1

mov $bkstate1.16b, $state1.16b
mov $bkstate2.16b, $state2.16b

#ifndef __ARMEB__
rev32 $s0.16b, $s0.16b
rev32 $s1.16b, $s1.16b
rev32 $s2.16b, $s2.16b
rev32 $s3.16b, $s3.16b
#endif

ext $vconst_tmp1.16b, $vconst1.16b, $vconst1.16b, #4
___

# Sixteen quad-rounds (64 rounds) per block.
#   - quad-rounds 0..3 use the "a" instruction variants, 4..15 the "b"
#     variants; the constant register is re-seeded from the second .Tj
#     word right before quad-round 4.
#   - the five message registers are used as a rotating window: quad-round
#     q starts at register q mod 5.
#   - quad-rounds 0..12 pass all five registers, so qround() also expands
#     the next message register; the last three pass only two registers,
#     which makes qround() skip the expansion.
for my $q (0 .. 15) {
    if ($q == 4) {
        $code.=<<___;
ext $vconst_tmp1.16b, $vconst2.16b, $vconst2.16b, #4
___
    }

    my $variant = $q < 4 ? "a" : "b";
    my $last    = $q < 13 ? 4 : 1;
    my @window  = map { $w[($q + $_) % 5] } 0 .. $last;

    # "&" call: qround may be declared with an empty prototype.
    &qround($variant, $state1, $state2, $vconst_tmp1, $vconst_tmp2,
            $vtmp1, $vtmp2, @window);
}

# Epilogue: fold the saved state back in, loop while blocks remain, then
# store the state. The constant table follows the function body.
# .Tj holds the two constant words loaded by the ldp above - presumably
# the SM3 round constants for the two phases, the second already rotated
# for the round where it is first used (TODO confirm against the SM3
# specification).
$code.=<<___;
eor $state1.16b, $state1.16b, $bkstate1.16b
eor $state2.16b, $state2.16b, $bkstate2.16b

// any remained blocks?
cbnz $num, .Loop

// save state
rev64 $state1.4s, $state1.4s
rev64 $state2.4s, $state2.4s
ext $state1.16b, $state1.16b, $state1.16b, #8
ext $state2.16b, $state2.16b, $state2.16b, #8
st1 {$state1.4s-$state2.4s}, [$pstate]
ret
.size ossl_hwsm3_block_data_order,.-ossl_hwsm3_block_data_order

.align 3
.Tj:
.word 0x79cc4519, 0x9d8a7a87
___
}}}
218
219 #########################################
# Base opcodes of the SM3 instructions with every register/index field
# zero. The encoders below OR the operand fields into these values so the
# output assembles even with toolchains that lack the SM3 mnemonics.
my %sm3partopcode = (
    "sm3partw1" => 0xce60C000,
    "sm3partw2" => 0xce60C400);

my %sm3ss1opcode = (
    "sm3ss1"    => 0xce400000);

my %sm3ttopcode = (
    "sm3tt1a"   => 0xce408000,
    "sm3tt1b"   => 0xce408400,
    "sm3tt2a"   => 0xce408800,
    "sm3tt2b"   => 0xce408C00);

# Encode "sm3partw1/2 Vd, Vn, Vm".
#   mnemonic - key of %sm3partopcode
#   arg      - operand text, three q/v registers separated by commas
# Returns ".inst <word> //<original instruction>" with Vd in bits 0-4,
# Vn in bits 5-9 and Vm in bits 16-20.
# If the mnemonic is unknown or the operands do not parse, the instruction
# text is returned unchanged. Returning an empty string here would make the
# caller's s///e silently delete the instruction from the generated code.
sub unsm3part {
    my ($mnemonic,$arg)=@_;
    my $opcode = $sm3partopcode{$mnemonic};

    return "$mnemonic $arg"
        unless defined $opcode
           and $arg =~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)/;

    return sprintf ".inst\t0x%08x\t//%s %s",
                   $opcode|$1|($2<<5)|($3<<16),
                   $mnemonic,$arg;
}

# Encode "sm3ss1 Vd, Vn, Vm, Va".
#   mnemonic - key of %sm3ss1opcode
#   arg      - operand text, four q/v registers separated by commas
# Returns ".inst <word> //<original instruction>" with Vd in bits 0-4,
# Vn in bits 5-9, Vm in bits 16-20 and Va in bits 10-14.
# Unknown mnemonics or unparsable operands are passed through unchanged
# rather than being replaced by an empty string.
sub unsm3ss1 {
    my ($mnemonic,$arg)=@_;
    my $opcode = $sm3ss1opcode{$mnemonic};

    return "$mnemonic $arg"
        unless defined $opcode
           and $arg =~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)/;

    return sprintf ".inst\t0x%08x\t//%s %s",
                   $opcode|$1|($2<<5)|($3<<16)|($4<<10),
                   $mnemonic,$arg;
}

# Encode "sm3tt1a/1b/2a/2b Vd, Vn, Vm.4s[i]".
#   mnemonic - key of %sm3ttopcode
#   arg      - operand text, three q/v registers, the last one followed by
#              a lane index [0-3]
# Returns ".inst <word> //<original instruction>" with Vd in bits 0-4,
# Vn in bits 5-9, Vm in bits 16-20 and the lane index in bits 12-13.
# Unknown mnemonics or unparsable operands are passed through unchanged
# rather than being replaced by an empty string.
sub unsm3tt {
    my ($mnemonic,$arg)=@_;
    my $opcode = $sm3ttopcode{$mnemonic};

    return "$mnemonic $arg"
        unless defined $opcode
           and $arg =~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*\[([0-3])\]/;

    return sprintf ".inst\t0x%08x\t//%s %s",
                   $opcode|$1|($2<<5)|($3<<16)|($4<<12),
                   $mnemonic,$arg;
}
262
# Copy this script's leading comment block into the output, turning each
# leading "#" into "//". The "#!" line is skipped and copying stops at the
# first line that is neither a comment nor empty.
#
# Three-argument open on a lexical handle, so the script path is never
# interpreted as an open mode. Failure stays non-fatal, as before, but is
# now reported instead of silently producing output without the header.
if (open(my $self, '<', $0)) {
    while (my $line = <$self>) {
        next if $line =~ /^#!/;
        my $was_comment = ($line =~ s/^#/\/\//);
        last unless $was_comment or $line =~ /^$/;
        print $line;
    }
    close $self;
} else {
    warn "can't open $0 to copy its header: $!";
}
270
# Post-process the generated assembly line by line and print it.
# Each SM3 mnemonic pattern is paired with the sub that turns the matched
# instruction into a raw .inst word; the pairs are applied in this order.
my @encoders = (
    [ qr/\b(sm3partw[1-2])\s+([qv].*)/,   \&unsm3part ],
    [ qr/\b(sm3ss1)\s+([qv].*)/,          \&unsm3ss1  ],
    [ qr/\b(sm3tt[1-2][a-b])\s+([qv].*)/, \&unsm3tt   ],
);

for (split /\n/, $code) {
    # Text between backticks is evaluated as Perl and replaced by its value.
    s/\`([^\`]*)\`/eval($1)/ge;

    for my $entry (@encoders) {
        my ($pattern, $encode) = @$entry;
        s/$pattern/$encode->($1,$2)/ge;
    }

    print "$_\n";
}

# STDOUT is the pipe to the translator; a failed close means the
# translation failed or the output was not fully written.
close STDOUT or die "error closing STDOUT: $!";