]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/chacha/asm/chacha-armv8-sve.pl
Copyright year updates
[thirdparty/openssl.git] / crypto / chacha / asm / chacha-armv8-sve.pl
CommitLineData
#! /usr/bin/env perl
# Copyright 2022-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
#
# ChaCha20 for ARMv8 via SVE
#
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Locate the arm-xlate.pl assembler translator next to this script or in
# the shared perlasm directory, then pipe all of our output through it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;
# Thunk for unknown sub names: turn a call like &some_op(@args) into an
# assembly line "some_op\targs" appended to $code ([simplified] x86-style
# perlasm).  Underscores in the name become dots; a purely numeric last
# argument is prefixed with '#' to make it an immediate.
sub AUTOLOAD()
{
    my $opcode = $AUTOLOAD;
    $opcode =~ s/.*:://;
    $opcode =~ s/_/\./;
    my $arg = pop;
    $arg = "#$arg" if ($arg*1 eq $arg);	# numeric => immediate operand
    $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
}
32
# Scalar (general-purpose) register assignments for the function arguments.
my ($outp,$inp,$len,$key,$ctr) = map("x$_",(0..4));
my ($veclen) = ("x5");			# SVE vector length in 32-bit words
my ($counter) = ("x6");			# scratch / round counter
my ($counter_w) = ("w6");
# x7..x22 hold a scalar copy of one ChaCha block ("mixin" path).
my @xx=(7..22);
my @sxx=map("x$_",@xx);
my @sx=map("w$_",@xx);
# x23..x30 hold the packed 64-bit key/counter/nonce words.
my @K=map("x$_",(23..30));
# Column-major ordering of the 16 state words across z-registers.
my @elem=(0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15);
my @KL=map("w$_",(23..30));
my @mx=map("z$_",@elem);
my @vx=map("v$_",@elem);
my ($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3,
    $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3) = @mx;
my ($zctr) = ("z16");			# per-lane block counter
my @tt=(17..24);
my @xt=map("z$_",@tt);			# temporaries
my @vt=map("v$_",@tt);
my @perm=map("z$_",(25..30));
my ($rot8) = ("z31");			# tbl pattern for rotate-by-8
# Backup copies of the initial state used when accumulating; note slot 15
# aliases $rot8 and slot 12 aliases $zctr.
my @bak=(@perm[0],@perm[1],@perm[2],@perm[3],@perm[4],@perm[5],@xt[4],@xt[5],@xt[6],@xt[7],@xt[0],@xt[1],$zctr,@xt[2],@xt[3],$rot8);
my $debug_encoder=0;			# set to 1 to cross-check .inst encodings
55
# Emit vector adds for each (dst,src) index pair; the scalar ("mixin")
# shadow block is updated alongside under the assembler-time flag.
sub SVE_ADD() {
	while (@_) {
		my $x = shift;
		my $y = shift;
$code.=<<___;
	add	@mx[$x].s,@mx[$x].s,@mx[$y].s
	.if mixin == 1
	add	@sx[$x],@sx[$x],@sx[$y]
	.endif
___
	}
}
70
# Emit vector XORs for each (dst,src) index pair, plus the scalar shadow.
sub SVE_EOR() {
	while (@_) {
		my $x = shift;
		my $y = shift;
$code.=<<___;
	eor	@mx[$x].d,@mx[$x].d,@mx[$y].d
	.if mixin == 1
	eor	@sx[$x],@sx[$x],@sx[$y]
	.endif
___
	}
}
85
# Shift each source state register left by $bits into consecutive
# temporaries starting at index $x (SVE1 rotate = lsl+lsr+orr).
sub SVE_LSL() {
	my $bits = shift;
	my $x = shift;
	foreach my $y (@_) {
$code.=<<___;
	lsl	@xt[$x].s,@mx[$y].s,$bits
___
		$x++;
	}
}
99
# Shift each listed state register right by $bits in place; the scalar
# shadow uses a single ror since it completes the rotate in one step.
sub SVE_LSR() {
	my $bits = shift;
	foreach my $x (@_) {
$code.=<<___;
	lsr	@mx[$x].s,@mx[$x].s,$bits
	.if mixin == 1
	ror	@sx[$x],@sx[$x],$bits
	.endif
___
	}
}
114
# OR the temporaries (from SVE_LSL, starting at index $x) back into the
# listed state registers, completing the SVE1 rotate.
sub SVE_ORR() {
	my $x = shift;
	foreach my $y (@_) {
$code.=<<___;
	orr	@mx[$y].d,@mx[$y].d,@xt[$x].d
___
		$x++;
	}
}
127
# Rotate each listed state register by 16 (revh swaps half-words within
# each 32-bit element, which is exactly a 16-bit rotate).
sub SVE_REV16() {
	foreach my $x (@_) {
$code.=<<___;
	revh	@mx[$x].s,p0/m,@mx[$x].s
	.if mixin == 1
	ror	@sx[$x],@sx[$x],#16
	.endif
___
	}
}
141
# Rotate each listed state register left by 8 via a byte-table lookup
# using the precomputed $rot8 permutation (scalar shadow: ror by 24).
sub SVE_ROT8() {
	foreach my $x (@_) {
$code.=<<___;
	tbl	@mx[$x].b,{@mx[$x].b},$rot8.b
	.if mixin == 1
	ror	@sx[$x],@sx[$x],#24
	.endif
___
	}
}
155
# SVE2 xor-and-rotate: for each (dst,src) pair emit a single XAR that
# XORs then rotates right by 32-$bits (== rotate left by $bits).  The
# scalar shadow does the same with separate eor+ror.
sub SVE2_XAR() {
	my $bits = shift;
	my $rbits = 32-$bits;
	while (@_) {
		my $x = shift;
		my $y = shift;
$code.=<<___;
	.if mixin == 1
	eor	@sx[$x],@sx[$x],@sx[$y]
	.endif
	xar	@mx[$x].s,@mx[$x].s,@mx[$y].s,$rbits
	.if mixin == 1
	ror	@sx[$x],@sx[$x],$rbits
	.endif
___
	}
}
175
3f42f41a
DH
# One group of four interleaved ChaCha quarter-rounds using SVE2 XAR.
sub SVE2_QR_GROUP() {
	my ($a0,$b0,$c0,$d0,$a1,$b1,$c1,$d1,$a2,$b2,$c2,$d2,$a3,$b3,$c3,$d3) = @_;

	&SVE_ADD($a0,$b0,$a1,$b1,$a2,$b2,$a3,$b3);	# a += b
	&SVE2_XAR(16,$d0,$a0,$d1,$a1,$d2,$a2,$d3,$a3);	# d = (d^a) <<< 16

	&SVE_ADD($c0,$d0,$c1,$d1,$c2,$d2,$c3,$d3);	# c += d
	&SVE2_XAR(12,$b0,$c0,$b1,$c1,$b2,$c2,$b3,$c3);	# b = (b^c) <<< 12

	&SVE_ADD($a0,$b0,$a1,$b1,$a2,$b2,$a3,$b3);	# a += b
	&SVE2_XAR(8,$d0,$a0,$d1,$a1,$d2,$a2,$d3,$a3);	# d = (d^a) <<< 8

	&SVE_ADD($c0,$d0,$c1,$d1,$c2,$d2,$c3,$d3);	# c += d
	&SVE2_XAR(7,$b0,$c0,$b1,$c1,$b2,$c2,$b3,$c3);	# b = (b^c) <<< 7
}
191
# One group of four interleaved ChaCha quarter-rounds for plain SVE,
# where each rotate is synthesized (revh, tbl, or lsl+lsr+orr).
sub SVE_QR_GROUP() {
	my ($a0,$b0,$c0,$d0,$a1,$b1,$c1,$d1,$a2,$b2,$c2,$d2,$a3,$b3,$c3,$d3) = @_;

	# a += b; d = (d^a) <<< 16
	&SVE_ADD($a0,$b0,$a1,$b1,$a2,$b2,$a3,$b3);
	&SVE_EOR($d0,$a0,$d1,$a1,$d2,$a2,$d3,$a3);
	&SVE_REV16($d0,$d1,$d2,$d3);

	# c += d; b = (b^c) <<< 12
	&SVE_ADD($c0,$d0,$c1,$d1,$c2,$d2,$c3,$d3);
	&SVE_EOR($b0,$c0,$b1,$c1,$b2,$c2,$b3,$c3);
	&SVE_LSL(12,0,$b0,$b1,$b2,$b3);
	&SVE_LSR(20,$b0,$b1,$b2,$b3);
	&SVE_ORR(0,$b0,$b1,$b2,$b3);

	# a += b; d = (d^a) <<< 8
	&SVE_ADD($a0,$b0,$a1,$b1,$a2,$b2,$a3,$b3);
	&SVE_EOR($d0,$a0,$d1,$a1,$d2,$a2,$d3,$a3);
	&SVE_ROT8($d0,$d1,$d2,$d3);

	# c += d; b = (b^c) <<< 7
	&SVE_ADD($c0,$d0,$c1,$d1,$c2,$d2,$c3,$d3);
	&SVE_EOR($b0,$c0,$b1,$c1,$b2,$c2,$b3,$c3);
	&SVE_LSL(7,0,$b0,$b1,$b2,$b3);
	&SVE_LSR(25,$b0,$b1,$b2,$b3);
	&SVE_ORR(0,$b0,$b1,$b2,$b3);
}
215
# The 20-round ChaCha core (10 double-rounds) for plain SVE: a column
# round followed by a diagonal round, looped 10 times at run time.
sub SVE_INNER_BLOCK() {
$code.=<<___;
	mov	$counter,#10
10:
.align	5
___
	&SVE_QR_GROUP(0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15);	# columns
	&SVE_QR_GROUP(0,5,10,15,1,6,11,12,2,7,8,13,3,4,9,14);	# diagonals
$code.=<<___;
	sub	$counter,$counter,1
	cbnz	$counter,10b
___
}
229
# The 20-round ChaCha core for SVE2, using XAR-based quarter-rounds.
sub SVE2_INNER_BLOCK() {
$code.=<<___;
	mov	$counter,#10
10:
.align	5
___
	&SVE2_QR_GROUP(0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15);	# columns
	&SVE2_QR_GROUP(0,5,10,15,1,6,11,12,2,7,8,13,3,4,9,14);	# diagonals
$code.=<<___;
	sub	$counter,$counter,1
	cbnz	$counter,10b
___
}
243
3f42f41a
DH
# Load consecutive vector-length chunks of input into the given z-regs,
# byte-swapping on big-endian, then advance $inp past what was read.
sub load_regs() {
	my $offset = shift;
	foreach my $reg (@_) {
$code.=<<___;
	ld1w	{$reg.s},p0/z,[$inp,#$offset,MUL VL]
#ifdef __AARCH64EB__
	revb	$reg.s,p0/m,$reg.s
#endif
___
		$offset++;
	}
$code.=<<___;
	addvl	$inp,$inp,$offset
___
}
262
3f42f41a
DH
# Convenience wrapper: load the listed registers starting at offset 0.
sub load() {
	&load_regs(0, @_) if (@_);
}
b1b2146d 268
3f42f41a
DH
# Store the given z-regs to consecutive vector-length chunks of output,
# byte-swapping on big-endian first, then advance $outp.
sub store_regs() {
	my $offset = shift;
	foreach my $reg (@_) {
$code.=<<___;
#ifdef __AARCH64EB__
	revb	$reg.s,p0/m,$reg.s
#endif
	st1w	{$reg.s},p0,[$outp,#$offset,MUL VL]
___
		$offset++;
	}
$code.=<<___;
	addvl	$outp,$outp,$offset
___
}
287
# Convenience wrapper: store the listed registers starting at offset 0.
sub store() {
	&store_regs(0, @_) if (@_);
}
293
# 4x4 32-bit transpose of two quadruples of state registers, done with
# zip1/zip2 at .s then .d granularity (in-place, using @xt temporaries).
sub transpose() {
	my ($xa,$xb,$xc,$xd,$xa1,$xb1,$xc1,$xd1) = @_;
$code.=<<___;
	zip1	@xt[0].s,$xa.s,$xb.s
	zip2	@xt[1].s,$xa.s,$xb.s
	zip1	@xt[2].s,$xc.s,$xd.s
	zip2	@xt[3].s,$xc.s,$xd.s

	zip1	@xt[4].s,$xa1.s,$xb1.s
	zip2	@xt[5].s,$xa1.s,$xb1.s
	zip1	@xt[6].s,$xc1.s,$xd1.s
	zip2	@xt[7].s,$xc1.s,$xd1.s

	zip1	$xa.d,@xt[0].d,@xt[2].d
	zip2	$xb.d,@xt[0].d,@xt[2].d
	zip1	$xc.d,@xt[1].d,@xt[3].d
	zip2	$xd.d,@xt[1].d,@xt[3].d

	zip1	$xa1.d,@xt[4].d,@xt[6].d
	zip2	$xb1.d,@xt[4].d,@xt[6].d
	zip1	$xc1.d,@xt[5].d,@xt[7].d
	zip2	$xd1.d,@xt[5].d,@xt[7].d
___
}
325
# Add the saved initial state back into state words $idx0 and $idx0+1
# (vector lanes via @bak or explicit override regs; scalar shadow packs
# the two 32-bit words back into one 64-bit register).
sub ACCUM() {
	my $idx0 = shift;
	my $idx1 = $idx0 + 1;
	my $d = $idx0/2;			# index into the 64-bit @K words
	my $bk0 = @_ ? shift : @bak[$idx0];	# optional backup overrides
	my $bk1 = @_ ? shift : @bak[$idx1];

$code.=<<___;
	.if mixin == 1
	add	@sx[$idx0],@sx[$idx0],@KL[$d]
	.endif
	add	@mx[$idx0].s,@mx[$idx0].s,$bk0.s
	.if mixin == 1
	add	@sxx[$idx1],@sxx[$idx1],@K[$d],lsr #32
	.endif
	add	@mx[$idx1].s,@mx[$idx1].s,$bk1.s
	.if mixin == 1
	add	@sxx[$idx0],@sxx[$idx0],$sxx[$idx1],lsl #32	// pack
	.endif
___
}
352
# Scalar path: load two packed 64-bit input words ($idx0 and $idx0+2,
# i.e. skipping the already-packed odd slot) and post-increment $inp.
sub SCA_INP() {
	my $idx0 = shift;
	my $idx1 = $idx0 + 2;
$code.=<<___;
	.if mixin == 1
	ldp	@sxx[$idx0],@sxx[$idx1],[$inp],#16
	.endif
___
}
362
# Add initial state into all 16 words on the plain-SVE path.  Slots
# 10/11/13/14 of @bak were clobbered as temporaries, so they are
# re-materialized from @K here; slot 15 has no home and borrows @bak[0].
sub SVE_ACCUM_STATES() {
	my ($tmp,$tmpw) = ($counter,$counter_w);

$code.=<<___;
	lsr	$tmp,@K[5],#32
	dup	@bak[10].s,@KL[5]
	dup	@bak[11].s,$tmpw
	lsr	$tmp,@K[6],#32
	dup	@bak[13].s,$tmpw
	lsr	$tmp,@K[7],#32
___
	&ACCUM(0);
	&ACCUM(2);
	&SCA_INP(1);
	&ACCUM(4);
	&ACCUM(6);
	&SCA_INP(5);
	&ACCUM(8);
	&ACCUM(10);
	&SCA_INP(9);
$code.=<<___;
	dup	@bak[14].s,@KL[7]
	dup	@bak[0].s,$tmpw		// bak[15] not available for SVE
___
	&ACCUM(12);
	&ACCUM(14, @bak[14],@bak[0]);
	&SCA_INP(13);
}
391
# Add initial state into all 16 words on the SVE2 path, where every
# @bak slot survived the rounds and no re-materialization is needed.
sub SVE2_ACCUM_STATES() {
	&ACCUM(0);
	&ACCUM(2);
	&SCA_INP(1);
	&ACCUM(4);
	&ACCUM(6);
	&SCA_INP(5);
	&ACCUM(8);
	&ACCUM(10);
	&SCA_INP(9);
	&ACCUM(12);
	&ACCUM(14);
	&SCA_INP(13);
}
406
# Scalar path: XOR one packed 64-bit keystream word with its input word.
sub SCA_EOR() {
	my $idx0 = shift;
	my $idx1 = $idx0 + 1;
$code.=<<___;
	.if mixin == 1
	eor	@sxx[$idx0],@sxx[$idx0],@sxx[$idx1]
	.endif
___
}
416
# Scalar path: store a pair of packed 64-bit output words, advancing $outp.
sub SCA_SAVE() {
	my $idx0 = shift;
	my $idx1 = shift;
$code.=<<___;
	.if mixin == 1
	stp	@sxx[$idx0],@sxx[$idx1],[$outp],#16
	.endif
___
}
b1b2146d 426
3f42f41a
DH
# Output transform specialized for 128-bit vectors (VL==4 words): the
# transpose collapses to two 4x4 passes and plain Neon ld1/st1 with
# multi-register ranges can be used for the XOR with input.
sub SVE_VL128_TRANSFORMS() {
	&SCA_EOR(0);
	&SCA_EOR(2);
	&SCA_EOR(4);
	&transpose($xa0,$xa1,$xa2,$xa3,$xb0,$xb1,$xb2,$xb3);
	&SCA_EOR(6);
	&SCA_EOR(8);
	&SCA_EOR(10);
	&transpose($xc0,$xc1,$xc2,$xc3,$xd0,$xd1,$xd2,$xd3);
	&SCA_EOR(12);
	&SCA_EOR(14);
$code.=<<___;
	ld1	{@vt[0].4s-@vt[3].4s},[$inp],#64
	ld1	{@vt[4].4s-@vt[7].4s},[$inp],#64
	eor	$xa0.d,$xa0.d,@xt[0].d
	eor	$xb0.d,$xb0.d,@xt[1].d
	eor	$xc0.d,$xc0.d,@xt[2].d
	eor	$xd0.d,$xd0.d,@xt[3].d
	eor	$xa1.d,$xa1.d,@xt[4].d
	eor	$xb1.d,$xb1.d,@xt[5].d
	eor	$xc1.d,$xc1.d,@xt[6].d
	eor	$xd1.d,$xd1.d,@xt[7].d
	ld1	{@vt[0].4s-@vt[3].4s},[$inp],#64
	ld1	{@vt[4].4s-@vt[7].4s},[$inp],#64
___
	&SCA_SAVE(0,2);
$code.=<<___;
	eor	$xa2.d,$xa2.d,@xt[0].d
	eor	$xb2.d,$xb2.d,@xt[1].d
___
	&SCA_SAVE(4,6);
$code.=<<___;
	eor	$xc2.d,$xc2.d,@xt[2].d
	eor	$xd2.d,$xd2.d,@xt[3].d
___
	&SCA_SAVE(8,10);
$code.=<<___;
	eor	$xa3.d,$xa3.d,@xt[4].d
	eor	$xb3.d,$xb3.d,@xt[5].d
___
	&SCA_SAVE(12,14);
$code.=<<___;
	eor	$xc3.d,$xc3.d,@xt[6].d
	eor	$xd3.d,$xd3.d,@xt[7].d
	st1	{@vx[0].4s-@vx[12].4s},[$outp],#64
	st1	{@vx[1].4s-@vx[13].4s},[$outp],#64
	st1	{@vx[2].4s-@vx[14].4s},[$outp],#64
	st1	{@vx[3].4s-@vx[15].4s},[$outp],#64
___
}
477
# Final per-iteration transform: byte-swap the scalar block on BE, bump
# the scalar counter, then either take the VL128 fast path or do the
# generic four-pass transpose + XOR-with-input + store, and finally
# advance the vector counter by the number of lanes (incw).
sub SVE_TRANSFORMS() {
$code.=<<___;
#ifdef __AARCH64EB__
	rev	@sxx[0],@sxx[0]
	rev	@sxx[2],@sxx[2]
	rev	@sxx[4],@sxx[4]
	rev	@sxx[6],@sxx[6]
	rev	@sxx[8],@sxx[8]
	rev	@sxx[10],@sxx[10]
	rev	@sxx[12],@sxx[12]
	rev	@sxx[14],@sxx[14]
#endif
	.if mixin == 1
	add	@K[6],@K[6],#1
	.endif
	cmp	$veclen,4
	b.ne	200f
___
	&SVE_VL128_TRANSFORMS();
$code.=<<___;
	b	210f
200:
___
	&transpose($xa0,$xb0,$xc0,$xd0,$xa1,$xb1,$xc1,$xd1);
	&SCA_EOR(0);
	&SCA_EOR(2);
	&transpose($xa2,$xb2,$xc2,$xd2,$xa3,$xb3,$xc3,$xd3);
	&SCA_EOR(4);
	&SCA_EOR(6);
	&transpose($xa0,$xa1,$xa2,$xa3,$xb0,$xb1,$xb2,$xb3);
	&SCA_EOR(8);
	&SCA_EOR(10);
	&transpose($xc0,$xc1,$xc2,$xc3,$xd0,$xd1,$xd2,$xd3);
	&SCA_EOR(12);
	&SCA_EOR(14);
	&load(@xt[0],@xt[1],@xt[2],@xt[3],@xt[4],@xt[5],@xt[6],@xt[7]);
$code.=<<___;
	eor	$xa0.d,$xa0.d,@xt[0].d
	eor	$xa1.d,$xa1.d,@xt[1].d
	eor	$xa2.d,$xa2.d,@xt[2].d
	eor	$xa3.d,$xa3.d,@xt[3].d
	eor	$xb0.d,$xb0.d,@xt[4].d
	eor	$xb1.d,$xb1.d,@xt[5].d
	eor	$xb2.d,$xb2.d,@xt[6].d
	eor	$xb3.d,$xb3.d,@xt[7].d
___
	&load(@xt[0],@xt[1],@xt[2],@xt[3],@xt[4],@xt[5],@xt[6],@xt[7]);
	&SCA_SAVE(0,2);
$code.=<<___;
	eor	$xc0.d,$xc0.d,@xt[0].d
	eor	$xc1.d,$xc1.d,@xt[1].d
___
	&SCA_SAVE(4,6);
$code.=<<___;
	eor	$xc2.d,$xc2.d,@xt[2].d
	eor	$xc3.d,$xc3.d,@xt[3].d
___
	&SCA_SAVE(8,10);
$code.=<<___;
	eor	$xd0.d,$xd0.d,@xt[4].d
	eor	$xd1.d,$xd1.d,@xt[5].d
___
	&SCA_SAVE(12,14);
$code.=<<___;
	eor	$xd2.d,$xd2.d,@xt[6].d
	eor	$xd3.d,$xd3.d,@xt[7].d
___
	&store($xa0,$xa1,$xa2,$xa3,$xb0,$xb1,$xb2,$xb3);
	&store($xc0,$xc1,$xc2,$xc3,$xd0,$xd1,$xd2,$xd3);
$code.=<<___;
210:
	incw	@K[6], ALL, MUL #1
___
}
552
# Broadcast 64-bit word @K[$d] into state regs $idx0/$idx0+1 AND into
# the matching @bak backup regs (and the scalar shadow word).
sub SET_STATE_BAK() {
	my $idx0 = shift;
	my $idx1 = $idx0 + 1;
	my $x0 = @sx[$idx0];
	my $x1 = @sx[$idx1];
	my $xx1 = @sxx[$idx1];
	my $d = $idx0/2;

$code.=<<___;
	lsr	$xx1,@K[$d],#32
	dup	@mx[$idx0].s,@KL[$d]
	dup	@bak[$idx0].s,@KL[$d]
	.if mixin == 1
	mov	$x0,@KL[$d]
	.endif
	dup	@mx[$idx1].s,$x1
	dup	@bak[$idx1].s,$x1
___
}
573
# Broadcast 64-bit word @K[$d] into state regs $idx0/$idx0+1 only
# (no @bak copy; used where the backup slot is re-derived later).
sub SET_STATE() {
	my $idx0 = shift;
	my $idx1 = $idx0 + 1;
	my $x0 = @sx[$idx0];
	my $x1 = @sx[$idx1];
	my $xx1 = @sxx[$idx1];
	my $d = $idx0/2;

$code.=<<___;
	lsr	$xx1,@K[$d],#32
	dup	@mx[$idx0].s,@KL[$d]
	.if mixin == 1
	mov	$x0,@KL[$d]
	.endif
	dup	@mx[$idx1].s,$x1
___
}
b1b2146d
DH
592
# Initialize the 16-word state for the plain-SVE path.  Words 10/14 get
# no backup (their @bak slots double as temporaries); the per-lane
# counter word 12 is built with "index" (base counter + lane number),
# skewed by 1 when the scalar mixin block occupies counter value 0.
sub SVE_LOAD_STATES() {
	&SET_STATE_BAK(0);
	&SET_STATE_BAK(2);
	&SET_STATE_BAK(4);
	&SET_STATE_BAK(6);
	&SET_STATE_BAK(8);
	&SET_STATE(10);
	&SET_STATE(14);
$code.=<<___;
	.if mixin == 1
	add	@sx[13],@KL[6],#1
	mov	@sx[12],@KL[6]
	index	$zctr.s,@sx[13],1
	index	@mx[12].s,@sx[13],1
	.else
	index	$zctr.s,@KL[6],1
	index	@mx[12].s,@KL[6],1
	.endif
	lsr	@sxx[13],@K[6],#32
	dup	@mx[13].s,@sx[13]
___
}
615
# Initialize the 16-word state for the SVE2 path.  All words keep a
# backup (XAR frees the temporaries), including the upper counter word 13.
sub SVE2_LOAD_STATES() {
	&SET_STATE_BAK(0);
	&SET_STATE_BAK(2);
	&SET_STATE_BAK(4);
	&SET_STATE_BAK(6);
	&SET_STATE_BAK(8);
	&SET_STATE_BAK(10);
	&SET_STATE_BAK(14);

$code.=<<___;
	.if mixin == 1
	add	@sx[13],@KL[6],#1
	mov	@sx[12],@KL[6]
	index	$zctr.s,@sx[13],1
	index	@mx[12].s,@sx[13],1
	.else
	index	$zctr.s,@KL[6],1
	index	@mx[12].s,@KL[6],1
	.endif
	lsr	@sxx[13],@K[6],#32
	dup	@mx[13].s,@sx[13]
	dup	@bak[13].s,@sx[13]
___
}
640
# Main loop for plain SVE: process $veclen blocks per iteration; when at
# least one extra 64-byte block remains, enable the scalar "mixin" block
# that is computed in general-purpose registers alongside the vectors.
sub chacha20_sve() {
	my ($tmp) = (@sxx[0]);

$code.=<<___;
.align	5
100:
	subs	$tmp,$len,$veclen,lsl #6
	b.lt	110f
	mov	$len,$tmp
	b.eq	101f
	cmp	$len,64
	b.lt	101f
	mixin=1
___
	&SVE_LOAD_STATES();
	&SVE_INNER_BLOCK();
	&SVE_ACCUM_STATES();
	&SVE_TRANSFORMS();
$code.=<<___;
	subs	$len,$len,64
	b.gt	100b
	b	110f
101:
	mixin=0
___
	&SVE_LOAD_STATES();
	&SVE_INNER_BLOCK();
	&SVE_ACCUM_STATES();
	&SVE_TRANSFORMS();
$code.=<<___;
110:
___
}
674
3f42f41a
DH
# Main loop for SVE2: identical control flow to chacha20_sve but with
# the XAR-based round and full-backup state routines.
sub chacha20_sve2() {
	my ($tmp) = (@sxx[0]);

$code.=<<___;
.align	5
100:
	subs	$tmp,$len,$veclen,lsl #6
	b.lt	110f
	mov	$len,$tmp
	b.eq	101f
	cmp	$len,64
	b.lt	101f
	mixin=1
___
	&SVE2_LOAD_STATES();
	&SVE2_INNER_BLOCK();
	&SVE2_ACCUM_STATES();
	&SVE_TRANSFORMS();
$code.=<<___;
	subs	$len,$len,64
	b.gt	100b
	b	110f
101:
	mixin=0
___
	&SVE2_LOAD_STATES();
	&SVE2_INNER_BLOCK();
	&SVE2_ACCUM_STATES();
	&SVE_TRANSFORMS();
$code.=<<___;
110:
___
}
708
3f42f41a 709
{{{
	# Scratch registers local to the function prologue/epilogue.
	my ($tmp,$tmpw) = ("x6", "w6");
	my ($tmpw0,$tmp0,$tmpw1,$tmp1) = ("w9","x9", "w10","x10");
	my ($sve2flag) = ("x7");	# 1 if SVE2 is available at run time

$code.=<<___;
#include "arm_arch.h"

.arch	armv8-a

.extern	OPENSSL_armcap_P
.hidden	OPENSSL_armcap_P

.text
.align	5
.Lchacha20_consts:
.quad	0x3320646e61707865,0x6b20657479622d32		// endian-neutral
.Lrot8:
	.word	0x02010003,0x04040404,0x02010003,0x04040404
.globl	ChaCha20_ctr32_sve
.type	ChaCha20_ctr32_sve,%function
.align	5
ChaCha20_ctr32_sve:
	AARCH64_VALID_CALL_TARGET
	cntw	$veclen, ALL, MUL #1
	cmp	$len,$veclen,lsl #6
	b.lt	.Lreturn
	mov	$sve2flag,0
	adrp	$tmp,OPENSSL_armcap_P
	ldr	$tmpw,[$tmp,#:lo12:OPENSSL_armcap_P]
	tst	$tmpw,#ARMV8_SVE2
	b.eq	1f
	mov	$sve2flag,1
	b	2f
1:
	cmp	$veclen,4
	b.le	.Lreturn
	adr	$tmp,.Lrot8
	ldp	$tmpw0,$tmpw1,[$tmp]
	index	$rot8.s,$tmpw0,$tmpw1
2:
	AARCH64_SIGN_LINK_REGISTER
	stp	d8,d9,[sp,-192]!
	stp	d10,d11,[sp,16]
	stp	d12,d13,[sp,32]
	stp	d14,d15,[sp,48]
	stp	x16,x17,[sp,64]
	stp	x18,x19,[sp,80]
	stp	x20,x21,[sp,96]
	stp	x22,x23,[sp,112]
	stp	x24,x25,[sp,128]
	stp	x26,x27,[sp,144]
	stp	x28,x29,[sp,160]
	str	x30,[sp,176]

	adr	$tmp,.Lchacha20_consts
	ldp	@K[0],@K[1],[$tmp]
	ldp	@K[2],@K[3],[$key]
	ldp	@K[4],@K[5],[$key, 16]
	ldp	@K[6],@K[7],[$ctr]
	ptrues	p0.s,ALL
#ifdef __AARCH64EB__
	ror	@K[2],@K[2],#32
	ror	@K[3],@K[3],#32
	ror	@K[4],@K[4],#32
	ror	@K[5],@K[5],#32
	ror	@K[6],@K[6],#32
	ror	@K[7],@K[7],#32
#endif
	cbz	$sve2flag, 1f
___
	&chacha20_sve2();
$code.=<<___;
	b	2f
1:
___
	&chacha20_sve();
$code.=<<___;
2:
	str	@KL[6],[$ctr]
	ldp	d10,d11,[sp,16]
	ldp	d12,d13,[sp,32]
	ldp	d14,d15,[sp,48]
	ldp	x16,x17,[sp,64]
	ldp	x18,x19,[sp,80]
	ldp	x20,x21,[sp,96]
	ldp	x22,x23,[sp,112]
	ldp	x24,x25,[sp,128]
	ldp	x26,x27,[sp,144]
	ldp	x28,x29,[sp,160]
	ldr	x30,[sp,176]
	ldp	d8,d9,[sp],192
	AARCH64_VALIDATE_LINK_REGISTER
.Lreturn:
	ret
.size	ChaCha20_ctr32_sve,.-ChaCha20_ctr32_sve
___

}}}
809
########################################
{
# Hand-maintained SVE instruction encoding tables for emitting .inst
# words, since older assemblers may lack SVE/SVE2 support.  Base opcodes
# below are OR-ed with register/size/immediate fields by the sve_*
# encoders further down.

# Unpredicated vector forms.
my %opcode_unpred = (
	"movprfx" => 0x0420BC00,
	"eor" => 0x04a03000,
	"add" => 0x04200000,
	"orr" => 0x04603000,
	"lsl" => 0x04209C00,
	"lsr" => 0x04209400,
	"incw" => 0x04B00000,
	"xar" => 0x04203400,
	"zip1" => 0x05206000,
	"zip2" => 0x05206400,
	"uzp1" => 0x05206800,
	"uzp2" => 0x05206C00,
	"index" => 0x04204C00,
	"mov" => 0x05203800,
	"dup" => 0x05203800,
	"cntw" => 0x04A0E000,
	"tbl" => 0x05203000);

# Unpredicated forms taking an immediate operand.
my %opcode_imm_unpred = (
	"dup" => 0x2538C000,
	"index" => 0x04204400);

# Predicated forms whose operand is a scalar register.
my %opcode_scalar_pred = (
	"mov" => 0x0528A000,
	"cpy" => 0x0528A000,
	"st4w" => 0xE5606000,
	"st1w" => 0xE5004000,
	"ld1w" => 0xA5404000);

# Predicated gather loads (vector addresses).
my %opcode_gather_pred = (
	"ld1w" => 0x85204000);

# Predicated vector/predicate-register forms.
my %opcode_pred = (
	"eor" => 0x04190000,
	"add" => 0x04000000,
	"orr" => 0x04180000,
	"whilelo" => 0x25200C00,
	"whilelt" => 0x25200400,
	"cntp" => 0x25208000,
	"addvl" => 0x04205000,
	"lsl" => 0x04038000,
	"lsr" => 0x04018000,
	"sel" => 0x0520C000,
	"mov" => 0x0520C000,
	"ptrue" => 0x2518E000,
	"pfalse" => 0x2518E400,
	"ptrues" => 0x2519E000,
	"pnext" => 0x2519C400,
	"ld4w" => 0xA560E000,
	"st4w" => 0xE570E000,
	"st1w" => 0xE500E000,
	"ld1w" => 0xA540A000,
	"ld1rw" => 0x8540C000,
	"lasta" => 0x0520A000,
	"revh" => 0x05258000,
	"revb" => 0x05248000);

# Element-size field values (bits [23:22] in most encodings).
my %tsize = (
	'b' => 0,
	'h' => 1,
	's' => 2,
	'd' => 3);

# Scalar register width flag.
my %sf = (
	"w" => 0,
	"x" => 1);

# Predicate-constraint pattern field values.
my %pattern = (
	"POW2" => 0,
	"VL1" => 1,
	"VL2" => 2,
	"VL3" => 3,
	"VL4" => 4,
	"VL5" => 5,
	"VL6" => 6,
	"VL7" => 7,
	"VL8" => 8,
	"VL16" => 9,
	"VL32" => 10,
	"VL64" => 11,
	"VL128" => 12,
	"VL256" => 13,
	"MUL4" => 29,
	"MUL3" => 30,
	"ALL" => 31);
898
# Write a helper shell script (./compile_sve.sh) that assembles a single
# instruction with a real toolchain and prints its hex encoding, used to
# cross-check our hand-rolled encoders when $debug_encoder is enabled.
sub create_verifier {
	my $filename="./compile_sve.sh";

$scripts = <<___;
#! /bin/bash
set -e
CROSS_COMPILE=\${CROSS_COMPILE:-'aarch64-none-linux-gnu-'}

[ -z "\$1" ] && exit 1
ARCH=`uname -p | xargs echo -n`

# need gcc-10 and above to compile SVE code
# change this according to your system during debugging
if [ \$ARCH == 'aarch64' ]; then
	CC=gcc-11
	OBJDUMP=objdump
else
	CC=\${CROSS_COMPILE}gcc
	OBJDUMP=\${CROSS_COMPILE}objdump
fi
TMPFILE=/tmp/\$\$
cat > \$TMPFILE.c << EOF
extern __attribute__((noinline, section("disasm_output"))) void dummy_func()
{
	asm("\$@\\t\\n");
}
int main(int argc, char *argv[])
{
}
EOF
\$CC -march=armv8.2-a+sve+sve2 -o \$TMPFILE.out \$TMPFILE.c
\$OBJDUMP -d \$TMPFILE.out | awk -F"\\n" -v RS="\\n\\n" '\$1 ~ /dummy_func/' | awk 'FNR == 2 {printf "%s",\$2}'
rm \$TMPFILE.c \$TMPFILE.out
___
	open(FH, '>', $filename) or die $!;
	print FH $scripts;
	close(FH);
	system("chmod a+x ./compile_sve.sh");
}
938
# Assemble one instruction via the generated helper script and return
# the toolchain's hex encoding for comparison.
sub compile_sve {
	return `./compile_sve.sh '@_'`
}
942
# Render a computed 32-bit encoding as an ".inst 0x...\t//mnemonic" line.
# When $debug_encoder is set, also assemble the textual instruction with
# the real toolchain and flag any mismatch inline.
sub verify_inst {
	my ($code,$inst)=@_;
	my $hexcode = (sprintf "%08x", $code);

	if ($debug_encoder == 1) {
		my $expect=&compile_sve($inst);
		if ($expect ne $hexcode) {
			return (sprintf "%s // Encode Error! expect [%s] actual [%s]", $inst, $expect, $hexcode);
		}
	}
	return (sprintf ".inst\t0x%s\t//%s", $hexcode, $inst);
}
955
# Map a register name fragment to its 5-bit register number:
# "zr" (wzr/xzr) encodes as 31, a decimal register number maps to itself.
sub reg_code {
	my $reg = shift;

	# Must be a STRING comparison.  The original numeric "==" numified
	# both sides, so "zr" became 0 and register 0 ("w0"/"x0") compared
	# equal to "zr", silently encoding register 0 as 31.
	if ($reg eq "zr") {
		return "31";
	}
	return $reg;
}
964
# Build the tszh:tszl/imm3 field pair for unpredicated shift-by-immediate
# encodings.  Left shifts encode esize+const; right shifts and XAR encode
# 2*esize-const (per the SVE shift-immediate encoding scheme).
sub encode_size_imm() {
	my ($mnemonic, $isize, $const)=@_;
	my $esize = (8<<$tsize{$isize});
	my $tsize_imm = $esize + $const;

	if ($mnemonic eq "lsr" || $mnemonic eq "xar") {
		$tsize_imm = 2*$esize - $const;
	}
	return (($tsize_imm>>5)<<22)|(($tsize_imm&0x1f)<<16);
}
975
# Same immediate-field computation as encode_size_imm but for predicated
# shifts, where the immediate lands at bit 5 instead of bit 16.
sub encode_shift_pred() {
	my ($mnemonic, $isize, $const)=@_;
	my $esize = (8<<$tsize{$isize});
	my $tsize_imm = $esize + $const;

	if ($mnemonic eq "lsr") {
		$tsize_imm = 2*$esize - $const;
	}
	return (($tsize_imm>>5)<<22)|(($tsize_imm&0x1f)<<5);
}
986
# Encode an unpredicated SVE instruction from its operand string.
# Recognized shapes, in order: tbl-style "zd.{t},{zn.t},zm.t"; then
# "zd.{t}, <scalar/vector operands>" covering shift-by-imm, two-scalar
# index, scalar+imm index, single-scalar dup, vector+imm, and
# vector,vector; finally "zd.{t},#imm" via the immediate table.
sub sve_unpred {
	my ($mnemonic,$arg)=@_;
	my $inst = (sprintf "%s %s", $mnemonic,$arg);

	if ($arg =~ m/z([0-9]+)\.([bhsd]),\s*\{\s*z([0-9]+)\.[bhsd].*\},\s*z([0-9]+)\.[bhsd].*/o) {
		# tbl zd.t,{zn.t},zm.t
		return &verify_inst($opcode_unpred{$mnemonic}|$1|($3<<5)|($tsize{$2}<<22)|($4<<16),
				    $inst)
	} elsif ($arg =~ m/z([0-9]+)\.([bhsd]),\s*([zwx][0-9]+.*)/o) {
		my $regd = $1;
		my $isize = $2;
		my $regs=$3;

		if (($mnemonic eq "lsl") || ($mnemonic eq "lsr")) {
			# shift by immediate; immediate must fit the element size
			if ($regs =~ m/z([0-9]+)[^,]*(?:,\s*#?([0-9]+))?/o
			    && ((8<<$tsize{$isize}) > $2)) {
				return &verify_inst($opcode_unpred{$mnemonic}|$regd|($1<<5)|&encode_size_imm($mnemonic,$isize,$2),
						    $inst);
			}
		} elsif($regs =~ m/[wx]([0-9]+),\s*[wx]([0-9]+)/o) {
			# index zd.t,rn,rm
			return &verify_inst($opcode_unpred{$mnemonic}|$regd|($tsize{$isize}<<22)|($1<<5)|($2<<16), $inst);
		} elsif ($regs =~ m/[wx]([0-9]+),\s*#?([0-9]+)/o) {
			# index zd.t,rn,#imm
			return &verify_inst($opcode_imm_unpred{$mnemonic}|$regd|($tsize{$isize}<<22)|($1<<5)|($2<<16), $inst);
		} elsif ($regs =~ m/[wx]([0-9]+)/o) {
			# dup zd.t,rn
			return &verify_inst($opcode_unpred{$mnemonic}|$regd|($tsize{$isize}<<22)|($1<<5), $inst);
		} else {
			# only these mnemonics carry a size field in this form
			my $encoded_size = 0;
			if (($mnemonic eq "add") || ($mnemonic =~ /zip./) || ($mnemonic =~ /uzp./) ) {
				$encoded_size = ($tsize{$isize}<<22);
			}
			if ($regs =~ m/z([0-9]+)\.[bhsd],\s*z([0-9]+)\.[bhsd],\s*([0-9]+)/o &&
			    $1 == $regd) {
				# xar-style zd,zd,zm,#imm
				return &verify_inst($opcode_unpred{$mnemonic}|$regd|($2<<5)|&encode_size_imm($mnemonic,$isize,$3), $inst);
			} elsif ($regs =~ m/z([0-9]+)\.[bhsd],\s*z([0-9]+)\.[bhsd]/o) {
				return &verify_inst($opcode_unpred{$mnemonic}|$regd|$encoded_size|($1<<5)|($2<<16), $inst);
			}
		}
	} elsif ($arg =~ m/z([0-9]+)\.([bhsd]),\s*#?([0-9]+)/o) {
		# dup zd.t,#imm
		return &verify_inst($opcode_imm_unpred{$mnemonic}|$1|($3<<5)|($tsize{$2}<<22),
				    $inst)
	}
	sprintf "%s // fail to parse", $inst;
}
1029
# Encode a predicated SVE instruction from its operand string.
# Handles, in order: memory forms "{zt.t},pg[/z],[addr]" (scalar-indexed,
# gather, immediate-offset, plain); register forms "zd.t,pg/m|z,operands"
# (predicated shift-by-imm, scalar cpy, sel/mov/two- and one-vector);
# predicate-register forms "pd.t,..." (while*, pnext, ptrue patterns);
# and bare "pd.t" (pfalse).
#
# Fixes over the original:
#  - "my ($mnemonic,,$arg)" had a stray double comma.
#  - The predicated-shift guard read "$mode == 'm'": $mode was an
#    undeclared typo for $mod, and numeric == on strings made the test
#    (undef == 'm' i.e. 0 == 0) vacuously true.  Now "$mod eq 'm'",
#    which is the actual architectural requirement (merging form only).
sub sve_pred {
	my ($mnemonic,$arg)=@_;
	my $inst = (sprintf "%s %s", $mnemonic,$arg);

	if ($arg =~ m/\{\s*z([0-9]+)\.([bhsd]).*\},\s*p([0-9])+(\/z)?,\s*\[(\s*[xs].*)\]/o) {
		my $zt = $1;
		my $size = $tsize{$2};
		my $pg = $3;
		my $addr = $5;
		my $xn = 31;		# defaults to sp

		if ($addr =~ m/x([0-9]+)\s*/o) {
			$xn = $1;
		}

		if ($mnemonic =~m/ld1r[bhwd]/o) {
			$size = 0;	# ld1r* encodes no size in this field
		}
		if ($addr =~ m/\w+\s*,\s*x([0-9]+),.*/o) {
			# [xn, xm, lsl #..] scalar-indexed
			return &verify_inst($opcode_scalar_pred{$mnemonic}|($size<<21)|$zt|($pg<<10)|($1<<16)|($xn<<5),$inst);
		} elsif ($addr =~ m/\w+\s*,\s*z([0-9]+)\.s,\s*([US]\w+)/o) {
			# [xn, zm.s, SXTW|UXTW] gather
			my $xs = ($2 eq "SXTW") ? 1 : 0;
			return &verify_inst($opcode_gather_pred{$mnemonic}|($xs<<22)|$zt|($pg<<10)|($1<<16)|($xn<<5),$inst);
		} elsif($addr =~ m/\w+\s*,\s*#?([0-9]+)/o) {
			# [xn, #imm, MUL VL]
			return &verify_inst($opcode_pred{$mnemonic}|($size<<21)|$zt|($pg<<10)|($1<<16)|($xn<<5),$inst);
		} else {
			# [xn]
			return &verify_inst($opcode_pred{$mnemonic}|($size<<21)|$zt|($pg<<10)|($xn<<5),$inst);
		}
	} elsif ($arg =~ m/z([0-9]+)\.([bhsd]),\s*p([0-9]+)\/([mz]),\s*([zwx][0-9]+.*)/o) {
		my $regd = $1;
		my $isize = $2;
		my $pg = $3;
		my $mod = $4;
		my $regs = $5;

		if (($mnemonic eq "lsl") || ($mnemonic eq "lsr")) {
			if ($regs =~ m/z([0-9]+)[^,]*(?:,\s*#?([0-9]+))?/o
			    && $regd == $1
			    && $mod eq 'm'
			    && ((8<<$tsize{$isize}) > $2)) {
				return &verify_inst($opcode_pred{$mnemonic}|$regd|($pg<<10)|&encode_shift_pred($mnemonic,$isize,$2), $inst);
			}
		} elsif($regs =~ m/[wx]([0-9]+)/o) {
			# cpy/mov zd.t,pg/m,rn
			return &verify_inst($opcode_scalar_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($1<<5), $inst);
		} elsif ($regs =~ m/z([0-9]+)[^,]*(?:,\s*z([0-9]+))?/o) {
			if ($mnemonic eq "sel") {
				return &verify_inst($opcode_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($1<<5)|($2<<16), $inst);
			} elsif ($mnemonic eq "mov") {
				# mov is sel with zd as the false operand
				return &verify_inst($opcode_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($1<<5)|($regd<<16), $inst);
			} elsif (length $2 > 0) {
				return &verify_inst($opcode_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($2<<5), $inst);
			} else {
				return &verify_inst($opcode_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($1<<5), $inst);
			}
		}
	} elsif ($arg =~ m/p([0-9]+)\.([bhsd]),\s*(\w+.*)/o) {
		my $pg = $1;
		my $isize = $2;
		my $regs = $3;

		if ($regs =~ m/([wx])(zr|[0-9]+),\s*[wx](zr|[0-9]+)/o) {
			# whilelo/whilelt pd.t,rn,rm
			return &verify_inst($opcode_pred{$mnemonic}|($tsize{$isize}<<22)|$pg|($sf{$1}<<12)|(&reg_code($2)<<5)|(&reg_code($3)<<16), $inst);
		} elsif ($regs =~ m/p([0-9]+),\s*p([0-9]+)\.[bhsd]/o) {
			# pnext pd.t,pg,pd.t
			return &verify_inst($opcode_pred{$mnemonic}|($tsize{$isize}<<22)|$pg|($1<<5), $inst);
		} else {
			# ptrue/ptrues pd.t,PATTERN
			return &verify_inst($opcode_pred{$mnemonic}|($tsize{$isize}<<22)|$pg|($pattern{$regs}<<5), $inst);
		}
	} elsif ($arg =~ m/p([0-9]+)\.([bhsd])/o) {
		return &verify_inst($opcode_pred{$mnemonic}|$1, $inst);
	}

	sprintf "%s // fail to parse", $inst;
}
1103
# Encode the remaining instruction shapes: cntp (xd,pg,pn.t), lasta
# (rd,pg,zn.t), inc[bhdw]/cnt[bhdw] counter ops with optional pattern
# and multiplier, addvl (xd,xn,#imm), and movprfx (zd,zn).
sub sve_other {
	my ($mnemonic,$arg)=@_;
	my $inst = (sprintf "%s %s", $mnemonic,$arg);

	if ($arg =~ m/x([0-9]+)[^,]*,\s*p([0-9]+)[^,]*,\s*p([0-9]+)\.([bhsd])/o) {
		# cntp xd,pg,pn.t
		return &verify_inst($opcode_pred{$mnemonic}|($tsize{$4}<<22)|$1|($2<<10)|($3<<5), $inst);
	} elsif ($arg =~ m/(x|w)([0-9]+)[^,]*,\s*p([0-9]+)[^,]*,\s*z([0-9]+)\.([bhsd])/o) {
		# lasta rd,pg,zn.t
		# NOTE(review): $1 here is the "x"/"w" capture and numifies to 0
		# in the OR; the register-width distinction appears to be
		# carried by the size field only -- confirm against the Arm ARM.
		return &verify_inst($opcode_pred{$mnemonic}|($tsize{$5}<<22)|$1|($3<<10)|($4<<5)|$2, $inst);
	} elsif ($mnemonic =~ /inc[bhdw]/) {
		if ($arg =~ m/x([0-9]+)[^,]*,\s*(\w+)[^,]*,\s*MUL\s*#?([0-9]+)/o) {
			return &verify_inst($opcode_unpred{$mnemonic}|$1|($pattern{$2}<<5)|(2<<12)|(($3 - 1)<<16)|0xE000, $inst);
		} elsif ($arg =~ m/z([0-9]+)[^,]*,\s*(\w+)[^,]*,\s*MUL\s*#?([0-9]+)/o) {
			return &verify_inst($opcode_unpred{$mnemonic}|$1|($pattern{$2}<<5)|(($3 - 1)<<16)|0xC000, $inst);
		} elsif ($arg =~ m/x([0-9]+)/o) {
			return &verify_inst($opcode_unpred{$mnemonic}|$1|(31<<5)|(0<<16)|0xE000, $inst);
		}
	} elsif ($mnemonic =~ /cnt[bhdw]/) {
		if ($arg =~ m/x([0-9]+)[^,]*,\s*(\w+)[^,]*,\s*MUL\s*#?([0-9]+)/o) {
			return &verify_inst($opcode_unpred{$mnemonic}|$1|($pattern{$2}<<5)|(($3 - 1)<<16), $inst);
		}
	} elsif ($arg =~ m/x([0-9]+)[^,]*,\s*x([0-9]+)[^,]*,\s*#?([0-9]+)/o) {
		# addvl xd,xn,#imm
		return &verify_inst($opcode_pred{$mnemonic}|$1|($2<<16)|($3<<5), $inst);
	} elsif ($arg =~ m/z([0-9]+)[^,]*,\s*z([0-9]+)/o) {
		# movprfx zd,zn
		return &verify_inst($opcode_unpred{$mnemonic}|$1|($2<<5), $inst);
	}
	sprintf "%s // fail to parse", $inst;
}
}
1131}
1132
# Emit this script's leading comment block (as // comments) into the
# output, then post-process every generated line: evaluate `...`
# escapes and rewrite SVE mnemonics into .inst words via the encoders.
open SELF,$0;
while(<SELF>) {
	next if (/^#!/);
	last if (!s/^#/\/\// and !/^$/);
	print;
}
close SELF;

if ($debug_encoder == 1) {
	&create_verifier();
}

foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/ge;
	s/\b(\w+)\s+(z[0-9]+\.[bhsd],\s*[#zwx]?[0-9]+.*)/sve_unpred($1,$2)/ge;
	s/\b(\w+)\s+(z[0-9]+\.[bhsd],\s*\{.*\},\s*z[0-9]+.*)/sve_unpred($1,$2)/ge;
	s/\b(\w+)\s+(z[0-9]+\.[bhsd],\s*p[0-9].*)/sve_pred($1,$2)/ge;
	s/\b(\w+[1-4]r[bhwd])\s+(\{\s*z[0-9]+.*\},\s*p[0-9]+.*)/sve_pred($1,$2)/ge;
	s/\b(\w+[1-4][bhwd])\s+(\{\s*z[0-9]+.*\},\s*p[0-9]+.*)/sve_pred($1,$2)/ge;
	s/\b(\w+)\s+(p[0-9]+\.[bhsd].*)/sve_pred($1,$2)/ge;
	s/\b(movprfx|lasta|cntp|cnt[bhdw]|addvl|inc[bhdw])\s+((x|z|w).*)/sve_other($1,$2)/ge;
	print $_,"\n";
}

close STDOUT or die "error closing STDOUT: $!";