# Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
# ChaCha20 for ARMv8 via SVE
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Locate the arm-xlate.pl translator: first next to this script, then in
# the shared perlasm directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

# All generated assembly is piped through the translator into $output.
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
# AUTOLOAD thunk (simplified x86-style perlasm): any call to an undefined
# sub, e.g. &some_mnemonic(...), is converted into an assembly line
# "some.mnemonic <args>" appended to $code; underscores in the Perl name
# become dots in the mnemonic, and a purely numeric last argument gets a
# "#" immediate prefix.
# NOTE(review): this chunk appears to be missing lines — "my $arg = pop;"
# and the sub's closing brace are not visible here, yet $arg is used below
# without a visible assignment.  Confirm against the complete source.
26 sub AUTOLOAD
() # thunk [simplified] x86-style perlasm
27 { my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
29 $arg = "#$arg" if ($arg*1 eq $arg);
30 $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
# Scalar (x/w) register assignments used by the generated assembly:
# x0..x4 carry the function arguments, the rest are scratch/bookkeeping.
my ($outp,$inp,$len,$key,$ctr) = map("x$_",(0..4));
my ($veclen_w,$veclen,$blocks) = ("w5","x5","x6");
my ($sve2flag) = ("x7");
my ($wctr, $xctr) = ("w8", "x8");
my ($tmpw0,$tmp0,$tmpw1,$tmp1) = ("w9","x9", "w10","x10");
my ($tmp,$tmpw) = ("x10", "w10");
my ($counter) = ("x11");
# Key/counter words kept in general-purpose registers across the loop.
my @K=map("x$_",(12..15,19..22));
my @KL=map("w$_",(12..15,19..22));
# SVE vector registers z0..z15 hold the 4x4 ChaCha state matrix.
my @mx=map("z$_",(0..15));
my ($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3,
    $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3) = @mx;
my @xt=map("z$_",(17..24));
my @perm=map("z$_",(25..30));
my ($xt0,$xt1,$xt2,$xt3,$xt4,$xt5,$xt6,$xt7)=@xt;
# in SVE mode we can only use bak0 ~ bak9 (the rest used as scratch register)
# in SVE2 we use all 15 backup register
# NOTE(review): $rot8 here (and $zctr, referenced further down) have no
# visible declaration in this chunk — their "my" lines appear to have been
# lost in extraction; confirm against the complete source.
my ($bak0,$bak1,$bak2,$bak3,$bak4,$bak5,$bak6,$bak7,$bak8,$bak9,$bak10,$bak11,$bak13,$bak14,$bak15)=($perm[0],$perm[1],$perm[2],$perm[3],$perm[4],$perm[5],$xt4,$xt5,$xt6,$xt7,$xt0,$xt1,$xt2,$xt3,$rot8);
60 add
@mx[$x].s
,@mx[$x].s
,@mx[$y].s
72 eor
@mx[$x].d
,@mx[$x].d
,@mx[$y].d
86 lsl
@xt[$x].s
,@mx[$y].s
,$bits
89 &SVE_LSL
($bits,$next,@_);
98 lsr
@mx[$x].s
,@mx[$x].s
,$bits
111 orr
@mx[$y].d
,@mx[$y].d
,@xt[$x].d
122 revh
@mx[$x].s
,p0
/m
,@mx[$x].s
133 tbl
@mx[$x].b
,{@mx[$x].b
},$rot8.b
144 my $rbits = 32-$bits;
147 xar
@mx[$x].s
,@mx[$x].s
,@mx[$y].s
,$rbits
155 my $have_sve2 = shift;
156 my ($a0,$b0,$c0,$d0,$a1,$b1,$c1,$d1,$a2,$b2,$c2,$d2,$a3,$b3,$c3,$d3) = @_;
158 &SVE_ADD
($a0,$b0,$a1,$b1,$a2,$b2,$a3,$b3);
159 if ($have_sve2 == 0) {
160 &SVE_EOR
($d0,$a0,$d1,$a1,$d2,$a2,$d3,$a3);
161 &SVE_REV16
($d0,$d1,$d2,$d3);
163 &SVE2_XAR
(16,$d0,$a0,$d1,$a1,$d2,$a2,$d3,$a3);
166 &SVE_ADD
($c0,$d0,$c1,$d1,$c2,$d2,$c3,$d3);
167 if ($have_sve2 == 0) {
168 &SVE_EOR
($b0,$c0,$b1,$c1,$b2,$c2,$b3,$c3);
169 &SVE_LSL
(12,0,$b0,$b1,$b2,$b3);
170 &SVE_LSR
(20,$b0,$b1,$b2,$b3);
171 &SVE_ORR
(0,$b0,$b1,$b2,$b3,);
173 &SVE2_XAR
(12,$b0,$c0,$b1,$c1,$b2,$c2,$b3,$c3);
176 &SVE_ADD
($a0,$b0,$a1,$b1,$a2,$b2,$a3,$b3);
177 if ($have_sve2 == 0) {
178 &SVE_EOR
($d0,$a0,$d1,$a1,$d2,$a2,$d3,$a3);
179 &SVE_ROT8
($d0,$d1,$d2,$d3);
181 &SVE2_XAR
(8,$d0,$a0,$d1,$a1,$d2,$a2,$d3,$a3);
184 &SVE_ADD
($c0,$d0,$c1,$d1,$c2,$d2,$c3,$d3);
185 if ($have_sve2 == 0) {
186 &SVE_EOR
($b0,$c0,$b1,$c1,$b2,$c2,$b3,$c3);
187 &SVE_LSL
(7,0,$b0,$b1,$b2,$b3);
188 &SVE_LSR
(25,$b0,$b1,$b2,$b3);
189 &SVE_ORR
(0,$b0,$b1,$b2,$b3);
191 &SVE2_XAR
(7,$b0,$c0,$b1,$c1,$b2,$c2,$b3,$c3);
195 sub SVE_INNER_BLOCK
() {
201 &SVE_QR_GROUP
(0,0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15);
202 &SVE_QR_GROUP
(0,0,5,10,15,1,6,11,12,2,7,8,13,3,4,9,14);
204 subs
$counter,$counter,1
209 sub SVE2_INNER_BLOCK
() {
215 &SVE_QR_GROUP
(1,0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15);
216 &SVE_QR_GROUP
(1,0,5,10,15,1,6,11,12,2,7,8,13,3,4,9,14);
218 subs
$counter,$counter,1
234 ld1w
{$x0.s
},p0
/z
,[$inp]
235 ld1w
{$x1.s
},p0
/z
,[$inp, #1, MUL VL]
236 ld1w
{$x2.s
},p0
/z
,[$inp, #2, MUL VL]
237 ld1w
{$x3.s
},p0
/z
,[$inp, #3, MUL VL]
238 ld1w
{$x4.s
},p0
/z
,[$inp, #4, MUL VL]
239 ld1w
{$x5.s
},p0
/z
,[$inp, #5, MUL VL]
240 ld1w
{$x6.s
},p0
/z
,[$inp, #6, MUL VL]
241 ld1w
{$x7.s
},p0
/z
,[$inp, #7, MUL VL]
257 st1w
{$x0.s
},p0
,[$outp]
258 st1w
{$x1.s
},p0
,[$outp, #1, MUL VL]
259 st1w
{$x2.s
},p0
,[$outp, #2, MUL VL]
260 st1w
{$x3.s
},p0
,[$outp, #3, MUL VL]
261 st1w
{$x4.s
},p0
,[$outp, #4, MUL VL]
262 st1w
{$x5.s
},p0
,[$outp, #5, MUL VL]
263 st1w
{$x6.s
},p0
,[$outp, #6, MUL VL]
264 st1w
{$x7.s
},p0
,[$outp, #7, MUL VL]
276 zip1
$xt0.s
,$xa.s
,$xb.s
277 zip2
$xt1.s
,$xa.s
,$xb.s
278 zip1
$xt2.s
,$xc.s
,$xd.s
279 zip2
$xt3.s
,$xc.s
,$xd.s
280 zip1
$xa.d
,$xt0.d
,$xt2.d
281 zip2
$xb.d
,$xt0.d
,$xt2.d
282 zip1
$xc.d
,$xt1.d
,$xt3.d
283 zip2
$xd.d
,$xt1.d
,$xt3.d
287 sub SVE_ADD_STATES
() {
292 add
@mx[0].s
,@mx[0].s
,$bak0.s
293 add
@mx[1].s
,@mx[1].s
,$bak1.s
294 add
@mx[2].s
,@mx[2].s
,$bak2.s
295 add
@mx[3].s
,@mx[3].s
,$bak3.s
296 add
@mx[4].s
,@mx[4].s
,$bak4.s
297 add
@mx[5].s
,@mx[5].s
,$bak5.s
298 add
@mx[6].s
,@mx[6].s
,$bak6.s
299 add
@mx[7].s
,@mx[7].s
,$bak7.s
300 add
@mx[8].s
,@mx[8].s
,$bak8.s
301 add
@mx[9].s
,@mx[9].s
,$bak9.s
307 add
@mx[10].s
,@mx[10].s
,$xt0.s
308 add
@mx[11].s
,@mx[11].s
,$xt1.s
309 add
@mx[12].s
,@mx[12].s
,$zctr.s
310 add
@mx[13].s
,@mx[13].s
,$xt4.s
311 add
@mx[14].s
,@mx[14].s
,$xt5.s
312 add
@mx[15].s
,@mx[15].s
,$xt6.s
316 sub SVE2_ADD_STATES
() {
318 add
@mx[0].s
,@mx[0].s
,$bak0.s
319 add
@mx[1].s
,@mx[1].s
,$bak1.s
320 add
@mx[2].s
,@mx[2].s
,$bak2.s
321 add
@mx[3].s
,@mx[3].s
,$bak3.s
322 add
@mx[4].s
,@mx[4].s
,$bak4.s
323 add
@mx[5].s
,@mx[5].s
,$bak5.s
324 add
@mx[6].s
,@mx[6].s
,$bak6.s
325 add
@mx[7].s
,@mx[7].s
,$bak7.s
326 add
@mx[8].s
,@mx[8].s
,$bak8.s
327 add
@mx[9].s
,@mx[9].s
,$bak9.s
328 add
@mx[10].s
,@mx[10].s
,$bak10.s
329 add
@mx[11].s
,@mx[11].s
,$bak11.s
330 add
@mx[12].s
,@mx[12].s
,$zctr.s
331 add
@mx[13].s
,@mx[13].s
,$bak13.s
332 add
@mx[14].s
,@mx[14].s
,$bak14.s
333 add
@mx[15].s
,@mx[15].s
,$bak15.s
337 sub SVE_TRANSFORMS
() {
338 &transpose
($xa0,$xb0,$xc0,$xd0);
339 &transpose
($xa1,$xb1,$xc1,$xd1);
340 &transpose
($xa2,$xb2,$xc2,$xd2);
341 &transpose
($xa3,$xb3,$xc3,$xd3);
342 &transpose
($xa0,$xa1,$xa2,$xa3);
343 &transpose
($xb0,$xb1,$xb2,$xb3);
344 &load
($xt0,$xt1,$xt2,$xt3,$xt4,$xt5,$xt6,$xt7);
346 eor
$xa0.d
,$xa0.d
,$xt0.d
347 eor
$xa1.d
,$xa1.d
,$xt1.d
348 eor
$xa2.d
,$xa2.d
,$xt2.d
349 eor
$xa3.d
,$xa3.d
,$xt3.d
350 eor
$xb0.d
,$xb0.d
,$xt4.d
351 eor
$xb1.d
,$xb1.d
,$xt5.d
352 eor
$xb2.d
,$xb2.d
,$xt6.d
353 eor
$xb3.d
,$xb3.d
,$xt7.d
355 &transpose
($xc0,$xc1,$xc2,$xc3);
356 &store
($xa0,$xa1,$xa2,$xa3,$xb0,$xb1,$xb2,$xb3);
357 &transpose
($xd0,$xd1,$xd2,$xd3);
358 &load
($xt0,$xt1,$xt2,$xt3,$xt4,$xt5,$xt6,$xt7);
360 eor
$xc0.d
,$xc0.d
,$xt0.d
361 eor
$xc1.d
,$xc1.d
,$xt1.d
362 eor
$xc2.d
,$xc2.d
,$xt2.d
363 eor
$xc3.d
,$xc3.d
,$xt3.d
364 eor
$xd0.d
,$xd0.d
,$xt4.d
365 eor
$xd1.d
,$xd1.d
,$xt5.d
366 eor
$xd2.d
,$xd2.d
,$xt6.d
367 eor
$xd3.d
,$xd3.d
,$xt7.d
369 &store
($xc0,$xc1,$xc2,$xc3,$xd0,$xd1,$xd2,$xd3);
371 incw
$xctr, ALL
, MUL
#1
372 incw
$zctr.s
, ALL
, MUL
#1
376 sub SVE_LOAD_STATES
() {
406 orr
@mx[12].d
,$zctr.d
,$zctr.d
415 sub SVE2_LOAD_STATES
() {
447 orr
@mx[12].d
,$zctr.d
,$zctr.d
459 sub sve_handle_blocks
() {
461 cbz
$sve2flag,.sve_inner
479 sub chacha20_process
() {
486 &sve_handle_blocks
();
488 subs
$blocks,$blocks,$veclen
496 #include "arm_arch.h"
500 .extern OPENSSL_armcap_P
501 .hidden OPENSSL_armcap_P
506 .quad
0x3320646e61707865,0x6b20657479622d32 // endian
-neutral
508 .word
0x02010003,0x04040404,0x02010003,0x04040404
509 .globl ChaCha20_ctr32_sve
510 .type ChaCha20_ctr32_sve
,%function
513 AARCH64_VALID_CALL_TARGET
514 cntw
$veclen, ALL
, MUL
#1
519 adrp
$tmp,OPENSSL_armcap_P
520 ldr
$tmpw,[$tmp,#:lo12:OPENSSL_armcap_P]
521 tst
$tmpw,#ARMV8_SVE2
529 ldp
$tmpw0,$tmpw1,[$tmp]
530 index $rot8.s
,$tmpw0,$tmpw1
538 adr
$tmp,.Lchacha20_consts
539 ldp
@K[0],@K[1],[$tmp]
540 ldp
@K[2],@K[3],[$key]
541 ldp
@K[4],@K[5],[$key, 16]
542 ldp
@K[6],@K[7],[$ctr]
544 index $zctr.s
,$wctr,1
565 add
$len,$len,$blocks,lsl
#6
568 .size ChaCha20_ctr32_sve
,.-ChaCha20_ctr32_sve
573 ########################################
575 my %opcode_unpred = (
576 "movprfx" => 0x0420BC00,
582 "incw" => 0x04B0C000,
584 "zip1" => 0x05206000,
585 "zip2" => 0x05206400,
586 "uzp1" => 0x05206800,
587 "uzp2" => 0x05206C00,
588 "index" => 0x04204C00,
591 "cntw" => 0x04A0E000,
592 "tbl" => 0x05203000);
594 my %opcode_imm_unpred = (
596 "index" => 0x04204400);
598 my %opcode_scalar_pred = (
601 "st4w" => 0xE5606000,
602 "st1w" => 0xE5004000,
603 "ld1w" => 0xA5404000);
# Base encodings for gather-load instructions with a predicate operand;
# ORed with register/size fields by sve_pred() below.
my %opcode_gather_pred = (
	"ld1w" => 0x85204000);
612 "whilelo" => 0x25200C00,
613 "whilelt" => 0x25200400,
614 "cntp" => 0x25208000,
615 "addvl" => 0x04205000,
620 "ptrue" => 0x2518E000,
621 "pfalse" => 0x2518E400,
622 "ptrues" => 0x2519E000,
623 "pnext" => 0x2519C400,
624 "ld4w" => 0xA560E000,
625 "st4w" => 0xE570E000,
626 "st1w" => 0xE500E000,
627 "ld1w" => 0xA540A000,
628 "ld1rw" => 0x8540C000,
629 "revh" => 0x05258000);
660 sub create_verifier
{
661 my $filename="./compile_sve.sh";
666 CROSS_COMPILE
=\
${CROSS_COMPILE
:-'aarch64-none-linux-gnu-'}
668 [ -z
"\$1" ] && exit 1
669 ARCH
=`uname -p | xargs echo -n`
671 # need gcc-10 and above to compile SVE code
672 # change this according to your system during debugging
673 if [ \
$ARCH == 'aarch64' ]; then
677 CC
=\
${CROSS_COMPILE
}gcc
678 OBJDUMP
=\
${CROSS_COMPILE
}objdump
681 cat
> \
$TMPFILE.c
<< EOF
682 extern __attribute__
((noinline
, section
("disasm_output"))) void dummy_func
()
686 int main
(int argc
, char
*argv
[])
690 \
$CC -march
=armv8
.2
-a
+sve
+sve2
-o \
$TMPFILE.out \
$TMPFILE.c
691 \
$OBJDUMP -d \
$TMPFILE.out
| awk
-F
"\\n" -v RS
="\\n\\n" '\$1 ~ /dummy_func/' | awk
'FNR == 2 {printf "%s",\$2}'
692 rm \
$TMPFILE.c \
$TMPFILE.out
694 open(FH
, '>', $filename) or die $!;
697 system("chmod a+x ./compile_sve.sh");
701 return `./compile_sve.sh '@_'`
706 my $hexcode = (sprintf "%08x", $code);
708 if ($debug_encoder == 1) {
709 my $expect=&compile_sve
($inst);
710 if ($expect ne $hexcode) {
711 return (sprintf "%s // Encode Error! expect [%s] actual [%s]", $inst, $expect, $hexcode);
714 return (sprintf ".inst\t0x%s\t//%s", $hexcode, $inst);
# Encode the tsize:imm field (bits 22 and 20:16) for unpredicated
# shift-by-immediate forms.  $isize is the element-size suffix (b/h/s/d),
# $const the shift amount.
sub encode_size_imm() {
	my ($mnemonic, $isize, $const)=@_;
	my $esize = (8<<$tsize{$isize});
	my $tsize_imm = $esize + $const;

	# right-shift style immediates are encoded as (2*esize - shift)
	if ($mnemonic eq "lsr" || $mnemonic eq "xar") {
		$tsize_imm = 2*$esize - $const;
	}
	return (($tsize_imm>>5)<<22)|(($tsize_imm&0x1f)<<16);
}
# Encode the tsize:imm field (bits 22 and 9:5) for predicated
# shift-by-immediate forms; same scheme as encode_size_imm() but the
# immediate lands in a different bit position.
sub encode_shift_pred() {
	my ($mnemonic, $isize, $const)=@_;
	my $esize = (8<<$tsize{$isize});
	my $tsize_imm = $esize + $const;

	# right shifts are encoded as (2*esize - shift)
	if ($mnemonic eq "lsr") {
		$tsize_imm = 2*$esize - $const;
	}
	return (($tsize_imm>>5)<<22)|(($tsize_imm&0x1f)<<5);
}
749 my ($mnemonic,$arg)=@_;
750 my $inst = (sprintf "%s %s", $mnemonic,$arg);
752 if ($arg =~ m/z([0-9]+)\.([bhsd]),\s*\{\s*z([0-9]+)\.[bhsd].*\},\s*z([0-9]+)\.[bhsd].*/o) {
753 return &verify_inst
($opcode_unpred{$mnemonic}|$1|($3<<5)|($tsize{$2}<<22)|($4<<16),
755 } elsif ($arg =~ m/z([0-9]+)\.([bhsd]),\s*([zwx][0-9]+.*)/o) {
760 if (($mnemonic eq "lsl") || ($mnemonic eq "lsr")) {
761 if ($regs =~ m/z([0-9]+)[^,]*(?:,\s*#?([0-9]+))?/o
762 && ((8<<$tsize{$isize}) > $2)) {
763 return &verify_inst
($opcode_unpred{$mnemonic}|$regd|($1<<5)|&encode_size_imm
($mnemonic,$isize,$2),
766 } elsif($regs =~ m/[wx]([0-9]+),\s*[wx]([0-9]+)/o) {
767 return &verify_inst
($opcode_unpred{$mnemonic}|$regd|($tsize{$isize}<<22)|($1<<5)|($2<<16), $inst);
768 } elsif ($regs =~ m/[wx]([0-9]+),\s*#?([0-9]+)/o) {
769 return &verify_inst
($opcode_imm_unpred{$mnemonic}|$regd|($tsize{$isize}<<22)|($1<<5)|($2<<16), $inst);
770 } elsif ($regs =~ m/[wx]([0-9]+)/o) {
771 return &verify_inst
($opcode_unpred{$mnemonic}|$regd|($tsize{$isize}<<22)|($1<<5), $inst);
773 my $encoded_size = 0;
774 if (($mnemonic eq "add") || ($mnemonic =~ /zip./) || ($mnemonic =~ /uzp./) ) {
775 $encoded_size = ($tsize{$isize}<<22);
777 if ($regs =~ m/z([0-9]+)\.[bhsd],\s*z([0-9]+)\.[bhsd],\s*([0-9]+)/o &&
779 return &verify_inst
($opcode_unpred{$mnemonic}|$regd|($2<<5)|&encode_size_imm
($mnemonic,$isize,$3), $inst);
780 } elsif ($regs =~ m/z([0-9]+)\.[bhsd],\s*z([0-9]+)\.[bhsd]/o) {
781 return &verify_inst
($opcode_unpred{$mnemonic}|$regd|$encoded_size|($1<<5)|($2<<16), $inst);
784 } elsif ($arg =~ m/z([0-9]+)\.([bhsd]),\s*#?([0-9]+)/o) {
785 return &verify_inst
($opcode_imm_unpred{$mnemonic}|$1|($3<<5)|($tsize{$2}<<22),
788 sprintf "%s // fail to parse", $inst;
792 my ($mnemonic,,$arg)=@_;
793 my $inst = (sprintf "%s %s", $mnemonic,$arg);
795 if ($arg =~ m/\{\s*z([0-9]+)\.([bhsd]).*\},\s*p([0-9])+(\/z
)?
,\s
*\
[(\s
*[xs
].*)\
]/o
) {
797 my $size = $tsize{$2};
802 if ($addr =~ m/x([0-9]+)\s*/o) {
806 if ($mnemonic =~m/ld1r[bhwd]/o) {
809 if ($addr =~ m/\w+\s*,\s*x([0-9]+),.*/o) {
810 return &verify_inst
($opcode_scalar_pred{$mnemonic}|($size<<21)|$zt|($pg<<10)|($1<<16)|($xn<<5),$inst);
811 } elsif ($addr =~ m/\w+\s*,\s*z([0-9]+)\.s,\s*([US]\w+)/o) {
812 my $xs = ($2 eq "SXTW") ?
1 : 0;
813 return &verify_inst
($opcode_gather_pred{$mnemonic}|($xs<<22)|$zt|($pg<<10)|($1<<16)|($xn<<5),$inst);
814 } elsif($addr =~ m/\w+\s*,\s*#?([0-9]+)/o) {
815 return &verify_inst
($opcode_pred{$mnemonic}|($size<<21)|$zt|($pg<<10)|($1<<16)|($xn<<5),$inst);
817 return &verify_inst
($opcode_pred{$mnemonic}|($size<<21)|$zt|($pg<<10)|($xn<<5),$inst);
819 } elsif ($arg =~ m/z([0-9]+)\.([bhsd]),\s*p([0-9]+)\/([mz
]),\s
*([zwx
][0-9]+.*)/o
) {
826 if (($mnemonic eq "lsl") || ($mnemonic eq "lsr")) {
827 if ($regs =~ m/z([0-9]+)[^,]*(?:,\s*#?([0-9]+))?/o
830 && ((8<<$tsize{$isize}) > $2)) {
831 return &verify_inst
($opcode_pred{$mnemonic}|$regd|($pg<<10)|&encode_shift_pred
($mnemonic,$isize,$2), $inst);
833 } elsif($regs =~ m/[wx]([0-9]+)/o) {
834 return &verify_inst
($opcode_scalar_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($1<<5), $inst);
835 } elsif ($regs =~ m/z([0-9]+)[^,]*(?:,\s*z([0-9]+))?/o) {
836 if ($mnemonic eq "sel") {
837 return &verify_inst
($opcode_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($1<<5)|($2<<16), $inst);
838 } elsif ($mnemonic eq "mov") {
839 return &verify_inst
($opcode_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($1<<5)|($regd<<16), $inst);
840 } elsif (length $2 > 0) {
841 return &verify_inst
($opcode_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($2<<5), $inst);
843 return &verify_inst
($opcode_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($1<<5), $inst);
846 } elsif ($arg =~ m/p([0-9]+)\.([bhsd]),\s*(\w+.*)/o) {
851 if ($regs =~ m/([wx])(zr|[0-9]+),\s*[wx](zr|[0-9]+)/o) {
852 return &verify_inst
($opcode_pred{$mnemonic}|($tsize{$isize}<<22)|$pg|($sf{$1}<<12)|(®_code
($2)<<5)|(®_code
($3)<<16), $inst);
853 } elsif ($regs =~ m/p([0-9]+),\s*p([0-9]+)\.[bhsd]/o) {
854 return &verify_inst
($opcode_pred{$mnemonic}|($tsize{$isize}<<22)|$pg|($1<<5), $inst);
856 return &verify_inst
($opcode_pred{$mnemonic}|($tsize{$isize}<<22)|$pg|($pattern{$regs}<<5), $inst);
858 } elsif ($arg =~ m/p([0-9]+)\.([bhsd])/o) {
859 return &verify_inst
($opcode_pred{$mnemonic}|$1, $inst);
862 sprintf "%s // fail to parse", $inst;
866 my ($mnemonic,$arg)=@_;
867 my $inst = (sprintf "%s %s", $mnemonic,$arg);
869 if ($arg =~ m/x([0-9]+)[^,]*,\s*p([0-9]+)[^,]*,\s*p([0-9]+)\.([bhsd])/o) {
870 return &verify_inst
($opcode_pred{$mnemonic}|($tsize{$4}<<22)|$1|($2<<10)|($3<<5), $inst);
871 } elsif ($mnemonic =~ /inc[bhdw]/) {
872 if ($arg =~ m/x([0-9]+)[^,]*,\s*(\w+)[^,]*,\s*MUL\s*#?([0-9]+)/o) {
873 return &verify_inst
($opcode_unpred{$mnemonic}|$1|($pattern{$2}<<5)|(2<<12)|(($3 - 1)<<16), $inst);
874 } elsif ($arg =~ m/z([0-9]+)[^,]*,\s*(\w+)[^,]*,\s*MUL\s*#?([0-9]+)/o) {
875 return &verify_inst
($opcode_unpred{$mnemonic}|$1|($pattern{$2}<<5)|(($3 - 1)<<16), $inst);
876 } elsif ($arg =~ m/x([0-9]+)/o) {
877 return &verify_inst
($opcode_unpred{$mnemonic}|$1|(31<<5)|(0<<16), $inst);
879 } elsif ($mnemonic =~ /cnt[bhdw]/) {
880 if ($arg =~ m/x([0-9]+)[^,]*,\s*(\w+)[^,]*,\s*MUL\s*#?([0-9]+)/o) {
881 return &verify_inst
($opcode_unpred{$mnemonic}|$1|($pattern{$2}<<5)|(($3 - 1)<<16), $inst);
883 } elsif ($arg =~ m/x([0-9]+)[^,]*,\s*x([0-9]+)[^,]*,\s*#?([0-9]+)/o) {
884 return &verify_inst
($opcode_pred{$mnemonic}|$1|($2<<16)|($3<<5), $inst);
885 } elsif ($arg =~ m/z([0-9]+)[^,]*,\s*z([0-9]+)/o) {
886 return &verify_inst
($opcode_unpred{$mnemonic}|$1|($2<<5), $inst);
888 sprintf "%s // fail to parse", $inst;
895 last if (!s/^#/\/\
// and !/^$/);
900 if ($debug_encoder == 1) {
904 foreach(split("\n",$code)) {
905 s/\`([^\`]*)\`/eval($1)/ge;
906 s/\b(\w+)\s+(z[0-9]+\.[bhsd],\s*[#zwx]?[0-9]+.*)/sve_unpred($1,$2)/ge;
907 s/\b(\w+)\s+(z[0-9]+\.[bhsd],\s*\{.*\},\s*z[0-9]+.*)/sve_unpred($1,$2)/ge;
908 s/\b(\w+)\s+(z[0-9]+\.[bhsd],\s*p[0-9].*)/sve_pred($1,$2)/ge;
909 s/\b(\w+[1-4]r[bhwd])\s+(\{\s*z[0-9]+.*\},\s*p[0-9]+.*)/sve_pred($1,$2)/ge;
910 s/\b(\w+[1-4][bhwd])\s+(\{\s*z[0-9]+.*\},\s*p[0-9]+.*)/sve_pred($1,$2)/ge;
911 s/\b(\w+)\s+(p[0-9]+\.[bhsd].*)/sve_pred($1,$2)/ge;
912 s/\b(movprfx|cntp|cnt[bhdw]|addvl|inc[bhdw])\s+((x|z).*)/sve_other($1,$2)/ge;
916 close STDOUT
or die "error closing STDOUT: $!";