# Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ChaCha20 for ARMv8 via SVE
#
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
# Grab $output (last arg, if it has a file extension) and $flavour (first
# arg, if it does not look like a filename) from the command line.
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Locate the arm-xlate.pl assembler translator relative to this script:
# first next to it, then in the shared perlasm directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

# Pipe all generated assembly through the translator into $output.
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
26 sub AUTOLOAD() # thunk [simplified] x86-style perlasm
27 { my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
29 $arg = "#$arg" if ($arg*1 eq $arg);
30 $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
33 my ($outp,$inp,$len,$key,$ctr) = map("x$_",(0..4));
35 my ($veclen_w,$veclen,$blocks) = ("w6","x6","x7");
36 my ($saved_outp) = ("x8");
37 my ($wctr, $xctr) = ("w9", "x9");
38 my @mx=map("z$_",(0..7,16..23));
39 my ($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3,
40 $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3) = @mx;
41 my @xt=map("z$_",(24..31,8..11));
44 my ($xt0,$xt1,$xt2,$xt3,$xt4,$xt5,$xt6,$xt7,$xt8,$xt9,$xt10,$xt11)=@xt;
52 add @mx[$x].s,@mx[$x].s,@mx[$y].s
64 eor @mx[$x].d,@mx[$x].d,@mx[$y].d
78 lsl @xt[$x].s,@mx[$y].s,$bits
81 &SVE_LSL($bits,$next,@_);
90 lsr @mx[$x].s,@mx[$x].s,$bits
103 orr @mx[$y].d,@mx[$y].d,@xt[$x].d
114 revh @mx[$x].s,p0/m,@mx[$x].s
125 tbl @mx[$x].b,{@mx[$x].b},$rot8.b
136 my $rbits = 32-$bits;
139 xar @mx[$x].s,@mx[$x].s,@mx[$y].s,$rbits
147 my $have_sve2 = shift;
148 my ($a0,$b0,$c0,$d0,$a1,$b1,$c1,$d1,$a2,$b2,$c2,$d2,$a3,$b3,$c3,$d3) = @_;
150 &SVE_ADD($a0,$b0,$a1,$b1,$a2,$b2,$a3,$b3);
151 &SVE_EOR($d0,$a0,$d1,$a1,$d2,$a2,$d3,$a3);
152 &SVE_REV16($d0,$d1,$d2,$d3);
154 &SVE_ADD($c0,$d0,$c1,$d1,$c2,$d2,$c3,$d3);
155 if ($have_sve2 == 0) {
156 &SVE_EOR($b0,$c0,$b1,$c1,$b2,$c2,$b3,$c3);
157 &SVE_LSL(12,0,$b0,$b1,$b2,$b3);
158 &SVE_LSR(20,$b0,$b1,$b2,$b3);
159 &SVE_ORR(0,$b0,$b1,$b2,$b3,);
161 &SVE2_XAR(12,$b0,$c0,$b1,$c1,$b2,$c2,$b3,$c3);
164 &SVE_ADD($a0,$b0,$a1,$b1,$a2,$b2,$a3,$b3);
165 &SVE_EOR($d0,$a0,$d1,$a1,$d2,$a2,$d3,$a3);
166 &SVE_ROT8($d0,$d1,$d2,$d3);
168 &SVE_ADD($c0,$d0,$c1,$d1,$c2,$d2,$c3,$d3);
169 if ($have_sve2 == 0) {
170 &SVE_EOR($b0,$c0,$b1,$c1,$b2,$c2,$b3,$c3);
171 &SVE_LSL(7,0,$b0,$b1,$b2,$b3);
172 &SVE_LSR(25,$b0,$b1,$b2,$b3);
173 &SVE_ORR(0,$b0,$b1,$b2,$b3);
175 &SVE2_XAR(7,$b0,$c0,$b1,$c1,$b2,$c2,$b3,$c3);
179 sub SVE_INNER_BLOCK() {
181 //cbnz $sve2flag, 10f
183 &SVE_QR_GROUP(0,0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15);
184 &SVE_QR_GROUP(0,0,5,10,15,1,6,11,12,2,7,8,13,3,4,9,14);
186 // SVE 2 not enabled until hardware available
191 # &SVE_QR_GROUP(1,0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15);
192 # &SVE_QR_GROUP(1,0,5,10,15,1,6,11,12,2,7,8,13,3,4,9,14);
200 my ($dlen,$rsize,$tmp) = ("x10","x11","x12");
213 ld1w {$x0.s},p0/z,[$inp]
214 ld1w {$x1.s},p0/z,[$inp, #1, MUL VL]
215 ld1w {$x2.s},p0/z,[$inp, #2, MUL VL]
216 ld1w {$x3.s},p0/z,[$inp, #3, MUL VL]
217 ld1w {$x4.s},p0/z,[$inp, #4, MUL VL]
218 ld1w {$x5.s},p0/z,[$inp, #5, MUL VL]
219 ld1w {$x6.s},p0/z,[$inp, #6, MUL VL]
220 ld1w {$x7.s},p0/z,[$inp, #7, MUL VL]
236 st1w {$x0.s},p0,[$outp]
237 st1w {$x1.s},p0,[$outp, #1, MUL VL]
238 st1w {$x2.s},p0,[$outp, #2, MUL VL]
239 st1w {$x3.s},p0,[$outp, #3, MUL VL]
240 st1w {$x4.s},p0,[$outp, #4, MUL VL]
241 st1w {$x5.s},p0,[$outp, #5, MUL VL]
242 st1w {$x6.s},p0,[$outp, #6, MUL VL]
243 st1w {$x7.s},p0,[$outp, #7, MUL VL]
255 zip1 $xt8.s,$xa.s,$xb.s
256 zip2 $xt9.s,$xa.s,$xb.s
257 zip1 $xt10.s,$xc.s,$xd.s
258 zip2 $xt11.s,$xc.s,$xd.s
259 zip1 $xa.d,$xt8.d,$xt10.d
260 zip2 $xb.d,$xt8.d,$xt10.d
261 zip1 $xc.d,$xt9.d,$xt11.d
262 zip2 $xd.d,$xt9.d,$xt11.d
267 my ($tmpw0,$tmpw1,$tmpw2,$tmpw3) = ("w10","w11","w12","w13");
270 ldp $tmpw0,$tmpw1,[$state]
271 ldp $tmpw2,$tmpw3,[$state,#8]
276 ldp $tmpw0,$tmpw1,[$state,#16]
277 ldp $tmpw2,$tmpw3,[$state,#24]
278 add @mx[0].s,@mx[0].s,$xt0.s
279 add @mx[1].s,@mx[1].s,$xt1.s
280 add @mx[2].s,@mx[2].s,$xt2.s
281 add @mx[3].s,@mx[3].s,$xt3.s
286 ldp $tmpw0,$tmpw1,[$state,#32]
287 ldp $tmpw2,$tmpw3,[$state,#40]
288 add @mx[4].s,@mx[4].s,$xt4.s
289 add @mx[5].s,@mx[5].s,$xt5.s
290 add @mx[6].s,@mx[6].s,$xt6.s
291 add @mx[7].s,@mx[7].s,$xt7.s
296 ldp $tmpw0,$tmpw1,[$state,#48]
297 ldp $tmpw2,$tmpw3,[$state,#56]
298 add @mx[8].s,@mx[8].s,$xt0.s
299 add @mx[9].s,@mx[9].s,$xt1.s
300 add @mx[10].s,@mx[10].s,$xt2.s
301 add @mx[11].s,@mx[11].s,$xt3.s
305 add @mx[12].s,@mx[12].s,$zctr.s
306 add @mx[13].s,@mx[13].s,$xt5.s
307 add @mx[14].s,@mx[14].s,$xt6.s
308 add @mx[15].s,@mx[15].s,$xt7.s
312 sub SVE_TRANSFORMS() {
314 &transpose($xa0,$xb0,$xc0,$xd0);
315 &transpose($xa1,$xb1,$xc1,$xd1);
316 &transpose($xa2,$xb2,$xc2,$xd2);
317 &transpose($xa3,$xb3,$xc3,$xd3);
318 &load($xt0,$xt1,$xt2,$xt3,$xt4,$xt5,$xt6,$xt7);
319 &transpose($xa0,$xa1,$xa2,$xa3);
320 &transpose($xb0,$xb1,$xb2,$xb3);
322 eor $xa0.d,$xa0.d,$xt0.d
323 eor $xa1.d,$xa1.d,$xt1.d
324 eor $xa2.d,$xa2.d,$xt2.d
325 eor $xa3.d,$xa3.d,$xt3.d
326 eor $xb0.d,$xb0.d,$xt4.d
327 eor $xb1.d,$xb1.d,$xt5.d
328 eor $xb2.d,$xb2.d,$xt6.d
329 eor $xb3.d,$xb3.d,$xt7.d
331 &transpose($xc0,$xc1,$xc2,$xc3);
332 &store($xa0,$xa1,$xa2,$xa3,$xb0,$xb1,$xb2,$xb3);
333 &load($xt0,$xt1,$xt2,$xt3,$xt4,$xt5,$xt6,$xt7);
334 &transpose($xd0,$xd1,$xd2,$xd3);
336 eor $xc0.d,$xc0.d,$xt0.d
337 eor $xc1.d,$xc1.d,$xt1.d
338 eor $xc2.d,$xc2.d,$xt2.d
339 eor $xc3.d,$xc3.d,$xt3.d
340 eor $xd0.d,$xd0.d,$xt4.d
341 eor $xd1.d,$xd1.d,$xt5.d
342 eor $xd2.d,$xd2.d,$xt6.d
343 eor $xd3.d,$xd3.d,$xt7.d
345 &store($xc0,$xc1,$xc2,$xc3,$xd0,$xd1,$xd2,$xd3);
347 incw $xctr, ALL, MUL #1
348 incw $zctr.s, ALL, MUL #1
353 sub SVE_LOAD_STATES() {
354 my ($tmpw0,$tmpw1,$tmpw2,$tmpw3) = ("w10","w11","w12","w13");
357 // FIXME following code are not functionally necessary
358 // but appear to enhance performance
369 ldp $tmpw0,$tmpw1,[$state]
370 ldp $tmpw2,$tmpw3,[$state,#8]
375 ldp $tmpw0,$tmpw1,[$state,#16]
376 ldp $tmpw2,$tmpw3,[$state,#24]
381 ldp $tmpw0,$tmpw1,[$state,#32]
382 ldp $tmpw2,$tmpw3,[$state,#40]
387 ldp $tmpw0,$tmpw1,[$state, #48]
388 ldp $tmpw2,$tmpw3,[$state,#56]
389 mov @mx[12].s,p0/m,$zctr.s
396 sub sve_handle_blocks() {
397 my ($counter) = ("x10");
408 subs $counter,$counter,1
414 sub chacha20_process() {
415 my ($counter) = ("x10");
416 my ($tmpw) = ("w11");
424 &sve_handle_blocks();
426 subs $blocks,$blocks,$veclen
433 my ($tmp,$tmpw) = ("x10", "w10");
434 my ($tmpw0,$tmpw1) = ("w11", "w12");
438 #include "arm_arch.h"
443 .extern OPENSSL_armcap_P
444 .hidden OPENSSL_armcap_P
455 .word 0x02010003,0x04040404,0x02010003,0x04040404
456 .globl ChaCha20_ctr32_sve
457 .type ChaCha20_ctr32_sve,%function
460 AARCH64_VALID_CALL_TARGET
462 whilelo p0.s,xzr,$tmp
464 // run Neon if we only have 128-bit SVE
465 // in the future, we need to check SVE2
475 adr $tmp,.Lchacha20_consts
478 ldp $tmpw0,$tmpw1,[$tmp]
479 ld1 {v1.4s,v2.4s},[$key]
482 index $zctr.s,$wctr,1
483 index $rot8.s,$tmpw0,$tmpw1
484 st1 {v0.4s,v1.4s,v2.4s,v3.4s},[sp]
487 // SVE2 code not enabled until we have hardware
490 adrp $tmp,OPENSSL_armcap_P
491 ldr $tmpw,[$tmp,#:lo12:OPENSSL_armcap_P]
492 tst $tmpw,#ARMV8_SVE2
506 add $len,$len,$blocks,lsl #6
509 .size ChaCha20_ctr32_sve,.-ChaCha20_ctr32_sve
514 ########################################
516 my %opcode_unpred = (
522 "incw" => 0x04B0C000,
524 "zip1" => 0x05206000,
525 "zip2" => 0x05206400,
526 "uzp1" => 0x05206800,
527 "uzp2" => 0x05206C00,
528 "index" => 0x04204C00,
531 "tbl" => 0x05203000);
533 my %opcode_imm_unpred = (
535 "index" => 0x04204400);
537 my %opcode_scalar_pred = (
540 "st4w" => 0xE5606000,
541 "st1w" => 0xE5004000,
542 "ld1w" => 0xA5404000);
544 my %opcode_gather_pred = (
545 "ld1w" => 0x85204000);
551 "whilelo" => 0x25200C00,
552 "whilelt" => 0x25200400,
553 "cntp" => 0x25208000,
554 "addvl" => 0x04205000,
559 "ptrue" => 0x2518E000,
560 "pfalse" => 0x2518E400,
561 "ptrues" => 0x2519E000,
562 "pnext" => 0x2519C400,
563 "ld4w" => 0xA560E000,
564 "st4w" => 0xE570E000,
565 "st1w" => 0xE500E000,
566 "ld1w" => 0xA540A000,
567 "revh" => 0x05258000);
598 sub create_verifier {
599 my $filename="./compile_sve.sh";
604 CROSS_COMPILE=\${CROSS_COMPILE:-'aarch64-none-linux-gnu-'}
606 [ -z "\$1" ] && exit 1
607 ARCH=`uname -p | xargs echo -n`
609 # need gcc-10 and above to compile SVE code
610 # change this according to your system during debugging
611 if [ \$ARCH == 'aarch64' ]; then
615 CC=\${CROSS_COMPILE}gcc
616 OBJDUMP=\${CROSS_COMPILE}objdump
619 cat > \$TMPFILE.c << EOF
620 extern __attribute__((noinline, section("disasm_output"))) void dummy_func()
624 int main(int argc, char *argv[])
628 \$CC -march=armv8.2-a+sve+sve2 -o \$TMPFILE.out \$TMPFILE.c
629 \$OBJDUMP -d \$TMPFILE.out | awk -F"\\n" -v RS="\\n\\n" '\$1 ~ /dummy_func/' | awk 'FNR == 2 {printf "%s",\$2}'
630 rm \$TMPFILE.c \$TMPFILE.out
632 open(FH, '>', $filename) or die $!;
635 system("chmod a+x ./compile_sve.sh");
639 return `./compile_sve.sh '@_'`
644 my $hexcode = (sprintf "%08x", $code);
646 if ($debug_encoder == 1) {
647 my $expect=&compile_sve($inst);
648 if ($expect ne $hexcode) {
649 return (sprintf "%s // Encode Error! expect [%s] actual [%s]", $inst, $expect, $hexcode);
652 return (sprintf ".inst\t0x%s\t//%s", $hexcode, $inst);
# Encode the size/immediate field of an SVE unpredicated shift-by-immediate
# instruction (bits [23:22] tszh, bits [20:16] tszl:imm3).
#   $mnemonic - instruction name; lsr/xar use the "right shift" encoding
#   $isize    - element size letter (b/h/s/d), looked up in file-level %tsize
#   $const    - shift amount in bits
# Left shifts encode as esize+const; right shifts (lsr, xar) as 2*esize-const,
# per the Arm SVE instruction encoding rules.
sub encode_size_imm() {
	my ($mnemonic, $isize, $const)=@_;
	my $esize = (8<<$tsize{$isize});
	my $tsize_imm = $esize + $const;

	if ($mnemonic eq "lsr" || $mnemonic eq "xar") {
		$tsize_imm = 2*$esize - $const;
	}
	return (($tsize_imm>>5)<<22)|(($tsize_imm&0x1f)<<16);
}
# Encode the size/immediate field of an SVE predicated shift-by-immediate
# instruction (bits [23:22] tszh, bits [9:5] tszl:imm3 — note the immediate
# lands at bit 5 here, unlike the unpredicated form's bit 16).
#   $mnemonic - instruction name; lsr uses the "right shift" encoding
#   $isize    - element size letter (b/h/s/d), looked up in file-level %tsize
#   $const    - shift amount in bits
sub encode_shift_pred() {
	my ($mnemonic, $isize, $const)=@_;
	my $esize = (8<<$tsize{$isize});
	my $tsize_imm = $esize + $const;

	if ($mnemonic eq "lsr") {
		$tsize_imm = 2*$esize - $const;
	}
	return (($tsize_imm>>5)<<22)|(($tsize_imm&0x1f)<<5);
}
687 my ($mnemonic,$arg)=@_;
688 my $inst = (sprintf "%s %s", $mnemonic,$arg);
690 if ($arg =~ m/z([0-9]+)\.([bhsd]),\s*\{\s*z([0-9]+)\.[bhsd].*\},\s*z([0-9]+)\.[bhsd].*/o) {
691 return &verify_inst($opcode_unpred{$mnemonic}|$1|($3<<5)|($tsize{$2}<<22)|($4<<16),
693 } elsif ($arg =~ m/z([0-9]+)\.([bhsd]),\s*([zwx][0-9]+.*)/o) {
698 if (($mnemonic eq "lsl") || ($mnemonic eq "lsr")) {
699 if ($regs =~ m/z([0-9]+)[^,]*(?:,\s*#?([0-9]+))?/o
700 && ((8<<$tsize{$isize}) > $2)) {
701 return &verify_inst($opcode_unpred{$mnemonic}|$regd|($1<<5)|&encode_size_imm($mnemonic,$isize,$2),
704 } elsif($regs =~ m/[wx]([0-9]+),\s*[wx]([0-9]+)/o) {
705 return &verify_inst($opcode_unpred{$mnemonic}|$regd|($tsize{$isize}<<22)|($1<<5)|($2<<16), $inst);
706 } elsif ($regs =~ m/[wx]([0-9]+),\s*#?([0-9]+)/o) {
707 return &verify_inst($opcode_imm_unpred{$mnemonic}|$regd|($tsize{$isize}<<22)|($1<<5)|($2<<16), $inst);
708 } elsif ($regs =~ m/[wx]([0-9]+)/o) {
709 return &verify_inst($opcode_unpred{$mnemonic}|$regd|($tsize{$isize}<<22)|($1<<5), $inst);
711 my $encoded_size = 0;
712 if (($mnemonic eq "add") || ($mnemonic =~ /zip./) || ($mnemonic =~ /uzp./) ) {
713 $encoded_size = ($tsize{$isize}<<22);
715 if ($regs =~ m/z([0-9]+)\.[bhsd],\s*z([0-9]+)\.[bhsd],\s*([0-9]+)/o &&
717 return &verify_inst($opcode_unpred{$mnemonic}|$regd|($2<<5)|&encode_size_imm($mnemonic,$isize,$3), $inst);
718 } elsif ($regs =~ m/z([0-9]+)\.[bhsd],\s*z([0-9]+)\.[bhsd]/o) {
719 return &verify_inst($opcode_unpred{$mnemonic}|$regd|$encoded_size|($1<<5)|($2<<16), $inst);
722 } elsif ($arg =~ m/z([0-9]+)\.([bhsd]),\s*#?([0-9]+)/o) {
723 return &verify_inst($opcode_imm_unpred{$mnemonic}|$1|($3<<5)|($tsize{$2}<<22),
726 sprintf "%s // fail to parse", $inst;
730 my ($mnemonic,,$arg)=@_;
731 my $inst = (sprintf "%s %s", $mnemonic,$arg);
733 if ($arg =~ m/\{\s*z([0-9]+)\.([bhsd]).*\},\s*p([0-9])+(\/z)?,\s*\[(\s*[xs].*)\]/o) {
735 my $size = $tsize{$2};
740 if ($addr =~ m/x([0-9]+)\s*/o) {
743 if ($addr =~ m/\w+\s*,\s*x([0-9]+),.*/o) {
744 return &verify_inst($opcode_scalar_pred{$mnemonic}|($size<<21)|$zt|($pg<<10)|($1<<16)|($xn<<5),$inst);
745 } elsif ($addr =~ m/\w+\s*,\s*z([0-9]+)\.s,\s*([US]\w+)/o) {
746 my $xs = ($2 eq "SXTW") ? 1 : 0;
747 return &verify_inst($opcode_gather_pred{$mnemonic}|($xs<<22)|$zt|($pg<<10)|($1<<16)|($xn<<5),$inst);
748 } elsif($addr =~ m/\w+\s*,\s*#?([0-9]+)/o) {
749 return &verify_inst($opcode_pred{$mnemonic}|($size<<21)|$zt|($pg<<10)|($1<<16)|($xn<<5),$inst);
751 return &verify_inst($opcode_pred{$mnemonic}|($size<<21)|$zt|($pg<<10)|($xn<<5),$inst);
753 } elsif ($arg =~ m/z([0-9]+)\.([bhsd]),\s*p([0-9]+)\/([mz]),\s*([zwx][0-9]+.*)/o) {
760 if (($mnemonic eq "lsl") || ($mnemonic eq "lsr")) {
761 if ($regs =~ m/z([0-9]+)[^,]*(?:,\s*#?([0-9]+))?/o
764 && ((8<<$tsize{$isize}) > $2)) {
765 return &verify_inst($opcode_pred{$mnemonic}|$regd|($pg<<10)|&encode_shift_pred($mnemonic,$isize,$2), $inst);
767 } elsif($regs =~ m/[wx]([0-9]+)/o) {
768 return &verify_inst($opcode_scalar_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($1<<5), $inst);
769 } elsif ($regs =~ m/z([0-9]+)[^,]*(?:,\s*z([0-9]+))?/o) {
770 if ($mnemonic eq "sel") {
771 return &verify_inst($opcode_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($1<<5)|($2<<16), $inst);
772 } elsif ($mnemonic eq "mov") {
773 return &verify_inst($opcode_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($1<<5)|($regd<<16), $inst);
774 } elsif (length $2 > 0) {
775 return &verify_inst($opcode_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($2<<5), $inst);
777 return &verify_inst($opcode_pred{$mnemonic}|$regd|($tsize{$isize}<<22)|($pg<<10)|($1<<5), $inst);
780 } elsif ($arg =~ m/p([0-9]+)\.([bhsd]),\s*(\w+.*)/o) {
785 if ($regs =~ m/([wx])(zr|[0-9]+),\s*[wx](zr|[0-9]+)/o) {
786 return &verify_inst($opcode_pred{$mnemonic}|($tsize{$isize}<<22)|$pg|($sf{$1}<<12)|(®_code($2)<<5)|(®_code($3)<<16), $inst);
787 } elsif ($regs =~ m/p([0-9]+),\s*p([0-9]+)\.[bhsd]/o) {
788 return &verify_inst($opcode_pred{$mnemonic}|($tsize{$isize}<<22)|$pg|($1<<5), $inst);
790 return &verify_inst($opcode_pred{$mnemonic}|($tsize{$isize}<<22)|$pg|($pattern{$regs}<<5), $inst);
792 } elsif ($arg =~ m/p([0-9]+)\.([bhsd])/o) {
793 return &verify_inst($opcode_pred{$mnemonic}|$1, $inst);
796 sprintf "%s // fail to parse", $inst;
800 my ($mnemonic,$arg)=@_;
801 my $inst = (sprintf "%s %s", $mnemonic,$arg);
803 if ($arg =~ m/x([0-9]+)[^,]*,\s*p([0-9]+)[^,]*,\s*p([0-9]+)\.([bhsd])/o) {
804 return &verify_inst($opcode_pred{$mnemonic}|($tsize{$4}<<22)|$1|($2<<10)|($3<<5), $inst);
805 } elsif ($mnemonic =~ /inc[bhdw]/) {
806 if ($arg =~ m/x([0-9]+)[^,]*,\s*(\w+)[^,]*,\s*MUL\s*#?([0-9]+)/o) {
807 return &verify_inst($opcode_unpred{$mnemonic}|$1|($pattern{$2}<<5)|(2<<12)|(($3 - 1)<<16), $inst);
808 } elsif ($arg =~ m/z([0-9]+)[^,]*,\s*(\w+)[^,]*,\s*MUL\s*#?([0-9]+)/o) {
809 return &verify_inst($opcode_unpred{$mnemonic}|$1|($pattern{$2}<<5)|(($3 - 1)<<16), $inst);
810 } elsif ($arg =~ m/x([0-9]+)/o) {
811 return &verify_inst($opcode_unpred{$mnemonic}|$1|(31<<5)|(0<<16), $inst);
813 } elsif ($arg =~ m/x([0-9]+)[^,]*,\s*x([0-9]+)[^,]*,\s*#?([0-9]+)/o) {
814 return &verify_inst($opcode_pred{$mnemonic}|$1|($2<<16)|($3<<5), $inst);
816 sprintf "%s // fail to parse", $inst;
823 last if (!s/^#/\/\// and !/^$/);
828 if ($debug_encoder == 1) {
832 foreach(split("\n",$code)) {
833 s/\`([^\`]*)\`/eval($1)/ge;
834 s/\b(\w+)\s+(z[0-9]+\.[bhsd],\s*[#zwx]?[0-9]+.*)/sve_unpred($1,$2)/ge;
835 s/\b(\w+)\s+(z[0-9]+\.[bhsd],\s*\{.*\},\s*z[0-9]+.*)/sve_unpred($1,$2)/ge;
836 s/\b(\w+)\s+(z[0-9]+\.[bhsd],\s*p[0-9].*)/sve_pred($1,$2)/ge;
837 s/\b(\w+[1-4][bhwd])\s+(\{\s*z[0-9]+.*\},\s*p[0-9]+.*)/sve_pred($1,$2)/ge;
838 s/\b(\w+)\s+(p[0-9]+\.[bhsd].*)/sve_pred($1,$2)/ge;
839 s/\b(cntp|addvl|inc[bhdw])\s+((x|z).*)/sve_other($1,$2)/ge;
843 close STDOUT or die "error closing STDOUT: $!";