3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # This module implements support for AES instructions as per PowerISA
11 # specification version 2.07, first implemented by POWER8 processor.
12 # The module is endian-agnostic in sense that it supports both big-
13 # and little-endian cases. Data alignment in parallelizable modes is
14 # handled with VSX loads and stores, which implies MSR.VSX flag being
15 # set. It should also be noted that ISA specification doesn't prohibit
16 # alignment exceptions for these instructions on page boundaries.
17 # Initially alignment was handled in pure AltiVec/VMX way [when data
18 # is aligned programmatically, which in turn guarantees exception-
19 # free execution], but it turned to hamper performance when vcipher
20 # instructions are interleaved. It's reckoned that eventual
21 # misalignment penalties at page boundaries are in average lower
22 # than additional overhead in pure AltiVec approach.
26 if ($flavour =~ /64/) {
34 } elsif ($flavour =~ /32/) {
42 } else { die "nonsense $flavour"; }
44 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
46 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
47 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
48 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
49 die "can't locate ppc-xlate.pl";
51 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!"; # low-precedence "or" so the failure of open() itself is checked ("||" would bind to the string arg, which is always true)
59 #########################################################################
60 {{{ # Key setup procedures #
61 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
62 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
63 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
72 .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
73 .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
74 .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
79 mflr $ptr #vvvvv "distance between . and rcon
84 .byte 0,12,0x14,0,0,0,0,0
85 .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
87 .globl .${prefix}_set_encrypt_key
89 .${prefix}_set_encrypt_key:
93 $PUSH r11,$LRSAVE($sp)
102 addi $inp,$inp,15 # 15 is not typo
103 lvsr $key,0,r9 # borrow $key
107 le?vspltisb $mask,0x0f # borrow $mask
109 le?vxor $key,$key,$mask # adjust for byte swap
112 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
114 vxor $zero,$zero,$zero
117 ?lvsr $outperm,0,$out
120 ?vperm $outmask,$zero,$outmask,$outperm
130 vperm $key,$in0,$in0,$mask # rotate-n-splat
131 vsldoi $tmp,$zero,$in0,12 # >>32
132 vperm $outtail,$in0,$in0,$outperm # rotate
133 vsel $stage,$outhead,$outtail,$outmask
134 vmr $outhead,$outtail
135 vcipherlast $key,$key,$rcon
140 vsldoi $tmp,$zero,$tmp,12 # >>32
142 vsldoi $tmp,$zero,$tmp,12 # >>32
144 vadduwm $rcon,$rcon,$rcon
148 lvx $rcon,0,$ptr # last two round keys
150 vperm $key,$in0,$in0,$mask # rotate-n-splat
151 vsldoi $tmp,$zero,$in0,12 # >>32
152 vperm $outtail,$in0,$in0,$outperm # rotate
153 vsel $stage,$outhead,$outtail,$outmask
154 vmr $outhead,$outtail
155 vcipherlast $key,$key,$rcon
160 vsldoi $tmp,$zero,$tmp,12 # >>32
162 vsldoi $tmp,$zero,$tmp,12 # >>32
164 vadduwm $rcon,$rcon,$rcon
167 vperm $key,$in0,$in0,$mask # rotate-n-splat
168 vsldoi $tmp,$zero,$in0,12 # >>32
169 vperm $outtail,$in0,$in0,$outperm # rotate
170 vsel $stage,$outhead,$outtail,$outmask
171 vmr $outhead,$outtail
172 vcipherlast $key,$key,$rcon
177 vsldoi $tmp,$zero,$tmp,12 # >>32
179 vsldoi $tmp,$zero,$tmp,12 # >>32
182 vperm $outtail,$in0,$in0,$outperm # rotate
183 vsel $stage,$outhead,$outtail,$outmask
184 vmr $outhead,$outtail
187 addi $inp,$out,15 # 15 is not typo
197 vperm $outtail,$in0,$in0,$outperm # rotate
198 vsel $stage,$outhead,$outtail,$outmask
199 vmr $outhead,$outtail
202 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
203 vspltisb $key,8 # borrow $key
205 vsububm $mask,$mask,$key # adjust the mask
208 vperm $key,$in1,$in1,$mask # rotate-n-splat
209 vsldoi $tmp,$zero,$in0,12 # >>32
210 vcipherlast $key,$key,$rcon
213 vsldoi $tmp,$zero,$tmp,12 # >>32
215 vsldoi $tmp,$zero,$tmp,12 # >>32
218 vsldoi $stage,$zero,$in1,8
221 vsldoi $in1,$zero,$in1,12 # >>32
222 vadduwm $rcon,$rcon,$rcon
226 vsldoi $stage,$stage,$in0,8
228 vperm $key,$in1,$in1,$mask # rotate-n-splat
229 vsldoi $tmp,$zero,$in0,12 # >>32
230 vperm $outtail,$stage,$stage,$outperm # rotate
231 vsel $stage,$outhead,$outtail,$outmask
232 vmr $outhead,$outtail
233 vcipherlast $key,$key,$rcon
237 vsldoi $stage,$in0,$in1,8
239 vsldoi $tmp,$zero,$tmp,12 # >>32
240 vperm $outtail,$stage,$stage,$outperm # rotate
241 vsel $stage,$outhead,$outtail,$outmask
242 vmr $outhead,$outtail
244 vsldoi $tmp,$zero,$tmp,12 # >>32
251 vsldoi $in1,$zero,$in1,12 # >>32
252 vadduwm $rcon,$rcon,$rcon
256 vperm $outtail,$in0,$in0,$outperm # rotate
257 vsel $stage,$outhead,$outtail,$outmask
258 vmr $outhead,$outtail
260 addi $inp,$out,15 # 15 is not typo
273 vperm $outtail,$in0,$in0,$outperm # rotate
274 vsel $stage,$outhead,$outtail,$outmask
275 vmr $outhead,$outtail
278 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
282 vperm $key,$in1,$in1,$mask # rotate-n-splat
283 vsldoi $tmp,$zero,$in0,12 # >>32
284 vperm $outtail,$in1,$in1,$outperm # rotate
285 vsel $stage,$outhead,$outtail,$outmask
286 vmr $outhead,$outtail
287 vcipherlast $key,$key,$rcon
292 vsldoi $tmp,$zero,$tmp,12 # >>32
294 vsldoi $tmp,$zero,$tmp,12 # >>32
296 vadduwm $rcon,$rcon,$rcon
298 vperm $outtail,$in0,$in0,$outperm # rotate
299 vsel $stage,$outhead,$outtail,$outmask
300 vmr $outhead,$outtail
302 addi $inp,$out,15 # 15 is not typo
306 vspltw $key,$in0,3 # just splat
307 vsldoi $tmp,$zero,$in1,12 # >>32
311 vsldoi $tmp,$zero,$tmp,12 # >>32
313 vsldoi $tmp,$zero,$tmp,12 # >>32
321 lvx $in1,0,$inp # redundant in aligned case
322 vsel $in1,$outhead,$in1,$outmask
324 xor r3,r3,r3 # return value
330 .byte 0,12,0x14,1,0,0,3,0
332 .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
334 .globl .${prefix}_set_decrypt_key
336 .${prefix}_set_decrypt_key:
337 $STU $sp,-$FRAME($sp)
339 $PUSH r10,$FRAME+$LRSAVE($sp)
344 subi $inp,$out,240 # first round key
345 srwi $rounds,$rounds,1
346 add $out,$inp,$cnt # last round key
370 xor r3,r3,r3 # return value
374 .byte 0,12,4,1,0x80,0,3,0
376 .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
379 #########################################################################
380 {{{ # Single block en- and decrypt procedures #
383 my $n = $dir eq "de" ? "n" : "";
384 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
387 .globl .${prefix}_${dir}crypt
389 .${prefix}_${dir}crypt:
390 lwz $rounds,240($key)
393 li $idx,15 # 15 is not typo
399 lvsl v2,0,$inp # inpperm
401 ?lvsl v3,0,r11 # outperm
404 vperm v0,v0,v1,v2 # align [and byte swap in LE]
406 ?lvsl v5,0,$key # keyperm
407 srwi $rounds,$rounds,1
410 subi $rounds,$rounds,1
411 ?vperm v1,v1,v2,v5 # align round key
433 v${n}cipherlast v0,v0,v1
437 li $idx,15 # 15 is not typo
438 ?vperm v2,v1,v2,v3 # outmask
440 lvx v1,0,$out # outhead
441 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
451 .byte 0,12,0x14,0,0,0,3,0
453 .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
459 #########################################################################
460 {{{ # CBC en- and decrypt procedures #
461 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
462 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
463 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
466 .globl .${prefix}_cbc_encrypt
468 .${prefix}_cbc_encrypt:
472 cmpwi $enc,0 # test direction
478 vxor $rndkey0,$rndkey0,$rndkey0
479 le?vspltisb $tmp,0x0f
481 lvx $ivec,0,$ivp # load [unaligned] iv
483 lvx $inptail,$idx,$ivp
484 le?vxor $inpperm,$inpperm,$tmp
485 vperm $ivec,$ivec,$inptail,$inpperm
488 ?lvsl $keyperm,0,$key # prepare for unaligned key
489 lwz $rounds,240($key)
491 lvsr $inpperm,0,r11 # prepare for unaligned load
493 addi $inp,$inp,15 # 15 is not typo
494 le?vxor $inpperm,$inpperm,$tmp
496 ?lvsr $outperm,0,$out # prepare for unaligned store
499 ?vperm $outmask,$rndkey0,$outmask,$outperm
500 le?vxor $outperm,$outperm,$tmp
502 srwi $rounds,$rounds,1
504 subi $rounds,$rounds,1
512 subi $len,$len,16 # len-=16
515 vperm $inout,$inout,$inptail,$inpperm
516 lvx $rndkey1,$idx,$key
518 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
519 vxor $inout,$inout,$rndkey0
520 lvx $rndkey0,$idx,$key
522 vxor $inout,$inout,$ivec
525 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
526 vcipher $inout,$inout,$rndkey1
527 lvx $rndkey1,$idx,$key
529 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
530 vcipher $inout,$inout,$rndkey0
531 lvx $rndkey0,$idx,$key
535 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
536 vcipher $inout,$inout,$rndkey1
537 lvx $rndkey1,$idx,$key
539 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
540 vcipherlast $ivec,$inout,$rndkey0
543 vperm $tmp,$ivec,$ivec,$outperm
544 vsel $inout,$outhead,$tmp,$outmask
555 bge _aesp8_cbc_decrypt8x
560 subi $len,$len,16 # len-=16
563 vperm $tmp,$tmp,$inptail,$inpperm
564 lvx $rndkey1,$idx,$key
566 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
567 vxor $inout,$tmp,$rndkey0
568 lvx $rndkey0,$idx,$key
572 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
573 vncipher $inout,$inout,$rndkey1
574 lvx $rndkey1,$idx,$key
576 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
577 vncipher $inout,$inout,$rndkey0
578 lvx $rndkey0,$idx,$key
582 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
583 vncipher $inout,$inout,$rndkey1
584 lvx $rndkey1,$idx,$key
586 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
587 vncipherlast $inout,$inout,$rndkey0
590 vxor $inout,$inout,$ivec
592 vperm $tmp,$inout,$inout,$outperm
593 vsel $inout,$outhead,$tmp,$outmask
601 lvx $inout,0,$out # redundant in aligned case
602 vsel $inout,$outhead,$inout,$outmask
605 neg $enc,$ivp # write [unaligned] iv
606 li $idx,15 # 15 is not typo
607 vxor $rndkey0,$rndkey0,$rndkey0
609 le?vspltisb $tmp,0x0f
610 ?lvsl $outperm,0,$enc
611 ?vperm $outmask,$rndkey0,$outmask,$outperm
612 le?vxor $outperm,$outperm,$tmp
614 vperm $ivec,$ivec,$ivec,$outperm
615 vsel $inout,$outhead,$ivec,$outmask
616 lvx $inptail,$idx,$ivp
618 vsel $inout,$ivec,$inptail,$outmask
619 stvx $inout,$idx,$ivp
624 .byte 0,12,0x14,0,0,0,6,0
627 #########################################################################
628 {{ # Optimized CBC decrypt procedure #
630 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
631 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
632 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
633 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
634 # v26-v31 last 6 round keys
635 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
639 _aesp8_cbc_decrypt8x:
640 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
641 li r10,`$FRAME+8*16+15`
642 li r11,`$FRAME+8*16+31`
643 stvx v20,r10,$sp # ABI says so
666 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
668 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
670 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
672 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
674 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
676 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
678 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
682 subi $rounds,$rounds,3 # -4 in total
683 subi $len,$len,128 # bias
685 lvx $rndkey0,$x00,$key # load key schedule
689 ?vperm $rndkey0,$rndkey0,v30,$keyperm
690 addi $key_,$sp,$FRAME+15
694 ?vperm v24,v30,v31,$keyperm
697 stvx v24,$x00,$key_ # off-load round[1]
698 ?vperm v25,v31,v30,$keyperm
700 stvx v25,$x10,$key_ # off-load round[2]
701 addi $key_,$key_,0x20
702 bdnz Load_cbc_dec_key
705 ?vperm v24,v30,v31,$keyperm
707 stvx v24,$x00,$key_ # off-load round[3]
708 ?vperm v25,v31,v26,$keyperm
710 stvx v25,$x10,$key_ # off-load round[4]
711 addi $key_,$sp,$FRAME+15 # rewind $key_
712 ?vperm v26,v26,v27,$keyperm
714 ?vperm v27,v27,v28,$keyperm
716 ?vperm v28,v28,v29,$keyperm
718 ?vperm v29,v29,v30,$keyperm
719 lvx $out0,$x70,$key # borrow $out0
720 ?vperm v30,v30,v31,$keyperm
721 lvx v24,$x00,$key_ # pre-load round[1]
722 ?vperm v31,v31,$out0,$keyperm
723 lvx v25,$x10,$key_ # pre-load round[2]
725 #lvx $inptail,0,$inp # "caller" already did this
726 #addi $inp,$inp,15 # 15 is not typo
727 subi $inp,$inp,15 # undo "caller"
730 lvx_u $in0,$x00,$inp # load first 8 "words"
731 le?lvsl $inpperm,0,$idx
732 le?vspltisb $tmp,0x0f
734 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
736 le?vperm $in0,$in0,$in0,$inpperm
738 le?vperm $in1,$in1,$in1,$inpperm
740 le?vperm $in2,$in2,$in2,$inpperm
741 vxor $out0,$in0,$rndkey0
743 le?vperm $in3,$in3,$in3,$inpperm
744 vxor $out1,$in1,$rndkey0
746 le?vperm $in4,$in4,$in4,$inpperm
747 vxor $out2,$in2,$rndkey0
750 le?vperm $in5,$in5,$in5,$inpperm
751 vxor $out3,$in3,$rndkey0
752 le?vperm $in6,$in6,$in6,$inpperm
753 vxor $out4,$in4,$rndkey0
754 le?vperm $in7,$in7,$in7,$inpperm
755 vxor $out5,$in5,$rndkey0
756 vxor $out6,$in6,$rndkey0
757 vxor $out7,$in7,$rndkey0
763 vncipher $out0,$out0,v24
764 vncipher $out1,$out1,v24
765 vncipher $out2,$out2,v24
766 vncipher $out3,$out3,v24
767 vncipher $out4,$out4,v24
768 vncipher $out5,$out5,v24
769 vncipher $out6,$out6,v24
770 vncipher $out7,$out7,v24
771 lvx v24,$x20,$key_ # round[3]
772 addi $key_,$key_,0x20
774 vncipher $out0,$out0,v25
775 vncipher $out1,$out1,v25
776 vncipher $out2,$out2,v25
777 vncipher $out3,$out3,v25
778 vncipher $out4,$out4,v25
779 vncipher $out5,$out5,v25
780 vncipher $out6,$out6,v25
781 vncipher $out7,$out7,v25
782 lvx v25,$x10,$key_ # round[4]
785 subic $len,$len,128 # $len-=128
786 vncipher $out0,$out0,v24
787 vncipher $out1,$out1,v24
788 vncipher $out2,$out2,v24
789 vncipher $out3,$out3,v24
790 vncipher $out4,$out4,v24
791 vncipher $out5,$out5,v24
792 vncipher $out6,$out6,v24
793 vncipher $out7,$out7,v24
795 subfe. r0,r0,r0 # borrow?-1:0
796 vncipher $out0,$out0,v25
797 vncipher $out1,$out1,v25
798 vncipher $out2,$out2,v25
799 vncipher $out3,$out3,v25
800 vncipher $out4,$out4,v25
801 vncipher $out5,$out5,v25
802 vncipher $out6,$out6,v25
803 vncipher $out7,$out7,v25
806 vncipher $out0,$out0,v26
807 vncipher $out1,$out1,v26
808 vncipher $out2,$out2,v26
809 vncipher $out3,$out3,v26
810 vncipher $out4,$out4,v26
811 vncipher $out5,$out5,v26
812 vncipher $out6,$out6,v26
813 vncipher $out7,$out7,v26
815 add $inp,$inp,r0 # $inp is adjusted in such
816 # way that at exit from the
817 # loop inX-in7 are loaded
819 vncipher $out0,$out0,v27
820 vncipher $out1,$out1,v27
821 vncipher $out2,$out2,v27
822 vncipher $out3,$out3,v27
823 vncipher $out4,$out4,v27
824 vncipher $out5,$out5,v27
825 vncipher $out6,$out6,v27
826 vncipher $out7,$out7,v27
828 addi $key_,$sp,$FRAME+15 # rewind $key_
829 vncipher $out0,$out0,v28
830 vncipher $out1,$out1,v28
831 vncipher $out2,$out2,v28
832 vncipher $out3,$out3,v28
833 vncipher $out4,$out4,v28
834 vncipher $out5,$out5,v28
835 vncipher $out6,$out6,v28
836 vncipher $out7,$out7,v28
837 lvx v24,$x00,$key_ # re-pre-load round[1]
839 vncipher $out0,$out0,v29
840 vncipher $out1,$out1,v29
841 vncipher $out2,$out2,v29
842 vncipher $out3,$out3,v29
843 vncipher $out4,$out4,v29
844 vncipher $out5,$out5,v29
845 vncipher $out6,$out6,v29
846 vncipher $out7,$out7,v29
847 lvx v25,$x10,$key_ # re-pre-load round[2]
849 vncipher $out0,$out0,v30
850 vxor $ivec,$ivec,v31 # xor with last round key
851 vncipher $out1,$out1,v30
853 vncipher $out2,$out2,v30
855 vncipher $out3,$out3,v30
857 vncipher $out4,$out4,v30
859 vncipher $out5,$out5,v30
861 vncipher $out6,$out6,v30
863 vncipher $out7,$out7,v30
866 vncipherlast $out0,$out0,$ivec
867 vncipherlast $out1,$out1,$in0
868 lvx_u $in0,$x00,$inp # load next input block
869 vncipherlast $out2,$out2,$in1
871 vncipherlast $out3,$out3,$in2
872 le?vperm $in0,$in0,$in0,$inpperm
874 vncipherlast $out4,$out4,$in3
875 le?vperm $in1,$in1,$in1,$inpperm
877 vncipherlast $out5,$out5,$in4
878 le?vperm $in2,$in2,$in2,$inpperm
880 vncipherlast $out6,$out6,$in5
881 le?vperm $in3,$in3,$in3,$inpperm
883 vncipherlast $out7,$out7,$in6
884 le?vperm $in4,$in4,$in4,$inpperm
887 le?vperm $in5,$in5,$in5,$inpperm
891 le?vperm $out0,$out0,$out0,$inpperm
892 le?vperm $out1,$out1,$out1,$inpperm
893 stvx_u $out0,$x00,$out
894 le?vperm $in6,$in6,$in6,$inpperm
895 vxor $out0,$in0,$rndkey0
896 le?vperm $out2,$out2,$out2,$inpperm
897 stvx_u $out1,$x10,$out
898 le?vperm $in7,$in7,$in7,$inpperm
899 vxor $out1,$in1,$rndkey0
900 le?vperm $out3,$out3,$out3,$inpperm
901 stvx_u $out2,$x20,$out
902 vxor $out2,$in2,$rndkey0
903 le?vperm $out4,$out4,$out4,$inpperm
904 stvx_u $out3,$x30,$out
905 vxor $out3,$in3,$rndkey0
906 le?vperm $out5,$out5,$out5,$inpperm
907 stvx_u $out4,$x40,$out
908 vxor $out4,$in4,$rndkey0
909 le?vperm $out6,$out6,$out6,$inpperm
910 stvx_u $out5,$x50,$out
911 vxor $out5,$in5,$rndkey0
912 le?vperm $out7,$out7,$out7,$inpperm
913 stvx_u $out6,$x60,$out
914 vxor $out6,$in6,$rndkey0
915 stvx_u $out7,$x70,$out
917 vxor $out7,$in7,$rndkey0
920 beq Loop_cbc_dec8x # did $len-=128 borrow?
927 Loop_cbc_dec8x_tail: # up to 7 "words" tail...
928 vncipher $out1,$out1,v24
929 vncipher $out2,$out2,v24
930 vncipher $out3,$out3,v24
931 vncipher $out4,$out4,v24
932 vncipher $out5,$out5,v24
933 vncipher $out6,$out6,v24
934 vncipher $out7,$out7,v24
935 lvx v24,$x20,$key_ # round[3]
936 addi $key_,$key_,0x20
938 vncipher $out1,$out1,v25
939 vncipher $out2,$out2,v25
940 vncipher $out3,$out3,v25
941 vncipher $out4,$out4,v25
942 vncipher $out5,$out5,v25
943 vncipher $out6,$out6,v25
944 vncipher $out7,$out7,v25
945 lvx v25,$x10,$key_ # round[4]
946 bdnz Loop_cbc_dec8x_tail
948 vncipher $out1,$out1,v24
949 vncipher $out2,$out2,v24
950 vncipher $out3,$out3,v24
951 vncipher $out4,$out4,v24
952 vncipher $out5,$out5,v24
953 vncipher $out6,$out6,v24
954 vncipher $out7,$out7,v24
956 vncipher $out1,$out1,v25
957 vncipher $out2,$out2,v25
958 vncipher $out3,$out3,v25
959 vncipher $out4,$out4,v25
960 vncipher $out5,$out5,v25
961 vncipher $out6,$out6,v25
962 vncipher $out7,$out7,v25
964 vncipher $out1,$out1,v26
965 vncipher $out2,$out2,v26
966 vncipher $out3,$out3,v26
967 vncipher $out4,$out4,v26
968 vncipher $out5,$out5,v26
969 vncipher $out6,$out6,v26
970 vncipher $out7,$out7,v26
972 vncipher $out1,$out1,v27
973 vncipher $out2,$out2,v27
974 vncipher $out3,$out3,v27
975 vncipher $out4,$out4,v27
976 vncipher $out5,$out5,v27
977 vncipher $out6,$out6,v27
978 vncipher $out7,$out7,v27
980 vncipher $out1,$out1,v28
981 vncipher $out2,$out2,v28
982 vncipher $out3,$out3,v28
983 vncipher $out4,$out4,v28
984 vncipher $out5,$out5,v28
985 vncipher $out6,$out6,v28
986 vncipher $out7,$out7,v28
988 vncipher $out1,$out1,v29
989 vncipher $out2,$out2,v29
990 vncipher $out3,$out3,v29
991 vncipher $out4,$out4,v29
992 vncipher $out5,$out5,v29
993 vncipher $out6,$out6,v29
994 vncipher $out7,$out7,v29
996 vncipher $out1,$out1,v30
997 vxor $ivec,$ivec,v31 # last round key
998 vncipher $out2,$out2,v30
1000 vncipher $out3,$out3,v30
1002 vncipher $out4,$out4,v30
1004 vncipher $out5,$out5,v30
1006 vncipher $out6,$out6,v30
1008 vncipher $out7,$out7,v30
1011 cmplwi $len,32 # switch($len)
1016 blt Lcbc_dec8x_three
1025 vncipherlast $out1,$out1,$ivec
1026 vncipherlast $out2,$out2,$in1
1027 vncipherlast $out3,$out3,$in2
1028 vncipherlast $out4,$out4,$in3
1029 vncipherlast $out5,$out5,$in4
1030 vncipherlast $out6,$out6,$in5
1031 vncipherlast $out7,$out7,$in6
1034 le?vperm $out1,$out1,$out1,$inpperm
1035 le?vperm $out2,$out2,$out2,$inpperm
1036 stvx_u $out1,$x00,$out
1037 le?vperm $out3,$out3,$out3,$inpperm
1038 stvx_u $out2,$x10,$out
1039 le?vperm $out4,$out4,$out4,$inpperm
1040 stvx_u $out3,$x20,$out
1041 le?vperm $out5,$out5,$out5,$inpperm
1042 stvx_u $out4,$x30,$out
1043 le?vperm $out6,$out6,$out6,$inpperm
1044 stvx_u $out5,$x40,$out
1045 le?vperm $out7,$out7,$out7,$inpperm
1046 stvx_u $out6,$x50,$out
1047 stvx_u $out7,$x60,$out
1053 vncipherlast $out2,$out2,$ivec
1054 vncipherlast $out3,$out3,$in2
1055 vncipherlast $out4,$out4,$in3
1056 vncipherlast $out5,$out5,$in4
1057 vncipherlast $out6,$out6,$in5
1058 vncipherlast $out7,$out7,$in6
1061 le?vperm $out2,$out2,$out2,$inpperm
1062 le?vperm $out3,$out3,$out3,$inpperm
1063 stvx_u $out2,$x00,$out
1064 le?vperm $out4,$out4,$out4,$inpperm
1065 stvx_u $out3,$x10,$out
1066 le?vperm $out5,$out5,$out5,$inpperm
1067 stvx_u $out4,$x20,$out
1068 le?vperm $out6,$out6,$out6,$inpperm
1069 stvx_u $out5,$x30,$out
1070 le?vperm $out7,$out7,$out7,$inpperm
1071 stvx_u $out6,$x40,$out
1072 stvx_u $out7,$x50,$out
1078 vncipherlast $out3,$out3,$ivec
1079 vncipherlast $out4,$out4,$in3
1080 vncipherlast $out5,$out5,$in4
1081 vncipherlast $out6,$out6,$in5
1082 vncipherlast $out7,$out7,$in6
1085 le?vperm $out3,$out3,$out3,$inpperm
1086 le?vperm $out4,$out4,$out4,$inpperm
1087 stvx_u $out3,$x00,$out
1088 le?vperm $out5,$out5,$out5,$inpperm
1089 stvx_u $out4,$x10,$out
1090 le?vperm $out6,$out6,$out6,$inpperm
1091 stvx_u $out5,$x20,$out
1092 le?vperm $out7,$out7,$out7,$inpperm
1093 stvx_u $out6,$x30,$out
1094 stvx_u $out7,$x40,$out
1100 vncipherlast $out4,$out4,$ivec
1101 vncipherlast $out5,$out5,$in4
1102 vncipherlast $out6,$out6,$in5
1103 vncipherlast $out7,$out7,$in6
1106 le?vperm $out4,$out4,$out4,$inpperm
1107 le?vperm $out5,$out5,$out5,$inpperm
1108 stvx_u $out4,$x00,$out
1109 le?vperm $out6,$out6,$out6,$inpperm
1110 stvx_u $out5,$x10,$out
1111 le?vperm $out7,$out7,$out7,$inpperm
1112 stvx_u $out6,$x20,$out
1113 stvx_u $out7,$x30,$out
1119 vncipherlast $out5,$out5,$ivec
1120 vncipherlast $out6,$out6,$in5
1121 vncipherlast $out7,$out7,$in6
1124 le?vperm $out5,$out5,$out5,$inpperm
1125 le?vperm $out6,$out6,$out6,$inpperm
1126 stvx_u $out5,$x00,$out
1127 le?vperm $out7,$out7,$out7,$inpperm
1128 stvx_u $out6,$x10,$out
1129 stvx_u $out7,$x20,$out
1135 vncipherlast $out6,$out6,$ivec
1136 vncipherlast $out7,$out7,$in6
1139 le?vperm $out6,$out6,$out6,$inpperm
1140 le?vperm $out7,$out7,$out7,$inpperm
1141 stvx_u $out6,$x00,$out
1142 stvx_u $out7,$x10,$out
1148 vncipherlast $out7,$out7,$ivec
1151 le?vperm $out7,$out7,$out7,$inpperm
1156 le?vperm $ivec,$ivec,$ivec,$inpperm
1157 stvx_u $ivec,0,$ivp # write [unaligned] iv
1161 stvx $inpperm,r10,$sp # wipe copies of round keys
1163 stvx $inpperm,r11,$sp
1165 stvx $inpperm,r10,$sp
1167 stvx $inpperm,r11,$sp
1169 stvx $inpperm,r10,$sp
1171 stvx $inpperm,r11,$sp
1173 stvx $inpperm,r10,$sp
1175 stvx $inpperm,r11,$sp
1179 lvx v20,r10,$sp # ABI says so
1201 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1202 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1203 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1204 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1205 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1206 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1207 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1210 .byte 0,12,0x14,0,0x80,6,6,0
1212 .size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1216 #########################################################################
1217 {{{ # CTR procedure[s] #
1218 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1219 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1220 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1225 .globl .${prefix}_ctr32_encrypt_blocks
1227 .${prefix}_ctr32_encrypt_blocks:
1236 vxor $rndkey0,$rndkey0,$rndkey0
1237 le?vspltisb $tmp,0x0f
1239 lvx $ivec,0,$ivp # load [unaligned] iv
1240 lvsl $inpperm,0,$ivp
1241 lvx $inptail,$idx,$ivp
1243 le?vxor $inpperm,$inpperm,$tmp
1244 vperm $ivec,$ivec,$inptail,$inpperm
1245 vsldoi $one,$rndkey0,$one,1
1248 ?lvsl $keyperm,0,$key # prepare for unaligned key
1249 lwz $rounds,240($key)
1251 lvsr $inpperm,0,r11 # prepare for unaligned load
1253 addi $inp,$inp,15 # 15 is not typo
1254 le?vxor $inpperm,$inpperm,$tmp
1256 srwi $rounds,$rounds,1
1258 subi $rounds,$rounds,1
1261 bge _aesp8_ctr32_encrypt8x
1263 ?lvsr $outperm,0,$out # prepare for unaligned store
1264 vspltisb $outmask,-1
1266 ?vperm $outmask,$rndkey0,$outmask,$outperm
1267 le?vxor $outperm,$outperm,$tmp
1271 lvx $rndkey1,$idx,$key
1273 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1274 vxor $inout,$ivec,$rndkey0
1275 lvx $rndkey0,$idx,$key
1281 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1282 vcipher $inout,$inout,$rndkey1
1283 lvx $rndkey1,$idx,$key
1285 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1286 vcipher $inout,$inout,$rndkey0
1287 lvx $rndkey0,$idx,$key
1291 vadduwm $ivec,$ivec,$one
1295 subic. $len,$len,1 # blocks--
1297 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1298 vcipher $inout,$inout,$rndkey1
1299 lvx $rndkey1,$idx,$key
1300 vperm $dat,$dat,$inptail,$inpperm
1302 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1304 vxor $dat,$dat,$rndkey1 # last round key
1305 vcipherlast $inout,$inout,$dat
1307 lvx $rndkey1,$idx,$key
1309 vperm $inout,$inout,$inout,$outperm
1310 vsel $dat,$outhead,$inout,$outmask
1312 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1314 vxor $inout,$ivec,$rndkey0
1315 lvx $rndkey0,$idx,$key
1322 lvx $inout,0,$out # redundant in aligned case
1323 vsel $inout,$outhead,$inout,$outmask
1329 .byte 0,12,0x14,0,0,0,6,0
1332 #########################################################################
1333 {{ # Optimized CTR procedure #
1335 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1336 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1337 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1338 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
1339 # v26-v31 last 6 round keys
1340 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1341 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1345 _aesp8_ctr32_encrypt8x:
1346 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1347 li r10,`$FRAME+8*16+15`
1348 li r11,`$FRAME+8*16+31`
1349 stvx v20,r10,$sp # ABI says so
1372 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1374 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1376 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1378 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1380 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1382 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1384 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1388 subi $rounds,$rounds,3 # -4 in total
1390 lvx $rndkey0,$x00,$key # load key schedule
1394 ?vperm $rndkey0,$rndkey0,v30,$keyperm
1395 addi $key_,$sp,$FRAME+15
1399 ?vperm v24,v30,v31,$keyperm
1402 stvx v24,$x00,$key_ # off-load round[1]
1403 ?vperm v25,v31,v30,$keyperm
1405 stvx v25,$x10,$key_ # off-load round[2]
1406 addi $key_,$key_,0x20
1407 bdnz Load_ctr32_enc_key
1410 ?vperm v24,v30,v31,$keyperm
1412 stvx v24,$x00,$key_ # off-load round[3]
1413 ?vperm v25,v31,v26,$keyperm
1415 stvx v25,$x10,$key_ # off-load round[4]
1416 addi $key_,$sp,$FRAME+15 # rewind $key_
1417 ?vperm v26,v26,v27,$keyperm
1419 ?vperm v27,v27,v28,$keyperm
1421 ?vperm v28,v28,v29,$keyperm
1423 ?vperm v29,v29,v30,$keyperm
1424 lvx $out0,$x70,$key # borrow $out0
1425 ?vperm v30,v30,v31,$keyperm
1426 lvx v24,$x00,$key_ # pre-load round[1]
1427 ?vperm v31,v31,$out0,$keyperm
1428 lvx v25,$x10,$key_ # pre-load round[2]
1430 vadduwm $two,$one,$one
1431 subi $inp,$inp,15 # undo "caller"
1434 vadduwm $out1,$ivec,$one # counter values ...
1435 vadduwm $out2,$ivec,$two
1436 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1438 vadduwm $out3,$out1,$two
1439 vxor $out1,$out1,$rndkey0
1440 le?lvsl $inpperm,0,$idx
1441 vadduwm $out4,$out2,$two
1442 vxor $out2,$out2,$rndkey0
1443 le?vspltisb $tmp,0x0f
1444 vadduwm $out5,$out3,$two
1445 vxor $out3,$out3,$rndkey0
1446 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1447 vadduwm $out6,$out4,$two
1448 vxor $out4,$out4,$rndkey0
1449 vadduwm $out7,$out5,$two
1450 vxor $out5,$out5,$rndkey0
1451 vadduwm $ivec,$out6,$two # next counter value
1452 vxor $out6,$out6,$rndkey0
1453 vxor $out7,$out7,$rndkey0
1459 vcipher $out0,$out0,v24
1460 vcipher $out1,$out1,v24
1461 vcipher $out2,$out2,v24
1462 vcipher $out3,$out3,v24
1463 vcipher $out4,$out4,v24
1464 vcipher $out5,$out5,v24
1465 vcipher $out6,$out6,v24
1466 vcipher $out7,$out7,v24
1467 Loop_ctr32_enc8x_middle:
1468 lvx v24,$x20,$key_ # round[3]
1469 addi $key_,$key_,0x20
1471 vcipher $out0,$out0,v25
1472 vcipher $out1,$out1,v25
1473 vcipher $out2,$out2,v25
1474 vcipher $out3,$out3,v25
1475 vcipher $out4,$out4,v25
1476 vcipher $out5,$out5,v25
1477 vcipher $out6,$out6,v25
1478 vcipher $out7,$out7,v25
1479 lvx v25,$x10,$key_ # round[4]
1480 bdnz Loop_ctr32_enc8x
1482 subic r11,$len,256 # $len-256, borrow $key_
1483 vcipher $out0,$out0,v24
1484 vcipher $out1,$out1,v24
1485 vcipher $out2,$out2,v24
1486 vcipher $out3,$out3,v24
1487 vcipher $out4,$out4,v24
1488 vcipher $out5,$out5,v24
1489 vcipher $out6,$out6,v24
1490 vcipher $out7,$out7,v24
1492 subfe r0,r0,r0 # borrow?-1:0
1493 vcipher $out0,$out0,v25
1494 vcipher $out1,$out1,v25
1495 vcipher $out2,$out2,v25
1496 vcipher $out3,$out3,v25
1497 vcipher $out4,$out4,v25
1498 vcipher $out5,$out5,v25
1499 vcipher $out6,$out6,v25
1500 vcipher $out7,$out7,v25
1503 addi $key_,$sp,$FRAME+15 # rewind $key_
1504 vcipher $out0,$out0,v26
1505 vcipher $out1,$out1,v26
1506 vcipher $out2,$out2,v26
1507 vcipher $out3,$out3,v26
1508 vcipher $out4,$out4,v26
1509 vcipher $out5,$out5,v26
1510 vcipher $out6,$out6,v26
1511 vcipher $out7,$out7,v26
1512 lvx v24,$x00,$key_ # re-pre-load round[1]
1514 subic $len,$len,129 # $len-=129
1515 vcipher $out0,$out0,v27
1516 addi $len,$len,1 # $len-=128 really
1517 vcipher $out1,$out1,v27
1518 vcipher $out2,$out2,v27
1519 vcipher $out3,$out3,v27
1520 vcipher $out4,$out4,v27
1521 vcipher $out5,$out5,v27
1522 vcipher $out6,$out6,v27
1523 vcipher $out7,$out7,v27
1524 lvx v25,$x10,$key_ # re-pre-load round[2]
1526 vcipher $out0,$out0,v28
1527 lvx_u $in0,$x00,$inp # load input
1528 vcipher $out1,$out1,v28
1529 lvx_u $in1,$x10,$inp
1530 vcipher $out2,$out2,v28
1531 lvx_u $in2,$x20,$inp
1532 vcipher $out3,$out3,v28
1533 lvx_u $in3,$x30,$inp
1534 vcipher $out4,$out4,v28
# NOTE(review): excerpt of the 8x-interleaved AES CTR32 encrypt path.
# The enclosing Perl here-doc begins outside this view and some original
# lines are absent; code lines below are left byte-identical, only
# assembler comments are added.
# Loads of the next 8 input blocks are interleaved with vcipher rounds
# on the current 8 counter blocks to hide load latency.
1535 lvx_u $in4,$x40,$inp
1536 vcipher $out5,$out5,v28
1537 lvx_u $in5,$x50,$inp
1538 vcipher $out6,$out6,v28
1539 lvx_u $in6,$x60,$inp
1540 vcipher $out7,$out7,v28
1541 lvx_u $in7,$x70,$inp
# Round with v29 overlapped with little-endian byte-order fixup of the
# freshly loaded input blocks.
1544 vcipher $out0,$out0,v29
1545 le?vperm $in0,$in0,$in0,$inpperm
1546 vcipher $out1,$out1,v29
1547 le?vperm $in1,$in1,$in1,$inpperm
1548 vcipher $out2,$out2,v29
1549 le?vperm $in2,$in2,$in2,$inpperm
1550 vcipher $out3,$out3,v29
1551 le?vperm $in3,$in3,$in3,$inpperm
1552 vcipher $out4,$out4,v29
1553 le?vperm $in4,$in4,$in4,$inpperm
1554 vcipher $out5,$out5,v29
1555 le?vperm $in5,$in5,$in5,$inpperm
1556 vcipher $out6,$out6,v29
1557 le?vperm $in6,$in6,$in6,$inpperm
1558 vcipher $out7,$out7,v29
1559 le?vperm $in7,$in7,$in7,$inpperm
1561 add $inp,$inp,r0 # $inp is adjusted in such
1562 # way that at exit from the
1563 # loop inX-in7 are loaded
# subfe. yields r0 = -1 when the earlier len-129 subtraction borrowed
# (fewer than 129 bytes left), 0 otherwise; tested by the bne below.
1565 subfe. r0,r0,r0 # borrow?-1:0
1566 vcipher $out0,$out0,v30
# Inputs are pre-xored with the last round key (v31) so the
# vcipherlast instructions further down produce ciphertext directly.
1567 vxor $in0,$in0,v31 # xor with last round key
1568 vcipher $out1,$out1,v30
1570 vcipher $out2,$out2,v30
1572 vcipher $out3,$out3,v30
1574 vcipher $out4,$out4,v30
1576 vcipher $out5,$out5,v30
1578 vcipher $out6,$out6,v30
1580 vcipher $out7,$out7,v30
1583 bne Lctr32_enc8x_break # did $len-129 borrow?
# Loop tail: finish this batch (ciphertext lands in in0..in7) while the
# next eight counter values are formed and xored with round key zero.
1585 vcipherlast $in0,$out0,$in0
1586 vcipherlast $in1,$out1,$in1
1587 vadduwm $out1,$ivec,$one # counter values ...
1588 vcipherlast $in2,$out2,$in2
1589 vadduwm $out2,$ivec,$two
1590 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1591 vcipherlast $in3,$out3,$in3
1592 vadduwm $out3,$out1,$two
1593 vxor $out1,$out1,$rndkey0
1594 vcipherlast $in4,$out4,$in4
1595 vadduwm $out4,$out2,$two
1596 vxor $out2,$out2,$rndkey0
1597 vcipherlast $in5,$out5,$in5
1598 vadduwm $out5,$out3,$two
1599 vxor $out3,$out3,$rndkey0
1600 vcipherlast $in6,$out6,$in6
1601 vadduwm $out6,$out4,$two
1602 vxor $out4,$out4,$rndkey0
1603 vcipherlast $in7,$out7,$in7
1604 vadduwm $out7,$out5,$two
1605 vxor $out5,$out5,$rndkey0
1606 le?vperm $in0,$in0,$in0,$inpperm
1607 vadduwm $ivec,$out6,$two # next counter value
1608 vxor $out6,$out6,$rndkey0
1609 le?vperm $in1,$in1,$in1,$inpperm
1610 vxor $out7,$out7,$rndkey0
# First round (v24) of the next batch overlapped with stores of the
# ciphertext just produced.
1613 vcipher $out0,$out0,v24
1614 stvx_u $in0,$x00,$out
1615 le?vperm $in2,$in2,$in2,$inpperm
1616 vcipher $out1,$out1,v24
1617 stvx_u $in1,$x10,$out
1618 le?vperm $in3,$in3,$in3,$inpperm
1619 vcipher $out2,$out2,v24
1620 stvx_u $in2,$x20,$out
1621 le?vperm $in4,$in4,$in4,$inpperm
1622 vcipher $out3,$out3,v24
1623 stvx_u $in3,$x30,$out
1624 le?vperm $in5,$in5,$in5,$inpperm
1625 vcipher $out4,$out4,v24
1626 stvx_u $in4,$x40,$out
1627 le?vperm $in6,$in6,$in6,$inpperm
1628 vcipher $out5,$out5,v24
1629 stvx_u $in5,$x50,$out
1630 le?vperm $in7,$in7,$in7,$inpperm
1631 vcipher $out6,$out6,v24
1632 stvx_u $in6,$x60,$out
1633 vcipher $out7,$out7,v24
1634 stvx_u $in7,$x70,$out
1637 b Loop_ctr32_enc8x_middle
# Tail dispatch: branch by how many blocks (1..7) remain; fall-through
# presumably handles the 8-block case - labels not visible here, TODO
# confirm against the full function.
1642 blt Lctr32_enc8x_one
1644 beq Lctr32_enc8x_two
1646 blt Lctr32_enc8x_three
1648 beq Lctr32_enc8x_four
1650 blt Lctr32_enc8x_five
1652 beq Lctr32_enc8x_six
1654 blt Lctr32_enc8x_seven
# Eight remaining blocks: complete the final round and store.
1657 vcipherlast $out0,$out0,$in0
1658 vcipherlast $out1,$out1,$in1
1659 vcipherlast $out2,$out2,$in2
1660 vcipherlast $out3,$out3,$in3
1661 vcipherlast $out4,$out4,$in4
1662 vcipherlast $out5,$out5,$in5
1663 vcipherlast $out6,$out6,$in6
1664 vcipherlast $out7,$out7,$in7
1666 le?vperm $out0,$out0,$out0,$inpperm
1667 le?vperm $out1,$out1,$out1,$inpperm
1668 stvx_u $out0,$x00,$out
1669 le?vperm $out2,$out2,$out2,$inpperm
1670 stvx_u $out1,$x10,$out
1671 le?vperm $out3,$out3,$out3,$inpperm
1672 stvx_u $out2,$x20,$out
1673 le?vperm $out4,$out4,$out4,$inpperm
1674 stvx_u $out3,$x30,$out
1675 le?vperm $out5,$out5,$out5,$inpperm
1676 stvx_u $out4,$x40,$out
1677 le?vperm $out6,$out6,$out6,$inpperm
1678 stvx_u $out5,$x50,$out
1679 le?vperm $out7,$out7,$out7,$inpperm
1680 stvx_u $out6,$x60,$out
1681 stvx_u $out7,$x70,$out
# Seven-block tail: note the input operands shift by one (in1..in7).
1687 vcipherlast $out0,$out0,$in1
1688 vcipherlast $out1,$out1,$in2
1689 vcipherlast $out2,$out2,$in3
1690 vcipherlast $out3,$out3,$in4
1691 vcipherlast $out4,$out4,$in5
1692 vcipherlast $out5,$out5,$in6
1693 vcipherlast $out6,$out6,$in7
1695 le?vperm $out0,$out0,$out0,$inpperm
1696 le?vperm $out1,$out1,$out1,$inpperm
1697 stvx_u $out0,$x00,$out
1698 le?vperm $out2,$out2,$out2,$inpperm
1699 stvx_u $out1,$x10,$out
1700 le?vperm $out3,$out3,$out3,$inpperm
1701 stvx_u $out2,$x20,$out
1702 le?vperm $out4,$out4,$out4,$inpperm
1703 stvx_u $out3,$x30,$out
1704 le?vperm $out5,$out5,$out5,$inpperm
1705 stvx_u $out4,$x40,$out
1706 le?vperm $out6,$out6,$out6,$inpperm
1707 stvx_u $out5,$x50,$out
1708 stvx_u $out6,$x60,$out
# Six-block tail (inputs in2..in7).
1714 vcipherlast $out0,$out0,$in2
1715 vcipherlast $out1,$out1,$in3
1716 vcipherlast $out2,$out2,$in4
1717 vcipherlast $out3,$out3,$in5
1718 vcipherlast $out4,$out4,$in6
1719 vcipherlast $out5,$out5,$in7
1721 le?vperm $out0,$out0,$out0,$inpperm
1722 le?vperm $out1,$out1,$out1,$inpperm
1723 stvx_u $out0,$x00,$out
1724 le?vperm $out2,$out2,$out2,$inpperm
1725 stvx_u $out1,$x10,$out
1726 le?vperm $out3,$out3,$out3,$inpperm
1727 stvx_u $out2,$x20,$out
1728 le?vperm $out4,$out4,$out4,$inpperm
1729 stvx_u $out3,$x30,$out
1730 le?vperm $out5,$out5,$out5,$inpperm
1731 stvx_u $out4,$x40,$out
1732 stvx_u $out5,$x50,$out
# Five-block tail (inputs in3..in7).
1738 vcipherlast $out0,$out0,$in3
1739 vcipherlast $out1,$out1,$in4
1740 vcipherlast $out2,$out2,$in5
1741 vcipherlast $out3,$out3,$in6
1742 vcipherlast $out4,$out4,$in7
1744 le?vperm $out0,$out0,$out0,$inpperm
1745 le?vperm $out1,$out1,$out1,$inpperm
1746 stvx_u $out0,$x00,$out
1747 le?vperm $out2,$out2,$out2,$inpperm
1748 stvx_u $out1,$x10,$out
1749 le?vperm $out3,$out3,$out3,$inpperm
1750 stvx_u $out2,$x20,$out
1751 le?vperm $out4,$out4,$out4,$inpperm
1752 stvx_u $out3,$x30,$out
1753 stvx_u $out4,$x40,$out
# Four-block tail (inputs in4..in7).
1759 vcipherlast $out0,$out0,$in4
1760 vcipherlast $out1,$out1,$in5
1761 vcipherlast $out2,$out2,$in6
1762 vcipherlast $out3,$out3,$in7
1764 le?vperm $out0,$out0,$out0,$inpperm
1765 le?vperm $out1,$out1,$out1,$inpperm
1766 stvx_u $out0,$x00,$out
1767 le?vperm $out2,$out2,$out2,$inpperm
1768 stvx_u $out1,$x10,$out
1769 le?vperm $out3,$out3,$out3,$inpperm
1770 stvx_u $out2,$x20,$out
1771 stvx_u $out3,$x30,$out
# Three-block tail (inputs in5..in7).
1777 vcipherlast $out0,$out0,$in5
1778 vcipherlast $out1,$out1,$in6
1779 vcipherlast $out2,$out2,$in7
1781 le?vperm $out0,$out0,$out0,$inpperm
1782 le?vperm $out1,$out1,$out1,$inpperm
1783 stvx_u $out0,$x00,$out
1784 le?vperm $out2,$out2,$out2,$inpperm
1785 stvx_u $out1,$x10,$out
1786 stvx_u $out2,$x20,$out
# Two-block tail (inputs in6..in7).
1792 vcipherlast $out0,$out0,$in6
1793 vcipherlast $out1,$out1,$in7
1795 le?vperm $out0,$out0,$out0,$inpperm
1796 le?vperm $out1,$out1,$out1,$inpperm
1797 stvx_u $out0,$x00,$out
1798 stvx_u $out1,$x10,$out
# One-block tail (input in7).
1804 vcipherlast $out0,$out0,$in7
1806 le?vperm $out0,$out0,$out0,$inpperm
# Overwrite the stack area where round-key copies were spilled; the
# vector stored is the alignment permutation, not key material - TODO
# confirm against the full function body.
1813 stvx $inpperm,r10,$sp # wipe copies of round keys
1815 stvx $inpperm,r11,$sp
1817 stvx $inpperm,r10,$sp
1819 stvx $inpperm,r11,$sp
1821 stvx $inpperm,r10,$sp
1823 stvx $inpperm,r11,$sp
1825 stvx $inpperm,r10,$sp
1827 stvx $inpperm,r11,$sp
# Epilogue: restore ABI-mandated non-volatile vector registers (v20
# onward) and GPRs r26-r31, then pop the frame.
1831 lvx v20,r10,$sp # ABI says so
1853 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1854 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1855 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1856 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1857 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1858 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1859 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
# Traceback-table bytes for the function - TODO confirm encoding.
1862 .byte 0,12,0x14,0,0x80,6,6,0
1864 .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
# Post-process each generated assembly line before it reaches the
# ppc-xlate.pl pipe opened above.  NOTE(review): this loop continues
# past this excerpt and several of its lines are not visible here;
# comments describe only what is shown.
1869 foreach(split("\n",$code)) {
# Evaluate backticked expressions (frame-offset arithmetic etc.) at
# generation time.
1870 s/\`([^\`]*)\`/eval($1)/geo;
1872 # constants table endian-specific conversion
# Constants-table lines carry a trailing ?tag that selects the
# conversion applied below.
1873 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
1877 # convert to endian-agnostic format
1879 foreach (split(/,\s*/,$2)) {
# Literals with a leading 0 are octal/hex (oct handles 0x too),
# otherwise decimal.
1880 my $l = /^0/?oct:int;
# Split each 32-bit word into big-endian byte order.
1881 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
1884 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
1887 # little-endian conversion
1888 if ($flavour =~ /le$/o) {
1889 SWITCH: for($conv) {
# ?inv complements nibble indices (permutation masks), ?rev reverses
# the byte order of the table entry.
1890 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
1891 /\?rev/ && do { @bytes=reverse(@bytes); last; };
# Emit the converted table entry as raw bytes.
1896 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
1899 $consts=0 if (m/Lconsts:/o); # end of table
1901 # instructions prefixed with '?' are endian-specific and need
1902 # to be adjusted accordingly...
# Little-endian fixups: swap the two vperm source operands, complement
# the vsldoi shift (16-n) and the vspltw lane (3-n) so the big-endian
# semantics are preserved.
1903 if ($flavour =~ /le$/o) { # little-endian
1908 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
1909 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
1910 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
1911 } else { # big-endian