2 # Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
17 # This module implements support for AES instructions as per PowerISA
18 # specification version 2.07, first implemented by POWER8 processor.
19 # The module is endian-agnostic in sense that it supports both big-
20 # and little-endian cases. Data alignment in parallelizable modes is
21 # handled with VSX loads and stores, which implies MSR.VSX flag being
22 # set. It should also be noted that ISA specification doesn't prohibit
23 # alignment exceptions for these instructions on page boundaries.
24 # Initially alignment was handled in pure AltiVec/VMX way [when data
25 # is aligned programmatically, which in turn guarantees exception-
26 # free execution], but it turned to hamper performance when vcipher
27 # instructions are interleaved. It's reckoned that eventual
28 # misalignment penalties at page boundaries are in average lower
29 # than additional overhead in pure AltiVec approach.
33 # Add XTS subroutine, 9x on little- and 12x improvement on big-endian
34 # systems were measured.
36 ######################################################################
37 # Current large-block performance in cycles per byte processed with
38 # 128-bit key (less is better).
40 # CBC en-/decrypt CTR XTS
41 # POWER8[le] 3.96/0.72 0.74 1.1
42 # POWER8[be] 3.75/0.65 0.66 1.0
46 if ($flavour =~ /64/) {
54 } elsif ($flavour =~ /32/) {
62 } else { die "nonsense $flavour"; }
64 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
66 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
67 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
68 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
69 die "can't locate ppc-xlate.pl";
71 open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
79 #########################################################################
80 {{{ # Key setup procedures #
81 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
82 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
83 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
92 .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
93 .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
94 .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
99 mflr $ptr #vvvvv "distance between . and rcon
104 .byte 0,12,0x14,0,0,0,0,0
105 .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
107 .globl .${prefix}_set_encrypt_key
109 .${prefix}_set_encrypt_key:
112 $PUSH r11,$LRSAVE($sp)
116 beq- Lenc_key_abort # if ($inp==0) return -1;
118 beq- Lenc_key_abort # if ($out==0) return -1;
136 addi $inp,$inp,15 # 15 is not typo
137 lvsr $key,0,r9 # borrow $key
141 le?vspltisb $mask,0x0f # borrow $mask
143 le?vxor $key,$key,$mask # adjust for byte swap
146 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
148 vxor $zero,$zero,$zero
151 ?lvsr $outperm,0,$out
154 ?vperm $outmask,$zero,$outmask,$outperm
164 vperm $key,$in0,$in0,$mask # rotate-n-splat
165 vsldoi $tmp,$zero,$in0,12 # >>32
166 vperm $outtail,$in0,$in0,$outperm # rotate
167 vsel $stage,$outhead,$outtail,$outmask
168 vmr $outhead,$outtail
169 vcipherlast $key,$key,$rcon
174 vsldoi $tmp,$zero,$tmp,12 # >>32
176 vsldoi $tmp,$zero,$tmp,12 # >>32
178 vadduwm $rcon,$rcon,$rcon
182 lvx $rcon,0,$ptr # last two round keys
184 vperm $key,$in0,$in0,$mask # rotate-n-splat
185 vsldoi $tmp,$zero,$in0,12 # >>32
186 vperm $outtail,$in0,$in0,$outperm # rotate
187 vsel $stage,$outhead,$outtail,$outmask
188 vmr $outhead,$outtail
189 vcipherlast $key,$key,$rcon
194 vsldoi $tmp,$zero,$tmp,12 # >>32
196 vsldoi $tmp,$zero,$tmp,12 # >>32
198 vadduwm $rcon,$rcon,$rcon
201 vperm $key,$in0,$in0,$mask # rotate-n-splat
202 vsldoi $tmp,$zero,$in0,12 # >>32
203 vperm $outtail,$in0,$in0,$outperm # rotate
204 vsel $stage,$outhead,$outtail,$outmask
205 vmr $outhead,$outtail
206 vcipherlast $key,$key,$rcon
211 vsldoi $tmp,$zero,$tmp,12 # >>32
213 vsldoi $tmp,$zero,$tmp,12 # >>32
216 vperm $outtail,$in0,$in0,$outperm # rotate
217 vsel $stage,$outhead,$outtail,$outmask
218 vmr $outhead,$outtail
221 addi $inp,$out,15 # 15 is not typo
231 vperm $outtail,$in0,$in0,$outperm # rotate
232 vsel $stage,$outhead,$outtail,$outmask
233 vmr $outhead,$outtail
236 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
237 vspltisb $key,8 # borrow $key
239 vsububm $mask,$mask,$key # adjust the mask
242 vperm $key,$in1,$in1,$mask # roate-n-splat
243 vsldoi $tmp,$zero,$in0,12 # >>32
244 vcipherlast $key,$key,$rcon
247 vsldoi $tmp,$zero,$tmp,12 # >>32
249 vsldoi $tmp,$zero,$tmp,12 # >>32
252 vsldoi $stage,$zero,$in1,8
255 vsldoi $in1,$zero,$in1,12 # >>32
256 vadduwm $rcon,$rcon,$rcon
260 vsldoi $stage,$stage,$in0,8
262 vperm $key,$in1,$in1,$mask # rotate-n-splat
263 vsldoi $tmp,$zero,$in0,12 # >>32
264 vperm $outtail,$stage,$stage,$outperm # rotate
265 vsel $stage,$outhead,$outtail,$outmask
266 vmr $outhead,$outtail
267 vcipherlast $key,$key,$rcon
271 vsldoi $stage,$in0,$in1,8
273 vsldoi $tmp,$zero,$tmp,12 # >>32
274 vperm $outtail,$stage,$stage,$outperm # rotate
275 vsel $stage,$outhead,$outtail,$outmask
276 vmr $outhead,$outtail
278 vsldoi $tmp,$zero,$tmp,12 # >>32
285 vsldoi $in1,$zero,$in1,12 # >>32
286 vadduwm $rcon,$rcon,$rcon
290 vperm $outtail,$in0,$in0,$outperm # rotate
291 vsel $stage,$outhead,$outtail,$outmask
292 vmr $outhead,$outtail
294 addi $inp,$out,15 # 15 is not typo
307 vperm $outtail,$in0,$in0,$outperm # rotate
308 vsel $stage,$outhead,$outtail,$outmask
309 vmr $outhead,$outtail
312 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
316 vperm $key,$in1,$in1,$mask # rotate-n-splat
317 vsldoi $tmp,$zero,$in0,12 # >>32
318 vperm $outtail,$in1,$in1,$outperm # rotate
319 vsel $stage,$outhead,$outtail,$outmask
320 vmr $outhead,$outtail
321 vcipherlast $key,$key,$rcon
326 vsldoi $tmp,$zero,$tmp,12 # >>32
328 vsldoi $tmp,$zero,$tmp,12 # >>32
330 vadduwm $rcon,$rcon,$rcon
332 vperm $outtail,$in0,$in0,$outperm # rotate
333 vsel $stage,$outhead,$outtail,$outmask
334 vmr $outhead,$outtail
336 addi $inp,$out,15 # 15 is not typo
340 vspltw $key,$in0,3 # just splat
341 vsldoi $tmp,$zero,$in1,12 # >>32
345 vsldoi $tmp,$zero,$tmp,12 # >>32
347 vsldoi $tmp,$zero,$tmp,12 # >>32
355 lvx $in1,0,$inp # redundant in aligned case
356 vsel $in1,$outhead,$in1,$outmask
366 .byte 0,12,0x14,1,0,0,3,0
368 .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
370 .globl .${prefix}_set_decrypt_key
372 .${prefix}_set_decrypt_key:
373 $STU $sp,-$FRAME($sp)
375 $PUSH r10,$FRAME+$LRSAVE($sp)
383 subi $inp,$out,240 # first round key
384 srwi $rounds,$rounds,1
385 add $out,$inp,$cnt # last round key
409 xor r3,r3,r3 # return value
414 .byte 0,12,4,1,0x80,0,3,0
416 .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
419 #########################################################################
420 {{{ # Single block en- and decrypt procedures #
423 my $n = $dir eq "de" ? "n" : "";
424 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
427 .globl .${prefix}_${dir}crypt
429 .${prefix}_${dir}crypt:
430 lwz $rounds,240($key)
433 li $idx,15 # 15 is not typo
439 lvsl v2,0,$inp # inpperm
441 ?lvsl v3,0,r11 # outperm
444 vperm v0,v0,v1,v2 # align [and byte swap in LE]
446 ?lvsl v5,0,$key # keyperm
447 srwi $rounds,$rounds,1
450 subi $rounds,$rounds,1
451 ?vperm v1,v1,v2,v5 # align round key
473 v${n}cipherlast v0,v0,v1
477 li $idx,15 # 15 is not typo
478 ?vperm v2,v1,v2,v3 # outmask
480 lvx v1,0,$out # outhead
481 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
491 .byte 0,12,0x14,0,0,0,3,0
493 .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
499 #########################################################################
500 {{{ # CBC en- and decrypt procedures #
501 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
502 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
503 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
506 .globl .${prefix}_cbc_encrypt
508 .${prefix}_cbc_encrypt:
512 cmpwi $enc,0 # test direction
518 vxor $rndkey0,$rndkey0,$rndkey0
519 le?vspltisb $tmp,0x0f
521 lvx $ivec,0,$ivp # load [unaligned] iv
523 lvx $inptail,$idx,$ivp
524 le?vxor $inpperm,$inpperm,$tmp
525 vperm $ivec,$ivec,$inptail,$inpperm
528 ?lvsl $keyperm,0,$key # prepare for unaligned key
529 lwz $rounds,240($key)
531 lvsr $inpperm,0,r11 # prepare for unaligned load
533 addi $inp,$inp,15 # 15 is not typo
534 le?vxor $inpperm,$inpperm,$tmp
536 ?lvsr $outperm,0,$out # prepare for unaligned store
539 ?vperm $outmask,$rndkey0,$outmask,$outperm
540 le?vxor $outperm,$outperm,$tmp
542 srwi $rounds,$rounds,1
544 subi $rounds,$rounds,1
552 subi $len,$len,16 # len-=16
555 vperm $inout,$inout,$inptail,$inpperm
556 lvx $rndkey1,$idx,$key
558 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
559 vxor $inout,$inout,$rndkey0
560 lvx $rndkey0,$idx,$key
562 vxor $inout,$inout,$ivec
565 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
566 vcipher $inout,$inout,$rndkey1
567 lvx $rndkey1,$idx,$key
569 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
570 vcipher $inout,$inout,$rndkey0
571 lvx $rndkey0,$idx,$key
575 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
576 vcipher $inout,$inout,$rndkey1
577 lvx $rndkey1,$idx,$key
579 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
580 vcipherlast $ivec,$inout,$rndkey0
583 vperm $tmp,$ivec,$ivec,$outperm
584 vsel $inout,$outhead,$tmp,$outmask
595 bge _aesp8_cbc_decrypt8x
600 subi $len,$len,16 # len-=16
603 vperm $tmp,$tmp,$inptail,$inpperm
604 lvx $rndkey1,$idx,$key
606 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
607 vxor $inout,$tmp,$rndkey0
608 lvx $rndkey0,$idx,$key
612 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
613 vncipher $inout,$inout,$rndkey1
614 lvx $rndkey1,$idx,$key
616 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
617 vncipher $inout,$inout,$rndkey0
618 lvx $rndkey0,$idx,$key
622 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
623 vncipher $inout,$inout,$rndkey1
624 lvx $rndkey1,$idx,$key
626 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
627 vncipherlast $inout,$inout,$rndkey0
630 vxor $inout,$inout,$ivec
632 vperm $tmp,$inout,$inout,$outperm
633 vsel $inout,$outhead,$tmp,$outmask
641 lvx $inout,0,$out # redundant in aligned case
642 vsel $inout,$outhead,$inout,$outmask
645 neg $enc,$ivp # write [unaligned] iv
646 li $idx,15 # 15 is not typo
647 vxor $rndkey0,$rndkey0,$rndkey0
649 le?vspltisb $tmp,0x0f
650 ?lvsl $outperm,0,$enc
651 ?vperm $outmask,$rndkey0,$outmask,$outperm
652 le?vxor $outperm,$outperm,$tmp
654 vperm $ivec,$ivec,$ivec,$outperm
655 vsel $inout,$outhead,$ivec,$outmask
656 lvx $inptail,$idx,$ivp
658 vsel $inout,$ivec,$inptail,$outmask
659 stvx $inout,$idx,$ivp
664 .byte 0,12,0x14,0,0,0,6,0
667 #########################################################################
668 {{ # Optimized CBC decrypt procedure #
670 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
671 $x00=0 if ($flavour =~ /osx/);
672 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
673 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
674 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
675 # v26-v31 last 6 round keys
676 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
680 _aesp8_cbc_decrypt8x:
681 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
682 li r10,`$FRAME+8*16+15`
683 li r11,`$FRAME+8*16+31`
684 stvx v20,r10,$sp # ABI says so
707 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
709 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
711 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
713 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
715 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
717 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
719 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
723 subi $rounds,$rounds,3 # -4 in total
724 subi $len,$len,128 # bias
726 lvx $rndkey0,$x00,$key # load key schedule
730 ?vperm $rndkey0,$rndkey0,v30,$keyperm
731 addi $key_,$sp,$FRAME+15
735 ?vperm v24,v30,v31,$keyperm
738 stvx v24,$x00,$key_ # off-load round[1]
739 ?vperm v25,v31,v30,$keyperm
741 stvx v25,$x10,$key_ # off-load round[2]
742 addi $key_,$key_,0x20
743 bdnz Load_cbc_dec_key
746 ?vperm v24,v30,v31,$keyperm
748 stvx v24,$x00,$key_ # off-load round[3]
749 ?vperm v25,v31,v26,$keyperm
751 stvx v25,$x10,$key_ # off-load round[4]
752 addi $key_,$sp,$FRAME+15 # rewind $key_
753 ?vperm v26,v26,v27,$keyperm
755 ?vperm v27,v27,v28,$keyperm
757 ?vperm v28,v28,v29,$keyperm
759 ?vperm v29,v29,v30,$keyperm
760 lvx $out0,$x70,$key # borrow $out0
761 ?vperm v30,v30,v31,$keyperm
762 lvx v24,$x00,$key_ # pre-load round[1]
763 ?vperm v31,v31,$out0,$keyperm
764 lvx v25,$x10,$key_ # pre-load round[2]
766 #lvx $inptail,0,$inp # "caller" already did this
767 #addi $inp,$inp,15 # 15 is not typo
768 subi $inp,$inp,15 # undo "caller"
771 lvx_u $in0,$x00,$inp # load first 8 "words"
772 le?lvsl $inpperm,0,$idx
773 le?vspltisb $tmp,0x0f
775 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
777 le?vperm $in0,$in0,$in0,$inpperm
779 le?vperm $in1,$in1,$in1,$inpperm
781 le?vperm $in2,$in2,$in2,$inpperm
782 vxor $out0,$in0,$rndkey0
784 le?vperm $in3,$in3,$in3,$inpperm
785 vxor $out1,$in1,$rndkey0
787 le?vperm $in4,$in4,$in4,$inpperm
788 vxor $out2,$in2,$rndkey0
791 le?vperm $in5,$in5,$in5,$inpperm
792 vxor $out3,$in3,$rndkey0
793 le?vperm $in6,$in6,$in6,$inpperm
794 vxor $out4,$in4,$rndkey0
795 le?vperm $in7,$in7,$in7,$inpperm
796 vxor $out5,$in5,$rndkey0
797 vxor $out6,$in6,$rndkey0
798 vxor $out7,$in7,$rndkey0
804 vncipher $out0,$out0,v24
805 vncipher $out1,$out1,v24
806 vncipher $out2,$out2,v24
807 vncipher $out3,$out3,v24
808 vncipher $out4,$out4,v24
809 vncipher $out5,$out5,v24
810 vncipher $out6,$out6,v24
811 vncipher $out7,$out7,v24
812 lvx v24,$x20,$key_ # round[3]
813 addi $key_,$key_,0x20
815 vncipher $out0,$out0,v25
816 vncipher $out1,$out1,v25
817 vncipher $out2,$out2,v25
818 vncipher $out3,$out3,v25
819 vncipher $out4,$out4,v25
820 vncipher $out5,$out5,v25
821 vncipher $out6,$out6,v25
822 vncipher $out7,$out7,v25
823 lvx v25,$x10,$key_ # round[4]
826 subic $len,$len,128 # $len-=128
827 vncipher $out0,$out0,v24
828 vncipher $out1,$out1,v24
829 vncipher $out2,$out2,v24
830 vncipher $out3,$out3,v24
831 vncipher $out4,$out4,v24
832 vncipher $out5,$out5,v24
833 vncipher $out6,$out6,v24
834 vncipher $out7,$out7,v24
836 subfe. r0,r0,r0 # borrow?-1:0
837 vncipher $out0,$out0,v25
838 vncipher $out1,$out1,v25
839 vncipher $out2,$out2,v25
840 vncipher $out3,$out3,v25
841 vncipher $out4,$out4,v25
842 vncipher $out5,$out5,v25
843 vncipher $out6,$out6,v25
844 vncipher $out7,$out7,v25
847 vncipher $out0,$out0,v26
848 vncipher $out1,$out1,v26
849 vncipher $out2,$out2,v26
850 vncipher $out3,$out3,v26
851 vncipher $out4,$out4,v26
852 vncipher $out5,$out5,v26
853 vncipher $out6,$out6,v26
854 vncipher $out7,$out7,v26
856 add $inp,$inp,r0 # $inp is adjusted in such
857 # way that at exit from the
858 # loop inX-in7 are loaded
860 vncipher $out0,$out0,v27
861 vncipher $out1,$out1,v27
862 vncipher $out2,$out2,v27
863 vncipher $out3,$out3,v27
864 vncipher $out4,$out4,v27
865 vncipher $out5,$out5,v27
866 vncipher $out6,$out6,v27
867 vncipher $out7,$out7,v27
869 addi $key_,$sp,$FRAME+15 # rewind $key_
870 vncipher $out0,$out0,v28
871 vncipher $out1,$out1,v28
872 vncipher $out2,$out2,v28
873 vncipher $out3,$out3,v28
874 vncipher $out4,$out4,v28
875 vncipher $out5,$out5,v28
876 vncipher $out6,$out6,v28
877 vncipher $out7,$out7,v28
878 lvx v24,$x00,$key_ # re-pre-load round[1]
880 vncipher $out0,$out0,v29
881 vncipher $out1,$out1,v29
882 vncipher $out2,$out2,v29
883 vncipher $out3,$out3,v29
884 vncipher $out4,$out4,v29
885 vncipher $out5,$out5,v29
886 vncipher $out6,$out6,v29
887 vncipher $out7,$out7,v29
888 lvx v25,$x10,$key_ # re-pre-load round[2]
890 vncipher $out0,$out0,v30
891 vxor $ivec,$ivec,v31 # xor with last round key
892 vncipher $out1,$out1,v30
894 vncipher $out2,$out2,v30
896 vncipher $out3,$out3,v30
898 vncipher $out4,$out4,v30
900 vncipher $out5,$out5,v30
902 vncipher $out6,$out6,v30
904 vncipher $out7,$out7,v30
907 vncipherlast $out0,$out0,$ivec
908 vncipherlast $out1,$out1,$in0
909 lvx_u $in0,$x00,$inp # load next input block
910 vncipherlast $out2,$out2,$in1
912 vncipherlast $out3,$out3,$in2
913 le?vperm $in0,$in0,$in0,$inpperm
915 vncipherlast $out4,$out4,$in3
916 le?vperm $in1,$in1,$in1,$inpperm
918 vncipherlast $out5,$out5,$in4
919 le?vperm $in2,$in2,$in2,$inpperm
921 vncipherlast $out6,$out6,$in5
922 le?vperm $in3,$in3,$in3,$inpperm
924 vncipherlast $out7,$out7,$in6
925 le?vperm $in4,$in4,$in4,$inpperm
928 le?vperm $in5,$in5,$in5,$inpperm
932 le?vperm $out0,$out0,$out0,$inpperm
933 le?vperm $out1,$out1,$out1,$inpperm
934 stvx_u $out0,$x00,$out
935 le?vperm $in6,$in6,$in6,$inpperm
936 vxor $out0,$in0,$rndkey0
937 le?vperm $out2,$out2,$out2,$inpperm
938 stvx_u $out1,$x10,$out
939 le?vperm $in7,$in7,$in7,$inpperm
940 vxor $out1,$in1,$rndkey0
941 le?vperm $out3,$out3,$out3,$inpperm
942 stvx_u $out2,$x20,$out
943 vxor $out2,$in2,$rndkey0
944 le?vperm $out4,$out4,$out4,$inpperm
945 stvx_u $out3,$x30,$out
946 vxor $out3,$in3,$rndkey0
947 le?vperm $out5,$out5,$out5,$inpperm
948 stvx_u $out4,$x40,$out
949 vxor $out4,$in4,$rndkey0
950 le?vperm $out6,$out6,$out6,$inpperm
951 stvx_u $out5,$x50,$out
952 vxor $out5,$in5,$rndkey0
953 le?vperm $out7,$out7,$out7,$inpperm
954 stvx_u $out6,$x60,$out
955 vxor $out6,$in6,$rndkey0
956 stvx_u $out7,$x70,$out
958 vxor $out7,$in7,$rndkey0
961 beq Loop_cbc_dec8x # did $len-=128 borrow?
968 Loop_cbc_dec8x_tail: # up to 7 "words" tail...
969 vncipher $out1,$out1,v24
970 vncipher $out2,$out2,v24
971 vncipher $out3,$out3,v24
972 vncipher $out4,$out4,v24
973 vncipher $out5,$out5,v24
974 vncipher $out6,$out6,v24
975 vncipher $out7,$out7,v24
976 lvx v24,$x20,$key_ # round[3]
977 addi $key_,$key_,0x20
979 vncipher $out1,$out1,v25
980 vncipher $out2,$out2,v25
981 vncipher $out3,$out3,v25
982 vncipher $out4,$out4,v25
983 vncipher $out5,$out5,v25
984 vncipher $out6,$out6,v25
985 vncipher $out7,$out7,v25
986 lvx v25,$x10,$key_ # round[4]
987 bdnz Loop_cbc_dec8x_tail
989 vncipher $out1,$out1,v24
990 vncipher $out2,$out2,v24
991 vncipher $out3,$out3,v24
992 vncipher $out4,$out4,v24
993 vncipher $out5,$out5,v24
994 vncipher $out6,$out6,v24
995 vncipher $out7,$out7,v24
997 vncipher $out1,$out1,v25
998 vncipher $out2,$out2,v25
999 vncipher $out3,$out3,v25
1000 vncipher $out4,$out4,v25
1001 vncipher $out5,$out5,v25
1002 vncipher $out6,$out6,v25
1003 vncipher $out7,$out7,v25
1005 vncipher $out1,$out1,v26
1006 vncipher $out2,$out2,v26
1007 vncipher $out3,$out3,v26
1008 vncipher $out4,$out4,v26
1009 vncipher $out5,$out5,v26
1010 vncipher $out6,$out6,v26
1011 vncipher $out7,$out7,v26
1013 vncipher $out1,$out1,v27
1014 vncipher $out2,$out2,v27
1015 vncipher $out3,$out3,v27
1016 vncipher $out4,$out4,v27
1017 vncipher $out5,$out5,v27
1018 vncipher $out6,$out6,v27
1019 vncipher $out7,$out7,v27
1021 vncipher $out1,$out1,v28
1022 vncipher $out2,$out2,v28
1023 vncipher $out3,$out3,v28
1024 vncipher $out4,$out4,v28
1025 vncipher $out5,$out5,v28
1026 vncipher $out6,$out6,v28
1027 vncipher $out7,$out7,v28
1029 vncipher $out1,$out1,v29
1030 vncipher $out2,$out2,v29
1031 vncipher $out3,$out3,v29
1032 vncipher $out4,$out4,v29
1033 vncipher $out5,$out5,v29
1034 vncipher $out6,$out6,v29
1035 vncipher $out7,$out7,v29
1037 vncipher $out1,$out1,v30
1038 vxor $ivec,$ivec,v31 # last round key
1039 vncipher $out2,$out2,v30
1041 vncipher $out3,$out3,v30
1043 vncipher $out4,$out4,v30
1045 vncipher $out5,$out5,v30
1047 vncipher $out6,$out6,v30
1049 vncipher $out7,$out7,v30
1052 cmplwi $len,32 # switch($len)
1057 blt Lcbc_dec8x_three
1066 vncipherlast $out1,$out1,$ivec
1067 vncipherlast $out2,$out2,$in1
1068 vncipherlast $out3,$out3,$in2
1069 vncipherlast $out4,$out4,$in3
1070 vncipherlast $out5,$out5,$in4
1071 vncipherlast $out6,$out6,$in5
1072 vncipherlast $out7,$out7,$in6
1075 le?vperm $out1,$out1,$out1,$inpperm
1076 le?vperm $out2,$out2,$out2,$inpperm
1077 stvx_u $out1,$x00,$out
1078 le?vperm $out3,$out3,$out3,$inpperm
1079 stvx_u $out2,$x10,$out
1080 le?vperm $out4,$out4,$out4,$inpperm
1081 stvx_u $out3,$x20,$out
1082 le?vperm $out5,$out5,$out5,$inpperm
1083 stvx_u $out4,$x30,$out
1084 le?vperm $out6,$out6,$out6,$inpperm
1085 stvx_u $out5,$x40,$out
1086 le?vperm $out7,$out7,$out7,$inpperm
1087 stvx_u $out6,$x50,$out
1088 stvx_u $out7,$x60,$out
1094 vncipherlast $out2,$out2,$ivec
1095 vncipherlast $out3,$out3,$in2
1096 vncipherlast $out4,$out4,$in3
1097 vncipherlast $out5,$out5,$in4
1098 vncipherlast $out6,$out6,$in5
1099 vncipherlast $out7,$out7,$in6
1102 le?vperm $out2,$out2,$out2,$inpperm
1103 le?vperm $out3,$out3,$out3,$inpperm
1104 stvx_u $out2,$x00,$out
1105 le?vperm $out4,$out4,$out4,$inpperm
1106 stvx_u $out3,$x10,$out
1107 le?vperm $out5,$out5,$out5,$inpperm
1108 stvx_u $out4,$x20,$out
1109 le?vperm $out6,$out6,$out6,$inpperm
1110 stvx_u $out5,$x30,$out
1111 le?vperm $out7,$out7,$out7,$inpperm
1112 stvx_u $out6,$x40,$out
1113 stvx_u $out7,$x50,$out
1119 vncipherlast $out3,$out3,$ivec
1120 vncipherlast $out4,$out4,$in3
1121 vncipherlast $out5,$out5,$in4
1122 vncipherlast $out6,$out6,$in5
1123 vncipherlast $out7,$out7,$in6
1126 le?vperm $out3,$out3,$out3,$inpperm
1127 le?vperm $out4,$out4,$out4,$inpperm
1128 stvx_u $out3,$x00,$out
1129 le?vperm $out5,$out5,$out5,$inpperm
1130 stvx_u $out4,$x10,$out
1131 le?vperm $out6,$out6,$out6,$inpperm
1132 stvx_u $out5,$x20,$out
1133 le?vperm $out7,$out7,$out7,$inpperm
1134 stvx_u $out6,$x30,$out
1135 stvx_u $out7,$x40,$out
1141 vncipherlast $out4,$out4,$ivec
1142 vncipherlast $out5,$out5,$in4
1143 vncipherlast $out6,$out6,$in5
1144 vncipherlast $out7,$out7,$in6
1147 le?vperm $out4,$out4,$out4,$inpperm
1148 le?vperm $out5,$out5,$out5,$inpperm
1149 stvx_u $out4,$x00,$out
1150 le?vperm $out6,$out6,$out6,$inpperm
1151 stvx_u $out5,$x10,$out
1152 le?vperm $out7,$out7,$out7,$inpperm
1153 stvx_u $out6,$x20,$out
1154 stvx_u $out7,$x30,$out
1160 vncipherlast $out5,$out5,$ivec
1161 vncipherlast $out6,$out6,$in5
1162 vncipherlast $out7,$out7,$in6
1165 le?vperm $out5,$out5,$out5,$inpperm
1166 le?vperm $out6,$out6,$out6,$inpperm
1167 stvx_u $out5,$x00,$out
1168 le?vperm $out7,$out7,$out7,$inpperm
1169 stvx_u $out6,$x10,$out
1170 stvx_u $out7,$x20,$out
1176 vncipherlast $out6,$out6,$ivec
1177 vncipherlast $out7,$out7,$in6
1180 le?vperm $out6,$out6,$out6,$inpperm
1181 le?vperm $out7,$out7,$out7,$inpperm
1182 stvx_u $out6,$x00,$out
1183 stvx_u $out7,$x10,$out
1189 vncipherlast $out7,$out7,$ivec
1192 le?vperm $out7,$out7,$out7,$inpperm
1197 le?vperm $ivec,$ivec,$ivec,$inpperm
1198 stvx_u $ivec,0,$ivp # write [unaligned] iv
1202 stvx $inpperm,r10,$sp # wipe copies of round keys
1204 stvx $inpperm,r11,$sp
1206 stvx $inpperm,r10,$sp
1208 stvx $inpperm,r11,$sp
1210 stvx $inpperm,r10,$sp
1212 stvx $inpperm,r11,$sp
1214 stvx $inpperm,r10,$sp
1216 stvx $inpperm,r11,$sp
1220 lvx v20,r10,$sp # ABI says so
1242 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1243 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1244 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1245 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1246 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1247 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1248 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1251 .byte 0,12,0x04,0,0x80,6,6,0
1253 .size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1257 #########################################################################
1258 {{{ # CTR procedure[s] #
1259 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1260 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1261 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1266 .globl .${prefix}_ctr32_encrypt_blocks
1268 .${prefix}_ctr32_encrypt_blocks:
1277 vxor $rndkey0,$rndkey0,$rndkey0
1278 le?vspltisb $tmp,0x0f
1280 lvx $ivec,0,$ivp # load [unaligned] iv
1281 lvsl $inpperm,0,$ivp
1282 lvx $inptail,$idx,$ivp
1284 le?vxor $inpperm,$inpperm,$tmp
1285 vperm $ivec,$ivec,$inptail,$inpperm
1286 vsldoi $one,$rndkey0,$one,1
1289 ?lvsl $keyperm,0,$key # prepare for unaligned key
1290 lwz $rounds,240($key)
1292 lvsr $inpperm,0,r11 # prepare for unaligned load
1294 addi $inp,$inp,15 # 15 is not typo
1295 le?vxor $inpperm,$inpperm,$tmp
1297 srwi $rounds,$rounds,1
1299 subi $rounds,$rounds,1
1302 bge _aesp8_ctr32_encrypt8x
1304 ?lvsr $outperm,0,$out # prepare for unaligned store
1305 vspltisb $outmask,-1
1307 ?vperm $outmask,$rndkey0,$outmask,$outperm
1308 le?vxor $outperm,$outperm,$tmp
1312 lvx $rndkey1,$idx,$key
1314 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1315 vxor $inout,$ivec,$rndkey0
1316 lvx $rndkey0,$idx,$key
1322 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1323 vcipher $inout,$inout,$rndkey1
1324 lvx $rndkey1,$idx,$key
1326 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1327 vcipher $inout,$inout,$rndkey0
1328 lvx $rndkey0,$idx,$key
1332 vadduwm $ivec,$ivec,$one
1336 subic. $len,$len,1 # blocks--
1338 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1339 vcipher $inout,$inout,$rndkey1
1340 lvx $rndkey1,$idx,$key
1341 vperm $dat,$dat,$inptail,$inpperm
1343 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1345 vxor $dat,$dat,$rndkey1 # last round key
1346 vcipherlast $inout,$inout,$dat
1348 lvx $rndkey1,$idx,$key
1350 vperm $inout,$inout,$inout,$outperm
1351 vsel $dat,$outhead,$inout,$outmask
1353 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1355 vxor $inout,$ivec,$rndkey0
1356 lvx $rndkey0,$idx,$key
1363 lvx $inout,0,$out # redundant in aligned case
1364 vsel $inout,$outhead,$inout,$outmask
1370 .byte 0,12,0x14,0,0,0,6,0
1373 #########################################################################
1374 {{ # Optimized CTR procedure #
1376 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1377 $x00=0 if ($flavour =~ /osx/);
1378 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1379 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1380 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
1381 # v26-v31 last 6 round keys
1382 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1383 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1387 _aesp8_ctr32_encrypt8x:
1388 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1389 li r10,`$FRAME+8*16+15`
1390 li r11,`$FRAME+8*16+31`
1391 stvx v20,r10,$sp # ABI says so
1414 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1416 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1418 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1420 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1422 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1424 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1426 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1430 subi $rounds,$rounds,3 # -4 in total
1432 lvx $rndkey0,$x00,$key # load key schedule
1436 ?vperm $rndkey0,$rndkey0,v30,$keyperm
1437 addi $key_,$sp,$FRAME+15
1441 ?vperm v24,v30,v31,$keyperm
1444 stvx v24,$x00,$key_ # off-load round[1]
1445 ?vperm v25,v31,v30,$keyperm
1447 stvx v25,$x10,$key_ # off-load round[2]
1448 addi $key_,$key_,0x20
1449 bdnz Load_ctr32_enc_key
1452 ?vperm v24,v30,v31,$keyperm
1454 stvx v24,$x00,$key_ # off-load round[3]
1455 ?vperm v25,v31,v26,$keyperm
1457 stvx v25,$x10,$key_ # off-load round[4]
1458 addi $key_,$sp,$FRAME+15 # rewind $key_
1459 ?vperm v26,v26,v27,$keyperm
1461 ?vperm v27,v27,v28,$keyperm
1463 ?vperm v28,v28,v29,$keyperm
1465 ?vperm v29,v29,v30,$keyperm
1466 lvx $out0,$x70,$key # borrow $out0
1467 ?vperm v30,v30,v31,$keyperm
1468 lvx v24,$x00,$key_ # pre-load round[1]
1469 ?vperm v31,v31,$out0,$keyperm
1470 lvx v25,$x10,$key_ # pre-load round[2]
1472 vadduwm $two,$one,$one
1473 subi $inp,$inp,15 # undo "caller"
1476 vadduwm $out1,$ivec,$one # counter values ...
1477 vadduwm $out2,$ivec,$two
1478 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1480 vadduwm $out3,$out1,$two
1481 vxor $out1,$out1,$rndkey0
1482 le?lvsl $inpperm,0,$idx
1483 vadduwm $out4,$out2,$two
1484 vxor $out2,$out2,$rndkey0
1485 le?vspltisb $tmp,0x0f
1486 vadduwm $out5,$out3,$two
1487 vxor $out3,$out3,$rndkey0
1488 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1489 vadduwm $out6,$out4,$two
1490 vxor $out4,$out4,$rndkey0
1491 vadduwm $out7,$out5,$two
1492 vxor $out5,$out5,$rndkey0
1493 vadduwm $ivec,$out6,$two # next counter value
1494 vxor $out6,$out6,$rndkey0
1495 vxor $out7,$out7,$rndkey0
1501 vcipher $out0,$out0,v24
1502 vcipher $out1,$out1,v24
1503 vcipher $out2,$out2,v24
1504 vcipher $out3,$out3,v24
1505 vcipher $out4,$out4,v24
1506 vcipher $out5,$out5,v24
1507 vcipher $out6,$out6,v24
1508 vcipher $out7,$out7,v24
1509 Loop_ctr32_enc8x_middle:
1510 lvx v24,$x20,$key_ # round[3]
1511 addi $key_,$key_,0x20
1513 vcipher $out0,$out0,v25
1514 vcipher $out1,$out1,v25
1515 vcipher $out2,$out2,v25
1516 vcipher $out3,$out3,v25
1517 vcipher $out4,$out4,v25
1518 vcipher $out5,$out5,v25
1519 vcipher $out6,$out6,v25
1520 vcipher $out7,$out7,v25
1521 lvx v25,$x10,$key_ # round[4]
1522 bdnz Loop_ctr32_enc8x
1524 subic r11,$len,256 # $len-256, borrow $key_
1525 vcipher $out0,$out0,v24
1526 vcipher $out1,$out1,v24
1527 vcipher $out2,$out2,v24
1528 vcipher $out3,$out3,v24
1529 vcipher $out4,$out4,v24
1530 vcipher $out5,$out5,v24
1531 vcipher $out6,$out6,v24
1532 vcipher $out7,$out7,v24
1534 subfe r0,r0,r0 # borrow?-1:0
1535 vcipher $out0,$out0,v25
1536 vcipher $out1,$out1,v25
1537 vcipher $out2,$out2,v25
1538 vcipher $out3,$out3,v25
1539 vcipher $out4,$out4,v25
1540 vcipher $out5,$out5,v25
1541 vcipher $out6,$out6,v25
1542 vcipher $out7,$out7,v25
1545 addi $key_,$sp,$FRAME+15 # rewind $key_
1546 vcipher $out0,$out0,v26
1547 vcipher $out1,$out1,v26
1548 vcipher $out2,$out2,v26
1549 vcipher $out3,$out3,v26
1550 vcipher $out4,$out4,v26
1551 vcipher $out5,$out5,v26
1552 vcipher $out6,$out6,v26
1553 vcipher $out7,$out7,v26
1554 lvx v24,$x00,$key_ # re-pre-load round[1]
1556 subic $len,$len,129 # $len-=129
1557 vcipher $out0,$out0,v27
1558 addi $len,$len,1 # $len-=128 really
1559 vcipher $out1,$out1,v27
1560 vcipher $out2,$out2,v27
1561 vcipher $out3,$out3,v27
1562 vcipher $out4,$out4,v27
1563 vcipher $out5,$out5,v27
1564 vcipher $out6,$out6,v27
1565 vcipher $out7,$out7,v27
1566 lvx v25,$x10,$key_ # re-pre-load round[2]
1568 vcipher $out0,$out0,v28
1569 lvx_u $in0,$x00,$inp # load input
1570 vcipher $out1,$out1,v28
1571 lvx_u $in1,$x10,$inp
1572 vcipher $out2,$out2,v28
1573 lvx_u $in2,$x20,$inp
1574 vcipher $out3,$out3,v28
1575 lvx_u $in3,$x30,$inp
1576 vcipher $out4,$out4,v28
1577 lvx_u $in4,$x40,$inp
1578 vcipher $out5,$out5,v28
1579 lvx_u $in5,$x50,$inp
1580 vcipher $out6,$out6,v28
1581 lvx_u $in6,$x60,$inp
1582 vcipher $out7,$out7,v28
1583 lvx_u $in7,$x70,$inp
1586 vcipher $out0,$out0,v29
1587 le?vperm $in0,$in0,$in0,$inpperm
1588 vcipher $out1,$out1,v29
1589 le?vperm $in1,$in1,$in1,$inpperm
1590 vcipher $out2,$out2,v29
1591 le?vperm $in2,$in2,$in2,$inpperm
1592 vcipher $out3,$out3,v29
1593 le?vperm $in3,$in3,$in3,$inpperm
1594 vcipher $out4,$out4,v29
1595 le?vperm $in4,$in4,$in4,$inpperm
1596 vcipher $out5,$out5,v29
1597 le?vperm $in5,$in5,$in5,$inpperm
1598 vcipher $out6,$out6,v29
1599 le?vperm $in6,$in6,$in6,$inpperm
1600 vcipher $out7,$out7,v29
1601 le?vperm $in7,$in7,$in7,$inpperm
1603 add $inp,$inp,r0 # $inp is adjusted in such
1604 # way that at exit from the
1605 # loop inX-in7 are loaded
1607 subfe. r0,r0,r0 # borrow?-1:0
1608 vcipher $out0,$out0,v30
1609 vxor $in0,$in0,v31 # xor with last round key
1610 vcipher $out1,$out1,v30
1612 vcipher $out2,$out2,v30
1614 vcipher $out3,$out3,v30
1616 vcipher $out4,$out4,v30
1618 vcipher $out5,$out5,v30
1620 vcipher $out6,$out6,v30
1622 vcipher $out7,$out7,v30
1625 bne Lctr32_enc8x_break # did $len-129 borrow?
1627 vcipherlast $in0,$out0,$in0
1628 vcipherlast $in1,$out1,$in1
1629 vadduwm $out1,$ivec,$one # counter values ...
1630 vcipherlast $in2,$out2,$in2
1631 vadduwm $out2,$ivec,$two
1632 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1633 vcipherlast $in3,$out3,$in3
1634 vadduwm $out3,$out1,$two
1635 vxor $out1,$out1,$rndkey0
1636 vcipherlast $in4,$out4,$in4
1637 vadduwm $out4,$out2,$two
1638 vxor $out2,$out2,$rndkey0
1639 vcipherlast $in5,$out5,$in5
1640 vadduwm $out5,$out3,$two
1641 vxor $out3,$out3,$rndkey0
1642 vcipherlast $in6,$out6,$in6
1643 vadduwm $out6,$out4,$two
1644 vxor $out4,$out4,$rndkey0
1645 vcipherlast $in7,$out7,$in7
1646 vadduwm $out7,$out5,$two
1647 vxor $out5,$out5,$rndkey0
1648 le?vperm $in0,$in0,$in0,$inpperm
1649 vadduwm $ivec,$out6,$two # next counter value
1650 vxor $out6,$out6,$rndkey0
1651 le?vperm $in1,$in1,$in1,$inpperm
1652 vxor $out7,$out7,$rndkey0
1655 vcipher $out0,$out0,v24
1656 stvx_u $in0,$x00,$out
1657 le?vperm $in2,$in2,$in2,$inpperm
1658 vcipher $out1,$out1,v24
1659 stvx_u $in1,$x10,$out
1660 le?vperm $in3,$in3,$in3,$inpperm
1661 vcipher $out2,$out2,v24
1662 stvx_u $in2,$x20,$out
1663 le?vperm $in4,$in4,$in4,$inpperm
1664 vcipher $out3,$out3,v24
1665 stvx_u $in3,$x30,$out
1666 le?vperm $in5,$in5,$in5,$inpperm
1667 vcipher $out4,$out4,v24
1668 stvx_u $in4,$x40,$out
1669 le?vperm $in6,$in6,$in6,$inpperm
1670 vcipher $out5,$out5,v24
1671 stvx_u $in5,$x50,$out
1672 le?vperm $in7,$in7,$in7,$inpperm
1673 vcipher $out6,$out6,v24
1674 stvx_u $in6,$x60,$out
1675 vcipher $out7,$out7,v24
1676 stvx_u $in7,$x70,$out
1679 b Loop_ctr32_enc8x_middle
1684 blt Lctr32_enc8x_one
1686 beq Lctr32_enc8x_two
1688 blt Lctr32_enc8x_three
1690 beq Lctr32_enc8x_four
1692 blt Lctr32_enc8x_five
1694 beq Lctr32_enc8x_six
1696 blt Lctr32_enc8x_seven
1699 vcipherlast $out0,$out0,$in0
1700 vcipherlast $out1,$out1,$in1
1701 vcipherlast $out2,$out2,$in2
1702 vcipherlast $out3,$out3,$in3
1703 vcipherlast $out4,$out4,$in4
1704 vcipherlast $out5,$out5,$in5
1705 vcipherlast $out6,$out6,$in6
1706 vcipherlast $out7,$out7,$in7
1708 le?vperm $out0,$out0,$out0,$inpperm
1709 le?vperm $out1,$out1,$out1,$inpperm
1710 stvx_u $out0,$x00,$out
1711 le?vperm $out2,$out2,$out2,$inpperm
1712 stvx_u $out1,$x10,$out
1713 le?vperm $out3,$out3,$out3,$inpperm
1714 stvx_u $out2,$x20,$out
1715 le?vperm $out4,$out4,$out4,$inpperm
1716 stvx_u $out3,$x30,$out
1717 le?vperm $out5,$out5,$out5,$inpperm
1718 stvx_u $out4,$x40,$out
1719 le?vperm $out6,$out6,$out6,$inpperm
1720 stvx_u $out5,$x50,$out
1721 le?vperm $out7,$out7,$out7,$inpperm
1722 stvx_u $out6,$x60,$out
1723 stvx_u $out7,$x70,$out
1729 vcipherlast $out0,$out0,$in1
1730 vcipherlast $out1,$out1,$in2
1731 vcipherlast $out2,$out2,$in3
1732 vcipherlast $out3,$out3,$in4
1733 vcipherlast $out4,$out4,$in5
1734 vcipherlast $out5,$out5,$in6
1735 vcipherlast $out6,$out6,$in7
1737 le?vperm $out0,$out0,$out0,$inpperm
1738 le?vperm $out1,$out1,$out1,$inpperm
1739 stvx_u $out0,$x00,$out
1740 le?vperm $out2,$out2,$out2,$inpperm
1741 stvx_u $out1,$x10,$out
1742 le?vperm $out3,$out3,$out3,$inpperm
1743 stvx_u $out2,$x20,$out
1744 le?vperm $out4,$out4,$out4,$inpperm
1745 stvx_u $out3,$x30,$out
1746 le?vperm $out5,$out5,$out5,$inpperm
1747 stvx_u $out4,$x40,$out
1748 le?vperm $out6,$out6,$out6,$inpperm
1749 stvx_u $out5,$x50,$out
1750 stvx_u $out6,$x60,$out
1756 vcipherlast $out0,$out0,$in2
1757 vcipherlast $out1,$out1,$in3
1758 vcipherlast $out2,$out2,$in4
1759 vcipherlast $out3,$out3,$in5
1760 vcipherlast $out4,$out4,$in6
1761 vcipherlast $out5,$out5,$in7
1763 le?vperm $out0,$out0,$out0,$inpperm
1764 le?vperm $out1,$out1,$out1,$inpperm
1765 stvx_u $out0,$x00,$out
1766 le?vperm $out2,$out2,$out2,$inpperm
1767 stvx_u $out1,$x10,$out
1768 le?vperm $out3,$out3,$out3,$inpperm
1769 stvx_u $out2,$x20,$out
1770 le?vperm $out4,$out4,$out4,$inpperm
1771 stvx_u $out3,$x30,$out
1772 le?vperm $out5,$out5,$out5,$inpperm
1773 stvx_u $out4,$x40,$out
1774 stvx_u $out5,$x50,$out
1780 vcipherlast $out0,$out0,$in3
1781 vcipherlast $out1,$out1,$in4
1782 vcipherlast $out2,$out2,$in5
1783 vcipherlast $out3,$out3,$in6
1784 vcipherlast $out4,$out4,$in7
1786 le?vperm $out0,$out0,$out0,$inpperm
1787 le?vperm $out1,$out1,$out1,$inpperm
1788 stvx_u $out0,$x00,$out
1789 le?vperm $out2,$out2,$out2,$inpperm
1790 stvx_u $out1,$x10,$out
1791 le?vperm $out3,$out3,$out3,$inpperm
1792 stvx_u $out2,$x20,$out
1793 le?vperm $out4,$out4,$out4,$inpperm
1794 stvx_u $out3,$x30,$out
1795 stvx_u $out4,$x40,$out
1801 vcipherlast $out0,$out0,$in4
1802 vcipherlast $out1,$out1,$in5
1803 vcipherlast $out2,$out2,$in6
1804 vcipherlast $out3,$out3,$in7
1806 le?vperm $out0,$out0,$out0,$inpperm
1807 le?vperm $out1,$out1,$out1,$inpperm
1808 stvx_u $out0,$x00,$out
1809 le?vperm $out2,$out2,$out2,$inpperm
1810 stvx_u $out1,$x10,$out
1811 le?vperm $out3,$out3,$out3,$inpperm
1812 stvx_u $out2,$x20,$out
1813 stvx_u $out3,$x30,$out
1819 vcipherlast $out0,$out0,$in5
1820 vcipherlast $out1,$out1,$in6
1821 vcipherlast $out2,$out2,$in7
1823 le?vperm $out0,$out0,$out0,$inpperm
1824 le?vperm $out1,$out1,$out1,$inpperm
1825 stvx_u $out0,$x00,$out
1826 le?vperm $out2,$out2,$out2,$inpperm
1827 stvx_u $out1,$x10,$out
1828 stvx_u $out2,$x20,$out
1834 vcipherlast $out0,$out0,$in6
1835 vcipherlast $out1,$out1,$in7
1837 le?vperm $out0,$out0,$out0,$inpperm
1838 le?vperm $out1,$out1,$out1,$inpperm
1839 stvx_u $out0,$x00,$out
1840 stvx_u $out1,$x10,$out
1846 vcipherlast $out0,$out0,$in7
1848 le?vperm $out0,$out0,$out0,$inpperm
1855 stvx $inpperm,r10,$sp # wipe copies of round keys
1857 stvx $inpperm,r11,$sp
1859 stvx $inpperm,r10,$sp
1861 stvx $inpperm,r11,$sp
1863 stvx $inpperm,r10,$sp
1865 stvx $inpperm,r11,$sp
1867 stvx $inpperm,r10,$sp
1869 stvx $inpperm,r11,$sp
1873 lvx v20,r10,$sp # ABI says so
1895 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1896 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1897 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1898 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1899 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1900 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1901 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1904 .byte 0,12,0x04,0,0x80,6,6,0
1906 .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1910 #########################################################################
1911 {{{ # XTS procedures #
1912 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
1913 my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
1914 my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
1915 my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
1916 my $taillen = $key2;
1918 ($inp,$idx) = ($idx,$inp); # reassign
1921 .globl .${prefix}_xts_encrypt
1923 .${prefix}_xts_encrypt:
1924 mr $inp,r3 # reassign
1930 mfspr r12,256 # save vrsave
1934 vspltisb $seven,0x07 # 0x070707..07
1935 le?lvsl $leperm,r11,r11
1936 le?vspltisb $tmp,0x0f
1937 le?vxor $leperm,$leperm,$seven
1940 lvx $tweak,0,$ivp # load [unaligned] iv
1941 lvsl $inpperm,0,$ivp
1942 lvx $inptail,$idx,$ivp
1943 le?vxor $inpperm,$inpperm,$tmp
1944 vperm $tweak,$tweak,$inptail,$inpperm
1946 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
1947 lwz $rounds,240($key2)
1948 srwi $rounds,$rounds,1
1949 subi $rounds,$rounds,1
1953 lvsr $inpperm,0,r11 # prepare for unaligned load
1955 addi $inp,$inp,15 # 15 is not typo
1956 le?vxor $inpperm,$inpperm,$tmp
1958 lvx $rndkey0,0,$key2
1959 lvx $rndkey1,$idx,$key2
1961 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1962 vxor $tweak,$tweak,$rndkey0
1963 lvx $rndkey0,$idx,$key2
1968 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1969 vcipher $tweak,$tweak,$rndkey1
1970 lvx $rndkey1,$idx,$key2
1972 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1973 vcipher $tweak,$tweak,$rndkey0
1974 lvx $rndkey0,$idx,$key2
1978 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1979 vcipher $tweak,$tweak,$rndkey1
1980 lvx $rndkey1,$idx,$key2
1982 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1983 vcipherlast $tweak,$tweak,$rndkey0
1988 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
1989 lwz $rounds,240($key1)
1990 srwi $rounds,$rounds,1
1991 subi $rounds,$rounds,1
1994 vslb $eighty7,$seven,$seven # 0x808080..80
1995 vor $eighty7,$eighty7,$seven # 0x878787..87
1996 vspltisb $tmp,1 # 0x010101..01
1997 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2000 bge _aesp8_xts_encrypt6x
2002 andi. $taillen,$len,15
2004 subi $taillen,$taillen,16
2009 lvx $rndkey0,0,$key1
2010 lvx $rndkey1,$idx,$key1
2012 vperm $inout,$inout,$inptail,$inpperm
2013 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2014 vxor $inout,$inout,$tweak
2015 vxor $inout,$inout,$rndkey0
2016 lvx $rndkey0,$idx,$key1
2023 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2024 vcipher $inout,$inout,$rndkey1
2025 lvx $rndkey1,$idx,$key1
2027 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2028 vcipher $inout,$inout,$rndkey0
2029 lvx $rndkey0,$idx,$key1
2033 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2034 vcipher $inout,$inout,$rndkey1
2035 lvx $rndkey1,$idx,$key1
2037 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2038 vxor $rndkey0,$rndkey0,$tweak
2039 vcipherlast $output,$inout,$rndkey0
2041 le?vperm $tmp,$output,$output,$leperm
2043 le?stvx_u $tmp,0,$out
2044 be?stvx_u $output,0,$out
2053 lvx $rndkey0,0,$key1
2054 lvx $rndkey1,$idx,$key1
2062 vsrab $tmp,$tweak,$seven # next tweak value
2063 vaddubm $tweak,$tweak,$tweak
2064 vsldoi $tmp,$tmp,$tmp,15
2065 vand $tmp,$tmp,$eighty7
2066 vxor $tweak,$tweak,$tmp
2068 vperm $inout,$inout,$inptail,$inpperm
2069 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2070 vxor $inout,$inout,$tweak
2071 vxor $output,$output,$rndkey0 # just in case $len<16
2072 vxor $inout,$inout,$rndkey0
2073 lvx $rndkey0,$idx,$key1
2080 vxor $output,$output,$tweak
2081 lvsr $inpperm,0,$len # $inpperm is no longer needed
2082 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2084 vperm $inptail,$inptail,$tmp,$inpperm
2085 vsel $inout,$inout,$output,$inptail
2094 bdnz Loop_xts_enc_steal
2097 b Loop_xts_enc # one more time...
2100 mtspr 256,r12 # restore vrsave
2104 .byte 0,12,0x04,0,0x80,6,6,0
2106 .size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2108 .globl .${prefix}_xts_decrypt
2110 .${prefix}_xts_decrypt:
2111 mr $inp,r3 # reassign
2117 mfspr r12,256 # save vrsave
2126 vspltisb $seven,0x07 # 0x070707..07
2127 le?lvsl $leperm,r11,r11
2128 le?vspltisb $tmp,0x0f
2129 le?vxor $leperm,$leperm,$seven
2132 lvx $tweak,0,$ivp # load [unaligned] iv
2133 lvsl $inpperm,0,$ivp
2134 lvx $inptail,$idx,$ivp
2135 le?vxor $inpperm,$inpperm,$tmp
2136 vperm $tweak,$tweak,$inptail,$inpperm
2138 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2139 lwz $rounds,240($key2)
2140 srwi $rounds,$rounds,1
2141 subi $rounds,$rounds,1
2145 lvsr $inpperm,0,r11 # prepare for unaligned load
2147 addi $inp,$inp,15 # 15 is not typo
2148 le?vxor $inpperm,$inpperm,$tmp
2150 lvx $rndkey0,0,$key2
2151 lvx $rndkey1,$idx,$key2
2153 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2154 vxor $tweak,$tweak,$rndkey0
2155 lvx $rndkey0,$idx,$key2
2160 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2161 vcipher $tweak,$tweak,$rndkey1
2162 lvx $rndkey1,$idx,$key2
2164 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2165 vcipher $tweak,$tweak,$rndkey0
2166 lvx $rndkey0,$idx,$key2
2170 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2171 vcipher $tweak,$tweak,$rndkey1
2172 lvx $rndkey1,$idx,$key2
2174 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2175 vcipherlast $tweak,$tweak,$rndkey0
2180 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2181 lwz $rounds,240($key1)
2182 srwi $rounds,$rounds,1
2183 subi $rounds,$rounds,1
2186 vslb $eighty7,$seven,$seven # 0x808080..80
2187 vor $eighty7,$eighty7,$seven # 0x878787..87
2188 vspltisb $tmp,1 # 0x010101..01
2189 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2192 bge _aesp8_xts_decrypt6x
2194 lvx $rndkey0,0,$key1
2195 lvx $rndkey1,$idx,$key1
2197 vperm $inout,$inout,$inptail,$inpperm
2198 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2199 vxor $inout,$inout,$tweak
2200 vxor $inout,$inout,$rndkey0
2201 lvx $rndkey0,$idx,$key1
2211 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2212 vncipher $inout,$inout,$rndkey1
2213 lvx $rndkey1,$idx,$key1
2215 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2216 vncipher $inout,$inout,$rndkey0
2217 lvx $rndkey0,$idx,$key1
2221 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2222 vncipher $inout,$inout,$rndkey1
2223 lvx $rndkey1,$idx,$key1
2225 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2226 vxor $rndkey0,$rndkey0,$tweak
2227 vncipherlast $output,$inout,$rndkey0
2229 le?vperm $tmp,$output,$output,$leperm
2231 le?stvx_u $tmp,0,$out
2232 be?stvx_u $output,0,$out
2241 lvx $rndkey0,0,$key1
2242 lvx $rndkey1,$idx,$key1
2245 vsrab $tmp,$tweak,$seven # next tweak value
2246 vaddubm $tweak,$tweak,$tweak
2247 vsldoi $tmp,$tmp,$tmp,15
2248 vand $tmp,$tmp,$eighty7
2249 vxor $tweak,$tweak,$tmp
2251 vperm $inout,$inout,$inptail,$inpperm
2252 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2253 vxor $inout,$inout,$tweak
2254 vxor $inout,$inout,$rndkey0
2255 lvx $rndkey0,$idx,$key1
2263 vsrab $tmp,$tweak,$seven # next tweak value
2264 vaddubm $tweak1,$tweak,$tweak
2265 vsldoi $tmp,$tmp,$tmp,15
2266 vand $tmp,$tmp,$eighty7
2267 vxor $tweak1,$tweak1,$tmp
2272 vxor $inout,$inout,$tweak # :-(
2273 vxor $inout,$inout,$tweak1 # :-)
2276 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2277 vncipher $inout,$inout,$rndkey1
2278 lvx $rndkey1,$idx,$key1
2280 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2281 vncipher $inout,$inout,$rndkey0
2282 lvx $rndkey0,$idx,$key1
2284 bdnz Loop_xts_dec_short
2286 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2287 vncipher $inout,$inout,$rndkey1
2288 lvx $rndkey1,$idx,$key1
2290 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2291 vxor $rndkey0,$rndkey0,$tweak1
2292 vncipherlast $output,$inout,$rndkey0
2294 le?vperm $tmp,$output,$output,$leperm
2296 le?stvx_u $tmp,0,$out
2297 be?stvx_u $output,0,$out
2302 lvx $rndkey0,0,$key1
2303 lvx $rndkey1,$idx,$key1
2305 vperm $inout,$inout,$inptail,$inpperm
2306 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2308 lvsr $inpperm,0,$len # $inpperm is no longer needed
2309 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2311 vperm $inptail,$inptail,$tmp,$inpperm
2312 vsel $inout,$inout,$output,$inptail
2314 vxor $rndkey0,$rndkey0,$tweak
2315 vxor $inout,$inout,$rndkey0
2316 lvx $rndkey0,$idx,$key1
2325 bdnz Loop_xts_dec_steal
2328 b Loop_xts_dec # one more time...
2331 mtspr 256,r12 # restore vrsave
2335 .byte 0,12,0x04,0,0x80,6,6,0
2337 .size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2339 #########################################################################
2340 {{ # Optimized XTS procedures #
2342 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
2343 $x00=0 if ($flavour =~ /osx/);
2344 my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2345 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2346 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2347 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
2348 # v26-v31 last 6 round keys
2349 my ($keyperm)=($out0); # aliases with "caller", redundant assignment
2354 _aesp8_xts_encrypt6x:
2355 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2357 li r7,`$FRAME+8*16+15`
2358 li r8,`$FRAME+8*16+31`
2359 $PUSH r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2360 stvx v20,r7,$sp # ABI says so
2384 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2386 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2388 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2390 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2392 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2394 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2396 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2400 subi $rounds,$rounds,3 # -4 in total
2402 lvx $rndkey0,$x00,$key1 # load key schedule
2404 addi $key1,$key1,0x20
2406 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2407 addi $key_,$sp,$FRAME+15
2411 ?vperm v24,v30,v31,$keyperm
2413 addi $key1,$key1,0x20
2414 stvx v24,$x00,$key_ # off-load round[1]
2415 ?vperm v25,v31,v30,$keyperm
2417 stvx v25,$x10,$key_ # off-load round[2]
2418 addi $key_,$key_,0x20
2419 bdnz Load_xts_enc_key
2422 ?vperm v24,v30,v31,$keyperm
2424 stvx v24,$x00,$key_ # off-load round[3]
2425 ?vperm v25,v31,v26,$keyperm
2427 stvx v25,$x10,$key_ # off-load round[4]
2428 addi $key_,$sp,$FRAME+15 # rewind $key_
2429 ?vperm v26,v26,v27,$keyperm
2431 ?vperm v27,v27,v28,$keyperm
2433 ?vperm v28,v28,v29,$keyperm
2435 ?vperm v29,v29,v30,$keyperm
2436 lvx $twk5,$x70,$key1 # borrow $twk5
2437 ?vperm v30,v30,v31,$keyperm
2438 lvx v24,$x00,$key_ # pre-load round[1]
2439 ?vperm v31,v31,$twk5,$keyperm
2440 lvx v25,$x10,$key_ # pre-load round[2]
2442 vperm $in0,$inout,$inptail,$inpperm
2443 subi $inp,$inp,31 # undo "caller"
2444 vxor $twk0,$tweak,$rndkey0
2445 vsrab $tmp,$tweak,$seven # next tweak value
2446 vaddubm $tweak,$tweak,$tweak
2447 vsldoi $tmp,$tmp,$tmp,15
2448 vand $tmp,$tmp,$eighty7
2449 vxor $out0,$in0,$twk0
2450 vxor $tweak,$tweak,$tmp
2452 lvx_u $in1,$x10,$inp
2453 vxor $twk1,$tweak,$rndkey0
2454 vsrab $tmp,$tweak,$seven # next tweak value
2455 vaddubm $tweak,$tweak,$tweak
2456 vsldoi $tmp,$tmp,$tmp,15
2457 le?vperm $in1,$in1,$in1,$leperm
2458 vand $tmp,$tmp,$eighty7
2459 vxor $out1,$in1,$twk1
2460 vxor $tweak,$tweak,$tmp
2462 lvx_u $in2,$x20,$inp
2463 andi. $taillen,$len,15
2464 vxor $twk2,$tweak,$rndkey0
2465 vsrab $tmp,$tweak,$seven # next tweak value
2466 vaddubm $tweak,$tweak,$tweak
2467 vsldoi $tmp,$tmp,$tmp,15
2468 le?vperm $in2,$in2,$in2,$leperm
2469 vand $tmp,$tmp,$eighty7
2470 vxor $out2,$in2,$twk2
2471 vxor $tweak,$tweak,$tmp
2473 lvx_u $in3,$x30,$inp
2474 sub $len,$len,$taillen
2475 vxor $twk3,$tweak,$rndkey0
2476 vsrab $tmp,$tweak,$seven # next tweak value
2477 vaddubm $tweak,$tweak,$tweak
2478 vsldoi $tmp,$tmp,$tmp,15
2479 le?vperm $in3,$in3,$in3,$leperm
2480 vand $tmp,$tmp,$eighty7
2481 vxor $out3,$in3,$twk3
2482 vxor $tweak,$tweak,$tmp
2484 lvx_u $in4,$x40,$inp
2486 vxor $twk4,$tweak,$rndkey0
2487 vsrab $tmp,$tweak,$seven # next tweak value
2488 vaddubm $tweak,$tweak,$tweak
2489 vsldoi $tmp,$tmp,$tmp,15
2490 le?vperm $in4,$in4,$in4,$leperm
2491 vand $tmp,$tmp,$eighty7
2492 vxor $out4,$in4,$twk4
2493 vxor $tweak,$tweak,$tmp
2495 lvx_u $in5,$x50,$inp
2497 vxor $twk5,$tweak,$rndkey0
2498 vsrab $tmp,$tweak,$seven # next tweak value
2499 vaddubm $tweak,$tweak,$tweak
2500 vsldoi $tmp,$tmp,$tmp,15
2501 le?vperm $in5,$in5,$in5,$leperm
2502 vand $tmp,$tmp,$eighty7
2503 vxor $out5,$in5,$twk5
2504 vxor $tweak,$tweak,$tmp
2506 vxor v31,v31,$rndkey0
2512 vcipher $out0,$out0,v24
2513 vcipher $out1,$out1,v24
2514 vcipher $out2,$out2,v24
2515 vcipher $out3,$out3,v24
2516 vcipher $out4,$out4,v24
2517 vcipher $out5,$out5,v24
2518 lvx v24,$x20,$key_ # round[3]
2519 addi $key_,$key_,0x20
2521 vcipher $out0,$out0,v25
2522 vcipher $out1,$out1,v25
2523 vcipher $out2,$out2,v25
2524 vcipher $out3,$out3,v25
2525 vcipher $out4,$out4,v25
2526 vcipher $out5,$out5,v25
2527 lvx v25,$x10,$key_ # round[4]
2530 subic $len,$len,96 # $len-=96
2531 vxor $in0,$twk0,v31 # xor with last round key
2532 vcipher $out0,$out0,v24
2533 vcipher $out1,$out1,v24
2534 vsrab $tmp,$tweak,$seven # next tweak value
2535 vxor $twk0,$tweak,$rndkey0
2536 vaddubm $tweak,$tweak,$tweak
2537 vcipher $out2,$out2,v24
2538 vcipher $out3,$out3,v24
2539 vsldoi $tmp,$tmp,$tmp,15
2540 vcipher $out4,$out4,v24
2541 vcipher $out5,$out5,v24
2543 subfe. r0,r0,r0 # borrow?-1:0
2544 vand $tmp,$tmp,$eighty7
2545 vcipher $out0,$out0,v25
2546 vcipher $out1,$out1,v25
2547 vxor $tweak,$tweak,$tmp
2548 vcipher $out2,$out2,v25
2549 vcipher $out3,$out3,v25
2551 vsrab $tmp,$tweak,$seven # next tweak value
2552 vxor $twk1,$tweak,$rndkey0
2553 vcipher $out4,$out4,v25
2554 vcipher $out5,$out5,v25
2557 vaddubm $tweak,$tweak,$tweak
2558 vsldoi $tmp,$tmp,$tmp,15
2559 vcipher $out0,$out0,v26
2560 vcipher $out1,$out1,v26
2561 vand $tmp,$tmp,$eighty7
2562 vcipher $out2,$out2,v26
2563 vcipher $out3,$out3,v26
2564 vxor $tweak,$tweak,$tmp
2565 vcipher $out4,$out4,v26
2566 vcipher $out5,$out5,v26
2568 add $inp,$inp,r0 # $inp is adjusted in such
2569 # way that at exit from the
2570 # loop inX-in5 are loaded
2573 vsrab $tmp,$tweak,$seven # next tweak value
2574 vxor $twk2,$tweak,$rndkey0
2575 vaddubm $tweak,$tweak,$tweak
2576 vcipher $out0,$out0,v27
2577 vcipher $out1,$out1,v27
2578 vsldoi $tmp,$tmp,$tmp,15
2579 vcipher $out2,$out2,v27
2580 vcipher $out3,$out3,v27
2581 vand $tmp,$tmp,$eighty7
2582 vcipher $out4,$out4,v27
2583 vcipher $out5,$out5,v27
2585 addi $key_,$sp,$FRAME+15 # rewind $key_
2586 vxor $tweak,$tweak,$tmp
2587 vcipher $out0,$out0,v28
2588 vcipher $out1,$out1,v28
2590 vsrab $tmp,$tweak,$seven # next tweak value
2591 vxor $twk3,$tweak,$rndkey0
2592 vcipher $out2,$out2,v28
2593 vcipher $out3,$out3,v28
2594 vaddubm $tweak,$tweak,$tweak
2595 vsldoi $tmp,$tmp,$tmp,15
2596 vcipher $out4,$out4,v28
2597 vcipher $out5,$out5,v28
2598 lvx v24,$x00,$key_ # re-pre-load round[1]
2599 vand $tmp,$tmp,$eighty7
2601 vcipher $out0,$out0,v29
2602 vcipher $out1,$out1,v29
2603 vxor $tweak,$tweak,$tmp
2604 vcipher $out2,$out2,v29
2605 vcipher $out3,$out3,v29
2607 vsrab $tmp,$tweak,$seven # next tweak value
2608 vxor $twk4,$tweak,$rndkey0
2609 vcipher $out4,$out4,v29
2610 vcipher $out5,$out5,v29
2611 lvx v25,$x10,$key_ # re-pre-load round[2]
2612 vaddubm $tweak,$tweak,$tweak
2613 vsldoi $tmp,$tmp,$tmp,15
2615 vcipher $out0,$out0,v30
2616 vcipher $out1,$out1,v30
2617 vand $tmp,$tmp,$eighty7
2618 vcipher $out2,$out2,v30
2619 vcipher $out3,$out3,v30
2620 vxor $tweak,$tweak,$tmp
2621 vcipher $out4,$out4,v30
2622 vcipher $out5,$out5,v30
2624 vsrab $tmp,$tweak,$seven # next tweak value
2625 vxor $twk5,$tweak,$rndkey0
2627 vcipherlast $out0,$out0,$in0
2628 lvx_u $in0,$x00,$inp # load next input block
2629 vaddubm $tweak,$tweak,$tweak
2630 vsldoi $tmp,$tmp,$tmp,15
2631 vcipherlast $out1,$out1,$in1
2632 lvx_u $in1,$x10,$inp
2633 vcipherlast $out2,$out2,$in2
2634 le?vperm $in0,$in0,$in0,$leperm
2635 lvx_u $in2,$x20,$inp
2636 vand $tmp,$tmp,$eighty7
2637 vcipherlast $out3,$out3,$in3
2638 le?vperm $in1,$in1,$in1,$leperm
2639 lvx_u $in3,$x30,$inp
2640 vcipherlast $out4,$out4,$in4
2641 le?vperm $in2,$in2,$in2,$leperm
2642 lvx_u $in4,$x40,$inp
2643 vxor $tweak,$tweak,$tmp
2644 vcipherlast $tmp,$out5,$in5 # last block might be needed
2646 le?vperm $in3,$in3,$in3,$leperm
2647 lvx_u $in5,$x50,$inp
2649 le?vperm $in4,$in4,$in4,$leperm
2650 le?vperm $in5,$in5,$in5,$leperm
2652 le?vperm $out0,$out0,$out0,$leperm
2653 le?vperm $out1,$out1,$out1,$leperm
2654 stvx_u $out0,$x00,$out # store output
2655 vxor $out0,$in0,$twk0
2656 le?vperm $out2,$out2,$out2,$leperm
2657 stvx_u $out1,$x10,$out
2658 vxor $out1,$in1,$twk1
2659 le?vperm $out3,$out3,$out3,$leperm
2660 stvx_u $out2,$x20,$out
2661 vxor $out2,$in2,$twk2
2662 le?vperm $out4,$out4,$out4,$leperm
2663 stvx_u $out3,$x30,$out
2664 vxor $out3,$in3,$twk3
2665 le?vperm $out5,$tmp,$tmp,$leperm
2666 stvx_u $out4,$x40,$out
2667 vxor $out4,$in4,$twk4
2668 le?stvx_u $out5,$x50,$out
2669 be?stvx_u $tmp, $x50,$out
2670 vxor $out5,$in5,$twk5
2674 beq Loop_xts_enc6x # did $len-=96 borrow?
2676 addic. $len,$len,0x60
2683 blt Lxts_enc6x_three
2688 vxor $out0,$in1,$twk0
2689 vxor $out1,$in2,$twk1
2690 vxor $out2,$in3,$twk2
2691 vxor $out3,$in4,$twk3
2692 vxor $out4,$in5,$twk4
2696 le?vperm $out0,$out0,$out0,$leperm
2697 vmr $twk0,$twk5 # unused tweak
2698 le?vperm $out1,$out1,$out1,$leperm
2699 stvx_u $out0,$x00,$out # store output
2700 le?vperm $out2,$out2,$out2,$leperm
2701 stvx_u $out1,$x10,$out
2702 le?vperm $out3,$out3,$out3,$leperm
2703 stvx_u $out2,$x20,$out
2704 vxor $tmp,$out4,$twk5 # last block prep for stealing
2705 le?vperm $out4,$out4,$out4,$leperm
2706 stvx_u $out3,$x30,$out
2707 stvx_u $out4,$x40,$out
2709 bne Lxts_enc6x_steal
2714 vxor $out0,$in2,$twk0
2715 vxor $out1,$in3,$twk1
2716 vxor $out2,$in4,$twk2
2717 vxor $out3,$in5,$twk3
2718 vxor $out4,$out4,$out4
2722 le?vperm $out0,$out0,$out0,$leperm
2723 vmr $twk0,$twk4 # unused tweak
2724 le?vperm $out1,$out1,$out1,$leperm
2725 stvx_u $out0,$x00,$out # store output
2726 le?vperm $out2,$out2,$out2,$leperm
2727 stvx_u $out1,$x10,$out
2728 vxor $tmp,$out3,$twk4 # last block prep for stealing
2729 le?vperm $out3,$out3,$out3,$leperm
2730 stvx_u $out2,$x20,$out
2731 stvx_u $out3,$x30,$out
2733 bne Lxts_enc6x_steal
2738 vxor $out0,$in3,$twk0
2739 vxor $out1,$in4,$twk1
2740 vxor $out2,$in5,$twk2
2741 vxor $out3,$out3,$out3
2742 vxor $out4,$out4,$out4
2746 le?vperm $out0,$out0,$out0,$leperm
2747 vmr $twk0,$twk3 # unused tweak
2748 le?vperm $out1,$out1,$out1,$leperm
2749 stvx_u $out0,$x00,$out # store output
2750 vxor $tmp,$out2,$twk3 # last block prep for stealing
2751 le?vperm $out2,$out2,$out2,$leperm
2752 stvx_u $out1,$x10,$out
2753 stvx_u $out2,$x20,$out
2755 bne Lxts_enc6x_steal
2760 vxor $out0,$in4,$twk0
2761 vxor $out1,$in5,$twk1
2762 vxor $out2,$out2,$out2
2763 vxor $out3,$out3,$out3
2764 vxor $out4,$out4,$out4
2768 le?vperm $out0,$out0,$out0,$leperm
2769 vmr $twk0,$twk2 # unused tweak
2770 vxor $tmp,$out1,$twk2 # last block prep for stealing
2771 le?vperm $out1,$out1,$out1,$leperm
2772 stvx_u $out0,$x00,$out # store output
2773 stvx_u $out1,$x10,$out
2775 bne Lxts_enc6x_steal
2780 vxor $out0,$in5,$twk0
2783 vcipher $out0,$out0,v24
2784 lvx v24,$x20,$key_ # round[3]
2785 addi $key_,$key_,0x20
2787 vcipher $out0,$out0,v25
2788 lvx v25,$x10,$key_ # round[4]
2791 add $inp,$inp,$taillen
2793 vcipher $out0,$out0,v24
2796 vcipher $out0,$out0,v25
2798 lvsr $inpperm,0,$taillen
2799 vcipher $out0,$out0,v26
2802 vcipher $out0,$out0,v27
2804 addi $key_,$sp,$FRAME+15 # rewind $key_
2805 vcipher $out0,$out0,v28
2806 lvx v24,$x00,$key_ # re-pre-load round[1]
2808 vcipher $out0,$out0,v29
2809 lvx v25,$x10,$key_ # re-pre-load round[2]
2810 vxor $twk0,$twk0,v31
2812 le?vperm $in0,$in0,$in0,$leperm
2813 vcipher $out0,$out0,v30
2815 vperm $in0,$in0,$in0,$inpperm
2816 vcipherlast $out0,$out0,$twk0
2818 vmr $twk0,$twk1 # unused tweak
2819 vxor $tmp,$out0,$twk1 # last block prep for stealing
2820 le?vperm $out0,$out0,$out0,$leperm
2821 stvx_u $out0,$x00,$out # store output
2823 bne Lxts_enc6x_steal
2831 add $inp,$inp,$taillen
2834 lvsr $inpperm,0,$taillen # $in5 is no more
2835 le?vperm $in0,$in0,$in0,$leperm
2836 vperm $in0,$in0,$in0,$inpperm
2837 vxor $tmp,$tmp,$twk0
2839 vxor $in0,$in0,$twk0
2840 vxor $out0,$out0,$out0
2842 vperm $out0,$out0,$out1,$inpperm
2843 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2848 Loop_xts_enc6x_steal:
2851 bdnz Loop_xts_enc6x_steal
2855 b Loop_xts_enc1x # one more time...
2862 stvx $seven,r10,$sp # wipe copies of round keys
2880 lvx v20,r10,$sp # ABI says so
2902 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2903 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2904 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2905 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2906 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2907 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2908 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2911 .byte 0,12,0x04,1,0x80,6,6,0
2916 vcipher $out0,$out0,v24
2917 vcipher $out1,$out1,v24
2918 vcipher $out2,$out2,v24
2919 vcipher $out3,$out3,v24
2920 vcipher $out4,$out4,v24
2921 lvx v24,$x20,$key_ # round[3]
2922 addi $key_,$key_,0x20
2924 vcipher $out0,$out0,v25
2925 vcipher $out1,$out1,v25
2926 vcipher $out2,$out2,v25
2927 vcipher $out3,$out3,v25
2928 vcipher $out4,$out4,v25
2929 lvx v25,$x10,$key_ # round[4]
2930 bdnz _aesp8_xts_enc5x
2932 add $inp,$inp,$taillen
2934 vcipher $out0,$out0,v24
2935 vcipher $out1,$out1,v24
2936 vcipher $out2,$out2,v24
2937 vcipher $out3,$out3,v24
2938 vcipher $out4,$out4,v24
2941 vcipher $out0,$out0,v25
2942 vcipher $out1,$out1,v25
2943 vcipher $out2,$out2,v25
2944 vcipher $out3,$out3,v25
2945 vcipher $out4,$out4,v25
2946 vxor $twk0,$twk0,v31
2948 vcipher $out0,$out0,v26
2949 lvsr $inpperm,r0,$taillen # $in5 is no more
2950 vcipher $out1,$out1,v26
2951 vcipher $out2,$out2,v26
2952 vcipher $out3,$out3,v26
2953 vcipher $out4,$out4,v26
2956 vcipher $out0,$out0,v27
2958 vcipher $out1,$out1,v27
2959 vcipher $out2,$out2,v27
2960 vcipher $out3,$out3,v27
2961 vcipher $out4,$out4,v27
2964 addi $key_,$sp,$FRAME+15 # rewind $key_
2965 vcipher $out0,$out0,v28
2966 vcipher $out1,$out1,v28
2967 vcipher $out2,$out2,v28
2968 vcipher $out3,$out3,v28
2969 vcipher $out4,$out4,v28
2970 lvx v24,$x00,$key_ # re-pre-load round[1]
2973 vcipher $out0,$out0,v29
2974 le?vperm $in0,$in0,$in0,$leperm
2975 vcipher $out1,$out1,v29
2976 vcipher $out2,$out2,v29
2977 vcipher $out3,$out3,v29
2978 vcipher $out4,$out4,v29
2979 lvx v25,$x10,$key_ # re-pre-load round[2]
2982 vcipher $out0,$out0,v30
2983 vperm $in0,$in0,$in0,$inpperm
2984 vcipher $out1,$out1,v30
2985 vcipher $out2,$out2,v30
2986 vcipher $out3,$out3,v30
2987 vcipher $out4,$out4,v30
2989 vcipherlast $out0,$out0,$twk0
2990 vcipherlast $out1,$out1,$in1
2991 vcipherlast $out2,$out2,$in2
2992 vcipherlast $out3,$out3,$in3
2993 vcipherlast $out4,$out4,$in4
2996 .byte 0,12,0x14,0,0,0,0,0
2999 _aesp8_xts_decrypt6x:
3000 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3002 li r7,`$FRAME+8*16+15`
3003 li r8,`$FRAME+8*16+31`
3004 $PUSH r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3005 stvx v20,r7,$sp # ABI says so
3029 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3031 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3033 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3035 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3037 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3039 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3041 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3045 subi $rounds,$rounds,3 # -4 in total
3047 lvx $rndkey0,$x00,$key1 # load key schedule
3049 addi $key1,$key1,0x20
3051 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3052 addi $key_,$sp,$FRAME+15
3056 ?vperm v24,v30,v31,$keyperm
3058 addi $key1,$key1,0x20
3059 stvx v24,$x00,$key_ # off-load round[1]
3060 ?vperm v25,v31,v30,$keyperm
3062 stvx v25,$x10,$key_ # off-load round[2]
3063 addi $key_,$key_,0x20
3064 bdnz Load_xts_dec_key
3067 ?vperm v24,v30,v31,$keyperm
3069 stvx v24,$x00,$key_ # off-load round[3]
3070 ?vperm v25,v31,v26,$keyperm
3072 stvx v25,$x10,$key_ # off-load round[4]
3073 addi $key_,$sp,$FRAME+15 # rewind $key_
3074 ?vperm v26,v26,v27,$keyperm
3076 ?vperm v27,v27,v28,$keyperm
3078 ?vperm v28,v28,v29,$keyperm
3080 ?vperm v29,v29,v30,$keyperm
3081 lvx $twk5,$x70,$key1 # borrow $twk5
3082 ?vperm v30,v30,v31,$keyperm
3083 lvx v24,$x00,$key_ # pre-load round[1]
3084 ?vperm v31,v31,$twk5,$keyperm
3085 lvx v25,$x10,$key_ # pre-load round[2]
3087 vperm $in0,$inout,$inptail,$inpperm
3088 subi $inp,$inp,31 # undo "caller"
3089 vxor $twk0,$tweak,$rndkey0
3090 vsrab $tmp,$tweak,$seven # next tweak value
3091 vaddubm $tweak,$tweak,$tweak
3092 vsldoi $tmp,$tmp,$tmp,15
3093 vand $tmp,$tmp,$eighty7
3094 vxor $out0,$in0,$twk0
3095 vxor $tweak,$tweak,$tmp
3097 lvx_u $in1,$x10,$inp
3098 vxor $twk1,$tweak,$rndkey0
3099 vsrab $tmp,$tweak,$seven # next tweak value
3100 vaddubm $tweak,$tweak,$tweak
3101 vsldoi $tmp,$tmp,$tmp,15
3102 le?vperm $in1,$in1,$in1,$leperm
3103 vand $tmp,$tmp,$eighty7
3104 vxor $out1,$in1,$twk1
3105 vxor $tweak,$tweak,$tmp
3107 lvx_u $in2,$x20,$inp
3108 andi. $taillen,$len,15
3109 vxor $twk2,$tweak,$rndkey0
3110 vsrab $tmp,$tweak,$seven # next tweak value
3111 vaddubm $tweak,$tweak,$tweak
3112 vsldoi $tmp,$tmp,$tmp,15
3113 le?vperm $in2,$in2,$in2,$leperm
3114 vand $tmp,$tmp,$eighty7
3115 vxor $out2,$in2,$twk2
3116 vxor $tweak,$tweak,$tmp
3118 lvx_u $in3,$x30,$inp
3119 sub $len,$len,$taillen
3120 vxor $twk3,$tweak,$rndkey0
3121 vsrab $tmp,$tweak,$seven # next tweak value
3122 vaddubm $tweak,$tweak,$tweak
3123 vsldoi $tmp,$tmp,$tmp,15
3124 le?vperm $in3,$in3,$in3,$leperm
3125 vand $tmp,$tmp,$eighty7
3126 vxor $out3,$in3,$twk3
3127 vxor $tweak,$tweak,$tmp
3129 lvx_u $in4,$x40,$inp
3131 vxor $twk4,$tweak,$rndkey0
3132 vsrab $tmp,$tweak,$seven # next tweak value
3133 vaddubm $tweak,$tweak,$tweak
3134 vsldoi $tmp,$tmp,$tmp,15
3135 le?vperm $in4,$in4,$in4,$leperm
3136 vand $tmp,$tmp,$eighty7
3137 vxor $out4,$in4,$twk4
3138 vxor $tweak,$tweak,$tmp
3140 lvx_u $in5,$x50,$inp
3142 vxor $twk5,$tweak,$rndkey0
3143 vsrab $tmp,$tweak,$seven # next tweak value
3144 vaddubm $tweak,$tweak,$tweak
3145 vsldoi $tmp,$tmp,$tmp,15
3146 le?vperm $in5,$in5,$in5,$leperm
3147 vand $tmp,$tmp,$eighty7
3148 vxor $out5,$in5,$twk5
3149 vxor $tweak,$tweak,$tmp
3151 vxor v31,v31,$rndkey0
3157 vncipher $out0,$out0,v24
3158 vncipher $out1,$out1,v24
3159 vncipher $out2,$out2,v24
3160 vncipher $out3,$out3,v24
3161 vncipher $out4,$out4,v24
3162 vncipher $out5,$out5,v24
3163 lvx v24,$x20,$key_ # round[3]
3164 addi $key_,$key_,0x20
3166 vncipher $out0,$out0,v25
3167 vncipher $out1,$out1,v25
3168 vncipher $out2,$out2,v25
3169 vncipher $out3,$out3,v25
3170 vncipher $out4,$out4,v25
3171 vncipher $out5,$out5,v25
3172 lvx v25,$x10,$key_ # round[4]
3175 subic $len,$len,96 # $len-=96
3176 vxor $in0,$twk0,v31 # xor with last round key
3177 vncipher $out0,$out0,v24
3178 vncipher $out1,$out1,v24
3179 vsrab $tmp,$tweak,$seven # next tweak value
3180 vxor $twk0,$tweak,$rndkey0
3181 vaddubm $tweak,$tweak,$tweak
3182 vncipher $out2,$out2,v24
3183 vncipher $out3,$out3,v24
3184 vsldoi $tmp,$tmp,$tmp,15
3185 vncipher $out4,$out4,v24
3186 vncipher $out5,$out5,v24
3188 subfe. r0,r0,r0 # borrow?-1:0
3189 vand $tmp,$tmp,$eighty7
3190 vncipher $out0,$out0,v25
3191 vncipher $out1,$out1,v25
3192 vxor $tweak,$tweak,$tmp
3193 vncipher $out2,$out2,v25
3194 vncipher $out3,$out3,v25
3196 vsrab $tmp,$tweak,$seven # next tweak value
3197 vxor $twk1,$tweak,$rndkey0
3198 vncipher $out4,$out4,v25
3199 vncipher $out5,$out5,v25
3202 vaddubm $tweak,$tweak,$tweak
3203 vsldoi $tmp,$tmp,$tmp,15
3204 vncipher $out0,$out0,v26
3205 vncipher $out1,$out1,v26
3206 vand $tmp,$tmp,$eighty7
3207 vncipher $out2,$out2,v26
3208 vncipher $out3,$out3,v26
3209 vxor $tweak,$tweak,$tmp
3210 vncipher $out4,$out4,v26
3211 vncipher $out5,$out5,v26
3213 add $inp,$inp,r0 # $inp is adjusted in such
3214 # way that at exit from the
3215 # loop inX-in5 are loaded
3218 vsrab $tmp,$tweak,$seven # next tweak value
3219 vxor $twk2,$tweak,$rndkey0
3220 vaddubm $tweak,$tweak,$tweak
3221 vncipher $out0,$out0,v27
3222 vncipher $out1,$out1,v27
3223 vsldoi $tmp,$tmp,$tmp,15
3224 vncipher $out2,$out2,v27
3225 vncipher $out3,$out3,v27
3226 vand $tmp,$tmp,$eighty7
3227 vncipher $out4,$out4,v27
3228 vncipher $out5,$out5,v27
3230 addi $key_,$sp,$FRAME+15 # rewind $key_
3231 vxor $tweak,$tweak,$tmp
3232 vncipher $out0,$out0,v28
3233 vncipher $out1,$out1,v28
3235 vsrab $tmp,$tweak,$seven # next tweak value
3236 vxor $twk3,$tweak,$rndkey0
3237 vncipher $out2,$out2,v28
3238 vncipher $out3,$out3,v28
3239 vaddubm $tweak,$tweak,$tweak
3240 vsldoi $tmp,$tmp,$tmp,15
3241 vncipher $out4,$out4,v28
3242 vncipher $out5,$out5,v28
3243 lvx v24,$x00,$key_ # re-pre-load round[1]
3244 vand $tmp,$tmp,$eighty7
3246 vncipher $out0,$out0,v29
3247 vncipher $out1,$out1,v29
3248 vxor $tweak,$tweak,$tmp
3249 vncipher $out2,$out2,v29
3250 vncipher $out3,$out3,v29
3252 vsrab $tmp,$tweak,$seven # next tweak value
3253 vxor $twk4,$tweak,$rndkey0
3254 vncipher $out4,$out4,v29
3255 vncipher $out5,$out5,v29
3256 lvx v25,$x10,$key_ # re-pre-load round[2]
3257 vaddubm $tweak,$tweak,$tweak
3258 vsldoi $tmp,$tmp,$tmp,15
3260 vncipher $out0,$out0,v30
3261 vncipher $out1,$out1,v30
3262 vand $tmp,$tmp,$eighty7
3263 vncipher $out2,$out2,v30
3264 vncipher $out3,$out3,v30
3265 vxor $tweak,$tweak,$tmp
3266 vncipher $out4,$out4,v30
3267 vncipher $out5,$out5,v30
3269 vsrab $tmp,$tweak,$seven # next tweak value
3270 vxor $twk5,$tweak,$rndkey0
3272 vncipherlast $out0,$out0,$in0
3273 lvx_u $in0,$x00,$inp # load next input block
3274 vaddubm $tweak,$tweak,$tweak
3275 vsldoi $tmp,$tmp,$tmp,15
3276 vncipherlast $out1,$out1,$in1
3277 lvx_u $in1,$x10,$inp
3278 vncipherlast $out2,$out2,$in2
3279 le?vperm $in0,$in0,$in0,$leperm
3280 lvx_u $in2,$x20,$inp
3281 vand $tmp,$tmp,$eighty7
3282 vncipherlast $out3,$out3,$in3
3283 le?vperm $in1,$in1,$in1,$leperm
3284 lvx_u $in3,$x30,$inp
3285 vncipherlast $out4,$out4,$in4
3286 le?vperm $in2,$in2,$in2,$leperm
3287 lvx_u $in4,$x40,$inp
3288 vxor $tweak,$tweak,$tmp
3289 vncipherlast $out5,$out5,$in5
3290 le?vperm $in3,$in3,$in3,$leperm
3291 lvx_u $in5,$x50,$inp
3293 le?vperm $in4,$in4,$in4,$leperm
3294 le?vperm $in5,$in5,$in5,$leperm
3296 le?vperm $out0,$out0,$out0,$leperm
3297 le?vperm $out1,$out1,$out1,$leperm
3298 stvx_u $out0,$x00,$out # store output
3299 vxor $out0,$in0,$twk0
3300 le?vperm $out2,$out2,$out2,$leperm
3301 stvx_u $out1,$x10,$out
3302 vxor $out1,$in1,$twk1
3303 le?vperm $out3,$out3,$out3,$leperm
3304 stvx_u $out2,$x20,$out
3305 vxor $out2,$in2,$twk2
3306 le?vperm $out4,$out4,$out4,$leperm
3307 stvx_u $out3,$x30,$out
3308 vxor $out3,$in3,$twk3
3309 le?vperm $out5,$out5,$out5,$leperm
3310 stvx_u $out4,$x40,$out
3311 vxor $out4,$in4,$twk4
3312 stvx_u $out5,$x50,$out
3313 vxor $out5,$in5,$twk5
3317 beq Loop_xts_dec6x # did $len-=96 borrow?
3319 addic. $len,$len,0x60
3326 blt Lxts_dec6x_three
3331 vxor $out0,$in1,$twk0
3332 vxor $out1,$in2,$twk1
3333 vxor $out2,$in3,$twk2
3334 vxor $out3,$in4,$twk3
3335 vxor $out4,$in5,$twk4
3339 le?vperm $out0,$out0,$out0,$leperm
3340 vmr $twk0,$twk5 # unused tweak
3341 vxor $twk1,$tweak,$rndkey0
3342 le?vperm $out1,$out1,$out1,$leperm
3343 stvx_u $out0,$x00,$out # store output
3344 vxor $out0,$in0,$twk1
3345 le?vperm $out2,$out2,$out2,$leperm
3346 stvx_u $out1,$x10,$out
3347 le?vperm $out3,$out3,$out3,$leperm
3348 stvx_u $out2,$x20,$out
3349 le?vperm $out4,$out4,$out4,$leperm
3350 stvx_u $out3,$x30,$out
3351 stvx_u $out4,$x40,$out
3353 bne Lxts_dec6x_steal
3358 vxor $out0,$in2,$twk0
3359 vxor $out1,$in3,$twk1
3360 vxor $out2,$in4,$twk2
3361 vxor $out3,$in5,$twk3
3362 vxor $out4,$out4,$out4
3366 le?vperm $out0,$out0,$out0,$leperm
3367 vmr $twk0,$twk4 # unused tweak
3369 le?vperm $out1,$out1,$out1,$leperm
3370 stvx_u $out0,$x00,$out # store output
3371 vxor $out0,$in0,$twk5
3372 le?vperm $out2,$out2,$out2,$leperm
3373 stvx_u $out1,$x10,$out
3374 le?vperm $out3,$out3,$out3,$leperm
3375 stvx_u $out2,$x20,$out
3376 stvx_u $out3,$x30,$out
3378 bne Lxts_dec6x_steal
3383 vxor $out0,$in3,$twk0
3384 vxor $out1,$in4,$twk1
3385 vxor $out2,$in5,$twk2
3386 vxor $out3,$out3,$out3
3387 vxor $out4,$out4,$out4
3391 le?vperm $out0,$out0,$out0,$leperm
3392 vmr $twk0,$twk3 # unused tweak
3394 le?vperm $out1,$out1,$out1,$leperm
3395 stvx_u $out0,$x00,$out # store output
3396 vxor $out0,$in0,$twk4
3397 le?vperm $out2,$out2,$out2,$leperm
3398 stvx_u $out1,$x10,$out
3399 stvx_u $out2,$x20,$out
3401 bne Lxts_dec6x_steal
3406 vxor $out0,$in4,$twk0
3407 vxor $out1,$in5,$twk1
3408 vxor $out2,$out2,$out2
3409 vxor $out3,$out3,$out3
3410 vxor $out4,$out4,$out4
3414 le?vperm $out0,$out0,$out0,$leperm
3415 vmr $twk0,$twk2 # unused tweak
3417 le?vperm $out1,$out1,$out1,$leperm
3418 stvx_u $out0,$x00,$out # store output
3419 vxor $out0,$in0,$twk3
3420 stvx_u $out1,$x10,$out
3422 bne Lxts_dec6x_steal
3427 vxor $out0,$in5,$twk0
3430 vncipher $out0,$out0,v24
3431 lvx v24,$x20,$key_ # round[3]
3432 addi $key_,$key_,0x20
3434 vncipher $out0,$out0,v25
3435 lvx v25,$x10,$key_ # round[4]
3439 vncipher $out0,$out0,v24
3443 vncipher $out0,$out0,v25
3446 vncipher $out0,$out0,v26
3449 vncipher $out0,$out0,v27
3451 addi $key_,$sp,$FRAME+15 # rewind $key_
3452 vncipher $out0,$out0,v28
3453 lvx v24,$x00,$key_ # re-pre-load round[1]
3455 vncipher $out0,$out0,v29
3456 lvx v25,$x10,$key_ # re-pre-load round[2]
3457 vxor $twk0,$twk0,v31
3459 le?vperm $in0,$in0,$in0,$leperm
3460 vncipher $out0,$out0,v30
3463 vncipherlast $out0,$out0,$twk0
3465 vmr $twk0,$twk1 # unused tweak
3467 le?vperm $out0,$out0,$out0,$leperm
3468 stvx_u $out0,$x00,$out # store output
3470 vxor $out0,$in0,$twk2
3471 bne Lxts_dec6x_steal
3480 le?vperm $in0,$in0,$in0,$leperm
3481 vxor $out0,$in0,$twk1
3483 vncipher $out0,$out0,v24
3484 lvx v24,$x20,$key_ # round[3]
3485 addi $key_,$key_,0x20
3487 vncipher $out0,$out0,v25
3488 lvx v25,$x10,$key_ # round[4]
3489 bdnz Lxts_dec6x_steal
3491 add $inp,$inp,$taillen
3492 vncipher $out0,$out0,v24
3495 vncipher $out0,$out0,v25
3498 vncipher $out0,$out0,v26
3500 lvsr $inpperm,0,$taillen # $in5 is no more
3501 vncipher $out0,$out0,v27
3503 addi $key_,$sp,$FRAME+15 # rewind $key_
3504 vncipher $out0,$out0,v28
3505 lvx v24,$x00,$key_ # re-pre-load round[1]
3507 vncipher $out0,$out0,v29
3508 lvx v25,$x10,$key_ # re-pre-load round[2]
3509 vxor $twk1,$twk1,v31
3511 le?vperm $in0,$in0,$in0,$leperm
3512 vncipher $out0,$out0,v30
3514 vperm $in0,$in0,$in0,$inpperm
3515 vncipherlast $tmp,$out0,$twk1
3517 le?vperm $out0,$tmp,$tmp,$leperm
3518 le?stvx_u $out0,0,$out
3519 be?stvx_u $tmp,0,$out
3521 vxor $out0,$out0,$out0
3523 vperm $out0,$out0,$out1,$inpperm
3524 vsel $out0,$in0,$tmp,$out0
3525 vxor $out0,$out0,$twk0
3529 Loop_xts_dec6x_steal:
3532 bdnz Loop_xts_dec6x_steal
3536 b Loop_xts_dec1x # one more time...
3543 stvx $seven,r10,$sp # wipe copies of round keys
3561 lvx v20,r10,$sp # ABI says so
3583 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3584 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3585 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3586 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3587 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3588 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3589 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3592 .byte 0,12,0x04,1,0x80,6,6,0
3597 vncipher $out0,$out0,v24
3598 vncipher $out1,$out1,v24
3599 vncipher $out2,$out2,v24
3600 vncipher $out3,$out3,v24
3601 vncipher $out4,$out4,v24
3602 lvx v24,$x20,$key_ # round[3]
3603 addi $key_,$key_,0x20
3605 vncipher $out0,$out0,v25
3606 vncipher $out1,$out1,v25
3607 vncipher $out2,$out2,v25
3608 vncipher $out3,$out3,v25
3609 vncipher $out4,$out4,v25
3610 lvx v25,$x10,$key_ # round[4]
3611 bdnz _aesp8_xts_dec5x
3614 vncipher $out0,$out0,v24
3615 vncipher $out1,$out1,v24
3616 vncipher $out2,$out2,v24
3617 vncipher $out3,$out3,v24
3618 vncipher $out4,$out4,v24
3622 vncipher $out0,$out0,v25
3623 vncipher $out1,$out1,v25
3624 vncipher $out2,$out2,v25
3625 vncipher $out3,$out3,v25
3626 vncipher $out4,$out4,v25
3627 vxor $twk0,$twk0,v31
3630 vncipher $out0,$out0,v26
3631 vncipher $out1,$out1,v26
3632 vncipher $out2,$out2,v26
3633 vncipher $out3,$out3,v26
3634 vncipher $out4,$out4,v26
3637 vncipher $out0,$out0,v27
3639 vncipher $out1,$out1,v27
3640 vncipher $out2,$out2,v27
3641 vncipher $out3,$out3,v27
3642 vncipher $out4,$out4,v27
3645 addi $key_,$sp,$FRAME+15 # rewind $key_
3646 vncipher $out0,$out0,v28
3647 vncipher $out1,$out1,v28
3648 vncipher $out2,$out2,v28
3649 vncipher $out3,$out3,v28
3650 vncipher $out4,$out4,v28
3651 lvx v24,$x00,$key_ # re-pre-load round[1]
3654 vncipher $out0,$out0,v29
3655 le?vperm $in0,$in0,$in0,$leperm
3656 vncipher $out1,$out1,v29
3657 vncipher $out2,$out2,v29
3658 vncipher $out3,$out3,v29
3659 vncipher $out4,$out4,v29
3660 lvx v25,$x10,$key_ # re-pre-load round[2]
3663 vncipher $out0,$out0,v30
3664 vncipher $out1,$out1,v30
3665 vncipher $out2,$out2,v30
3666 vncipher $out3,$out3,v30
3667 vncipher $out4,$out4,v30
3669 vncipherlast $out0,$out0,$twk0
3670 vncipherlast $out1,$out1,$in1
3671 vncipherlast $out2,$out2,$in2
3672 vncipherlast $out3,$out3,$in3
3673 vncipherlast $out4,$out4,$in4
3677 .byte 0,12,0x14,0,0,0,0,0
3682 foreach(split("\n",$code)) {
3683 s/\`([^\`]*)\`/eval($1)/geo;
3685 # constants table endian-specific conversion
3686 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3690 # convert to endian-agnostic format
3692 foreach (split(/,\s*/,$2)) {
3693 my $l = /^0/?oct:int;
3694 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3697 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3700 # little-endian conversion
3701 if ($flavour =~ /le$/o) {
3702 SWITCH: for($conv) {
3703 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3704 /\?rev/ && do { @bytes=reverse(@bytes); last; };
3709 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3712 $consts=0 if (m/Lconsts:/o); # end of table
3714 # instructions prefixed with '?' are endian-specific and need
3715 # to be adjusted accordingly...
3716 if ($flavour =~ /le$/o) { # little-endian
3721 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3722 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3723 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3724 } else { # big-endian