2 # Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
17 # This module implements support for AES instructions as per PowerISA
18 # specification version 2.07, first implemented by POWER8 processor.
19 # The module is endian-agnostic in sense that it supports both big-
20 # and little-endian cases. Data alignment in parallelizable modes is
21 # handled with VSX loads and stores, which implies MSR.VSX flag being
22 # set. It should also be noted that ISA specification doesn't prohibit
23 # alignment exceptions for these instructions on page boundaries.
24 # Initially alignment was handled in pure AltiVec/VMX way [when data
25 # is aligned programmatically, which in turn guarantees exception-
26 # free execution], but it turned to hamper performance when vcipher
27 # instructions are interleaved. It's reckoned that eventual
28 # misalignment penalties at page boundaries are in average lower
29 # than additional overhead in pure AltiVec approach.
33 # Added an XTS subroutine; 9x (little-endian) and 12x (big-endian)
34 # improvements were measured.
36 ######################################################################
37 # Current large-block performance in cycles per byte processed with
38 # 128-bit key (less is better).
40 # CBC en-/decrypt CTR XTS
41 # POWER8[le] 3.96/0.72 0.74 1.1
42 # POWER8[be] 3.75/0.65 0.66 1.0
46 if ($flavour =~ /64/) {
54 } elsif ($flavour =~ /32/) {
62 } else { die "nonsense $flavour"; }
64 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
66 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
67 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
68 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
69 die "can't locate ppc-xlate.pl";
71 open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
79 #########################################################################
80 {{{ # Key setup procedures #
81 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
82 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
83 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
# Key-schedule constant table ("?rev" tells ppc-xlate.pl to byte-reverse
# these words on little-endian builds so a plain lvx reads them correctly).
# The first word seeds $rcon, which is doubled each round via vadduwm; the
# remaining entries are loaded later from this table -- NOTE(review): their
# exact consumers (e.g. the lvx that fills $mask) are not visible in this
# excerpt, confirm against the full file.
92 .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
93 .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
94 .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
99 mflr $ptr #vvvvv "distance between . and rcon
104 .byte 0,12,0x14,0,0,0,0,0
105 .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
107 .globl .${prefix}_set_encrypt_key
109 .${prefix}_set_encrypt_key:
112 $PUSH r11,$LRSAVE($sp)
116 beq- Lenc_key_abort # if ($inp==0) return -1;
118 beq- Lenc_key_abort # if ($out==0) return -1;
136 addi $inp,$inp,15 # 15 is not typo
137 lvsr $key,0,r9 # borrow $key
141 le?vspltisb $mask,0x0f # borrow $mask
143 le?vxor $key,$key,$mask # adjust for byte swap
146 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
148 vxor $zero,$zero,$zero
151 ?lvsr $outperm,0,$out
154 ?vperm $outmask,$zero,$outmask,$outperm
164 vperm $key,$in0,$in0,$mask # rotate-n-splat
165 vsldoi $tmp,$zero,$in0,12 # >>32
166 vperm $outtail,$in0,$in0,$outperm # rotate
167 vsel $stage,$outhead,$outtail,$outmask
168 vmr $outhead,$outtail
169 vcipherlast $key,$key,$rcon
174 vsldoi $tmp,$zero,$tmp,12 # >>32
176 vsldoi $tmp,$zero,$tmp,12 # >>32
178 vadduwm $rcon,$rcon,$rcon
182 lvx $rcon,0,$ptr # last two round keys
184 vperm $key,$in0,$in0,$mask # rotate-n-splat
185 vsldoi $tmp,$zero,$in0,12 # >>32
186 vperm $outtail,$in0,$in0,$outperm # rotate
187 vsel $stage,$outhead,$outtail,$outmask
188 vmr $outhead,$outtail
189 vcipherlast $key,$key,$rcon
194 vsldoi $tmp,$zero,$tmp,12 # >>32
196 vsldoi $tmp,$zero,$tmp,12 # >>32
198 vadduwm $rcon,$rcon,$rcon
201 vperm $key,$in0,$in0,$mask # rotate-n-splat
202 vsldoi $tmp,$zero,$in0,12 # >>32
203 vperm $outtail,$in0,$in0,$outperm # rotate
204 vsel $stage,$outhead,$outtail,$outmask
205 vmr $outhead,$outtail
206 vcipherlast $key,$key,$rcon
211 vsldoi $tmp,$zero,$tmp,12 # >>32
213 vsldoi $tmp,$zero,$tmp,12 # >>32
216 vperm $outtail,$in0,$in0,$outperm # rotate
217 vsel $stage,$outhead,$outtail,$outmask
218 vmr $outhead,$outtail
221 addi $inp,$out,15 # 15 is not typo
231 vperm $outtail,$in0,$in0,$outperm # rotate
232 vsel $stage,$outhead,$outtail,$outmask
233 vmr $outhead,$outtail
236 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
237 vspltisb $key,8 # borrow $key
239 vsububm $mask,$mask,$key # adjust the mask
242 vperm $key,$in1,$in1,$mask # rotate-n-splat
243 vsldoi $tmp,$zero,$in0,12 # >>32
244 vcipherlast $key,$key,$rcon
247 vsldoi $tmp,$zero,$tmp,12 # >>32
249 vsldoi $tmp,$zero,$tmp,12 # >>32
252 vsldoi $stage,$zero,$in1,8
255 vsldoi $in1,$zero,$in1,12 # >>32
256 vadduwm $rcon,$rcon,$rcon
260 vsldoi $stage,$stage,$in0,8
262 vperm $key,$in1,$in1,$mask # rotate-n-splat
263 vsldoi $tmp,$zero,$in0,12 # >>32
264 vperm $outtail,$stage,$stage,$outperm # rotate
265 vsel $stage,$outhead,$outtail,$outmask
266 vmr $outhead,$outtail
267 vcipherlast $key,$key,$rcon
271 vsldoi $stage,$in0,$in1,8
273 vsldoi $tmp,$zero,$tmp,12 # >>32
274 vperm $outtail,$stage,$stage,$outperm # rotate
275 vsel $stage,$outhead,$outtail,$outmask
276 vmr $outhead,$outtail
278 vsldoi $tmp,$zero,$tmp,12 # >>32
285 vsldoi $in1,$zero,$in1,12 # >>32
286 vadduwm $rcon,$rcon,$rcon
290 vperm $outtail,$in0,$in0,$outperm # rotate
291 vsel $stage,$outhead,$outtail,$outmask
292 vmr $outhead,$outtail
294 addi $inp,$out,15 # 15 is not typo
307 vperm $outtail,$in0,$in0,$outperm # rotate
308 vsel $stage,$outhead,$outtail,$outmask
309 vmr $outhead,$outtail
312 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
316 vperm $key,$in1,$in1,$mask # rotate-n-splat
317 vsldoi $tmp,$zero,$in0,12 # >>32
318 vperm $outtail,$in1,$in1,$outperm # rotate
319 vsel $stage,$outhead,$outtail,$outmask
320 vmr $outhead,$outtail
321 vcipherlast $key,$key,$rcon
326 vsldoi $tmp,$zero,$tmp,12 # >>32
328 vsldoi $tmp,$zero,$tmp,12 # >>32
330 vadduwm $rcon,$rcon,$rcon
332 vperm $outtail,$in0,$in0,$outperm # rotate
333 vsel $stage,$outhead,$outtail,$outmask
334 vmr $outhead,$outtail
336 addi $inp,$out,15 # 15 is not typo
340 vspltw $key,$in0,3 # just splat
341 vsldoi $tmp,$zero,$in1,12 # >>32
345 vsldoi $tmp,$zero,$tmp,12 # >>32
347 vsldoi $tmp,$zero,$tmp,12 # >>32
355 lvx $in1,0,$inp # redundant in aligned case
356 vsel $in1,$outhead,$in1,$outmask
366 .byte 0,12,0x14,1,0,0,3,0
368 .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
370 .globl .${prefix}_set_decrypt_key
372 .${prefix}_set_decrypt_key:
373 $STU $sp,-$FRAME($sp)
375 $PUSH r10,$FRAME+$LRSAVE($sp)
383 subi $inp,$out,240 # first round key
384 srwi $rounds,$rounds,1
385 add $out,$inp,$cnt # last round key
409 xor r3,r3,r3 # return value
414 .byte 0,12,4,1,0x80,0,3,0
416 .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
419 #########################################################################
420 {{{ # Single block en- and decrypt procedures #
423 my $n = $dir eq "de" ? "n" : "";
424 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
427 .globl .${prefix}_${dir}crypt
429 .${prefix}_${dir}crypt:
430 lwz $rounds,240($key)
433 li $idx,15 # 15 is not typo
439 lvsl v2,0,$inp # inpperm
441 ?lvsl v3,0,r11 # outperm
444 vperm v0,v0,v1,v2 # align [and byte swap in LE]
446 ?lvsl v5,0,$key # keyperm
447 srwi $rounds,$rounds,1
450 subi $rounds,$rounds,1
451 ?vperm v1,v1,v2,v5 # align round key
473 v${n}cipherlast v0,v0,v1
477 li $idx,15 # 15 is not typo
478 ?vperm v2,v1,v2,v3 # outmask
480 lvx v1,0,$out # outhead
481 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
491 .byte 0,12,0x14,0,0,0,3,0
493 .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
499 #########################################################################
500 {{{ # CBC en- and decrypt procedures #
501 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
502 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
503 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
506 .globl .${prefix}_cbc_encrypt
508 .${prefix}_cbc_encrypt:
512 cmpwi $enc,0 # test direction
518 vxor $rndkey0,$rndkey0,$rndkey0
519 le?vspltisb $tmp,0x0f
521 lvx $ivec,0,$ivp # load [unaligned] iv
523 lvx $inptail,$idx,$ivp
524 le?vxor $inpperm,$inpperm,$tmp
525 vperm $ivec,$ivec,$inptail,$inpperm
528 ?lvsl $keyperm,0,$key # prepare for unaligned key
529 lwz $rounds,240($key)
531 lvsr $inpperm,0,r11 # prepare for unaligned load
533 addi $inp,$inp,15 # 15 is not typo
534 le?vxor $inpperm,$inpperm,$tmp
536 ?lvsr $outperm,0,$out # prepare for unaligned store
539 ?vperm $outmask,$rndkey0,$outmask,$outperm
540 le?vxor $outperm,$outperm,$tmp
542 srwi $rounds,$rounds,1
544 subi $rounds,$rounds,1
552 subi $len,$len,16 # len-=16
555 vperm $inout,$inout,$inptail,$inpperm
556 lvx $rndkey1,$idx,$key
558 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
559 vxor $inout,$inout,$rndkey0
560 lvx $rndkey0,$idx,$key
562 vxor $inout,$inout,$ivec
565 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
566 vcipher $inout,$inout,$rndkey1
567 lvx $rndkey1,$idx,$key
569 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
570 vcipher $inout,$inout,$rndkey0
571 lvx $rndkey0,$idx,$key
575 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
576 vcipher $inout,$inout,$rndkey1
577 lvx $rndkey1,$idx,$key
579 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
580 vcipherlast $ivec,$inout,$rndkey0
583 vperm $tmp,$ivec,$ivec,$outperm
584 vsel $inout,$outhead,$tmp,$outmask
595 bge _aesp8_cbc_decrypt8x
600 subi $len,$len,16 # len-=16
603 vperm $tmp,$tmp,$inptail,$inpperm
604 lvx $rndkey1,$idx,$key
606 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
607 vxor $inout,$tmp,$rndkey0
608 lvx $rndkey0,$idx,$key
612 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
613 vncipher $inout,$inout,$rndkey1
614 lvx $rndkey1,$idx,$key
616 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
617 vncipher $inout,$inout,$rndkey0
618 lvx $rndkey0,$idx,$key
622 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
623 vncipher $inout,$inout,$rndkey1
624 lvx $rndkey1,$idx,$key
626 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
627 vncipherlast $inout,$inout,$rndkey0
630 vxor $inout,$inout,$ivec
632 vperm $tmp,$inout,$inout,$outperm
633 vsel $inout,$outhead,$tmp,$outmask
641 lvx $inout,0,$out # redundant in aligned case
642 vsel $inout,$outhead,$inout,$outmask
645 neg $enc,$ivp # write [unaligned] iv
646 li $idx,15 # 15 is not typo
647 vxor $rndkey0,$rndkey0,$rndkey0
649 le?vspltisb $tmp,0x0f
650 ?lvsl $outperm,0,$enc
651 ?vperm $outmask,$rndkey0,$outmask,$outperm
652 le?vxor $outperm,$outperm,$tmp
654 vperm $ivec,$ivec,$ivec,$outperm
655 vsel $inout,$outhead,$ivec,$outmask
656 lvx $inptail,$idx,$ivp
658 vsel $inout,$ivec,$inptail,$outmask
659 stvx $inout,$idx,$ivp
664 .byte 0,12,0x14,0,0,0,6,0
667 #########################################################################
668 {{ # Optimized CBC decrypt procedure #
# Register allocation for the 8x-interleaved CBC decrypt path.
# $x00..$x70 are GPRs holding byte offsets 0x00..0x70, used as the index
# operand of the 16-byte-stride lvx_u/stvx_u loads and stores below.
670 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
671 $x00=0 if ($flavour =~ /osx/);	# NOTE(review): OSX flavour substitutes a literal 0 for r0 -- confirm assembler requirement
672 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));	# eight ciphertext input blocks
673 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));	# eight decryption states / outputs
674 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
675 # v26-v31 last 6 round keys
676 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
680 _aesp8_cbc_decrypt8x:
681 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
682 li r10,`$FRAME+8*16+15`
683 li r11,`$FRAME+8*16+31`
684 stvx v20,r10,$sp # ABI says so
707 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
709 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
711 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
713 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
715 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
717 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
719 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
723 subi $rounds,$rounds,3 # -4 in total
724 subi $len,$len,128 # bias
726 lvx $rndkey0,$x00,$key # load key schedule
730 ?vperm $rndkey0,$rndkey0,v30,$keyperm
731 addi $key_,$sp,$FRAME+15
735 ?vperm v24,v30,v31,$keyperm
738 stvx v24,$x00,$key_ # off-load round[1]
739 ?vperm v25,v31,v30,$keyperm
741 stvx v25,$x10,$key_ # off-load round[2]
742 addi $key_,$key_,0x20
743 bdnz Load_cbc_dec_key
746 ?vperm v24,v30,v31,$keyperm
748 stvx v24,$x00,$key_ # off-load round[3]
749 ?vperm v25,v31,v26,$keyperm
751 stvx v25,$x10,$key_ # off-load round[4]
752 addi $key_,$sp,$FRAME+15 # rewind $key_
753 ?vperm v26,v26,v27,$keyperm
755 ?vperm v27,v27,v28,$keyperm
757 ?vperm v28,v28,v29,$keyperm
759 ?vperm v29,v29,v30,$keyperm
760 lvx $out0,$x70,$key # borrow $out0
761 ?vperm v30,v30,v31,$keyperm
762 lvx v24,$x00,$key_ # pre-load round[1]
763 ?vperm v31,v31,$out0,$keyperm
764 lvx v25,$x10,$key_ # pre-load round[2]
766 #lvx $inptail,0,$inp # "caller" already did this
767 #addi $inp,$inp,15 # 15 is not typo
768 subi $inp,$inp,15 # undo "caller"
771 lvx_u $in0,$x00,$inp # load first 8 "words"
772 le?lvsl $inpperm,0,$idx
773 le?vspltisb $tmp,0x0f
775 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
777 le?vperm $in0,$in0,$in0,$inpperm
779 le?vperm $in1,$in1,$in1,$inpperm
781 le?vperm $in2,$in2,$in2,$inpperm
782 vxor $out0,$in0,$rndkey0
784 le?vperm $in3,$in3,$in3,$inpperm
785 vxor $out1,$in1,$rndkey0
787 le?vperm $in4,$in4,$in4,$inpperm
788 vxor $out2,$in2,$rndkey0
791 le?vperm $in5,$in5,$in5,$inpperm
792 vxor $out3,$in3,$rndkey0
793 le?vperm $in6,$in6,$in6,$inpperm
794 vxor $out4,$in4,$rndkey0
795 le?vperm $in7,$in7,$in7,$inpperm
796 vxor $out5,$in5,$rndkey0
797 vxor $out6,$in6,$rndkey0
798 vxor $out7,$in7,$rndkey0
# Loop_cbc_dec8x body (fragment): each iteration applies two AES decrypt
# rounds -- key v24 then key v25 -- to all eight blocks in flight, and
# reloads the next key pair from the stack-resident copy of the schedule
# addressed by $key_ (off-loaded earlier at Load_cbc_dec_key).
804 vncipher $out0,$out0,v24
805 vncipher $out1,$out1,v24
806 vncipher $out2,$out2,v24
807 vncipher $out3,$out3,v24
808 vncipher $out4,$out4,v24
809 vncipher $out5,$out5,v24
810 vncipher $out6,$out6,v24
811 vncipher $out7,$out7,v24
812 lvx v24,$x20,$key_ # round[3]
813 addi $key_,$key_,0x20
815 vncipher $out0,$out0,v25
816 vncipher $out1,$out1,v25
817 vncipher $out2,$out2,v25
818 vncipher $out3,$out3,v25
819 vncipher $out4,$out4,v25
820 vncipher $out5,$out5,v25
821 vncipher $out6,$out6,v25
822 vncipher $out7,$out7,v25
823 lvx v25,$x10,$key_ # round[4]
826 subic $len,$len,128 # $len-=128
827 vncipher $out0,$out0,v24
828 vncipher $out1,$out1,v24
829 vncipher $out2,$out2,v24
830 vncipher $out3,$out3,v24
831 vncipher $out4,$out4,v24
832 vncipher $out5,$out5,v24
833 vncipher $out6,$out6,v24
834 vncipher $out7,$out7,v24
836 subfe. r0,r0,r0 # borrow?-1:0
837 vncipher $out0,$out0,v25
838 vncipher $out1,$out1,v25
839 vncipher $out2,$out2,v25
840 vncipher $out3,$out3,v25
841 vncipher $out4,$out4,v25
842 vncipher $out5,$out5,v25
843 vncipher $out6,$out6,v25
844 vncipher $out7,$out7,v25
847 vncipher $out0,$out0,v26
848 vncipher $out1,$out1,v26
849 vncipher $out2,$out2,v26
850 vncipher $out3,$out3,v26
851 vncipher $out4,$out4,v26
852 vncipher $out5,$out5,v26
853 vncipher $out6,$out6,v26
854 vncipher $out7,$out7,v26
856 add $inp,$inp,r0 # $inp is adjusted in such
857 # way that at exit from the
858 # loop inX-in7 are loaded
860 vncipher $out0,$out0,v27
861 vncipher $out1,$out1,v27
862 vncipher $out2,$out2,v27
863 vncipher $out3,$out3,v27
864 vncipher $out4,$out4,v27
865 vncipher $out5,$out5,v27
866 vncipher $out6,$out6,v27
867 vncipher $out7,$out7,v27
869 addi $key_,$sp,$FRAME+15 # rewind $key_
870 vncipher $out0,$out0,v28
871 vncipher $out1,$out1,v28
872 vncipher $out2,$out2,v28
873 vncipher $out3,$out3,v28
874 vncipher $out4,$out4,v28
875 vncipher $out5,$out5,v28
876 vncipher $out6,$out6,v28
877 vncipher $out7,$out7,v28
878 lvx v24,$x00,$key_ # re-pre-load round[1]
880 vncipher $out0,$out0,v29
881 vncipher $out1,$out1,v29
882 vncipher $out2,$out2,v29
883 vncipher $out3,$out3,v29
884 vncipher $out4,$out4,v29
885 vncipher $out5,$out5,v29
886 vncipher $out6,$out6,v29
887 vncipher $out7,$out7,v29
888 lvx v25,$x10,$key_ # re-pre-load round[2]
890 vncipher $out0,$out0,v30
891 vxor $ivec,$ivec,v31 # xor with last round key
892 vncipher $out1,$out1,v30
894 vncipher $out2,$out2,v30
896 vncipher $out3,$out3,v30
898 vncipher $out4,$out4,v30
900 vncipher $out5,$out5,v30
902 vncipher $out6,$out6,v30
904 vncipher $out7,$out7,v30
907 vncipherlast $out0,$out0,$ivec
908 vncipherlast $out1,$out1,$in0
909 lvx_u $in0,$x00,$inp # load next input block
910 vncipherlast $out2,$out2,$in1
912 vncipherlast $out3,$out3,$in2
913 le?vperm $in0,$in0,$in0,$inpperm
915 vncipherlast $out4,$out4,$in3
916 le?vperm $in1,$in1,$in1,$inpperm
918 vncipherlast $out5,$out5,$in4
919 le?vperm $in2,$in2,$in2,$inpperm
921 vncipherlast $out6,$out6,$in5
922 le?vperm $in3,$in3,$in3,$inpperm
924 vncipherlast $out7,$out7,$in6
925 le?vperm $in4,$in4,$in4,$inpperm
928 le?vperm $in5,$in5,$in5,$inpperm
932 le?vperm $out0,$out0,$out0,$inpperm
933 le?vperm $out1,$out1,$out1,$inpperm
934 stvx_u $out0,$x00,$out
935 le?vperm $in6,$in6,$in6,$inpperm
936 vxor $out0,$in0,$rndkey0
937 le?vperm $out2,$out2,$out2,$inpperm
938 stvx_u $out1,$x10,$out
939 le?vperm $in7,$in7,$in7,$inpperm
940 vxor $out1,$in1,$rndkey0
941 le?vperm $out3,$out3,$out3,$inpperm
942 stvx_u $out2,$x20,$out
943 vxor $out2,$in2,$rndkey0
944 le?vperm $out4,$out4,$out4,$inpperm
945 stvx_u $out3,$x30,$out
946 vxor $out3,$in3,$rndkey0
947 le?vperm $out5,$out5,$out5,$inpperm
948 stvx_u $out4,$x40,$out
949 vxor $out4,$in4,$rndkey0
950 le?vperm $out6,$out6,$out6,$inpperm
951 stvx_u $out5,$x50,$out
952 vxor $out5,$in5,$rndkey0
953 le?vperm $out7,$out7,$out7,$inpperm
954 stvx_u $out6,$x60,$out
955 vxor $out6,$in6,$rndkey0
956 stvx_u $out7,$x70,$out
958 vxor $out7,$in7,$rndkey0
961 beq Loop_cbc_dec8x # did $len-=128 borrow?
968 Loop_cbc_dec8x_tail: # up to 7 "words" tail...
969 vncipher $out1,$out1,v24
970 vncipher $out2,$out2,v24
971 vncipher $out3,$out3,v24
972 vncipher $out4,$out4,v24
973 vncipher $out5,$out5,v24
974 vncipher $out6,$out6,v24
975 vncipher $out7,$out7,v24
976 lvx v24,$x20,$key_ # round[3]
977 addi $key_,$key_,0x20
979 vncipher $out1,$out1,v25
980 vncipher $out2,$out2,v25
981 vncipher $out3,$out3,v25
982 vncipher $out4,$out4,v25
983 vncipher $out5,$out5,v25
984 vncipher $out6,$out6,v25
985 vncipher $out7,$out7,v25
986 lvx v25,$x10,$key_ # round[4]
987 bdnz Loop_cbc_dec8x_tail
989 vncipher $out1,$out1,v24
990 vncipher $out2,$out2,v24
991 vncipher $out3,$out3,v24
992 vncipher $out4,$out4,v24
993 vncipher $out5,$out5,v24
994 vncipher $out6,$out6,v24
995 vncipher $out7,$out7,v24
997 vncipher $out1,$out1,v25
998 vncipher $out2,$out2,v25
999 vncipher $out3,$out3,v25
1000 vncipher $out4,$out4,v25
1001 vncipher $out5,$out5,v25
1002 vncipher $out6,$out6,v25
1003 vncipher $out7,$out7,v25
1005 vncipher $out1,$out1,v26
1006 vncipher $out2,$out2,v26
1007 vncipher $out3,$out3,v26
1008 vncipher $out4,$out4,v26
1009 vncipher $out5,$out5,v26
1010 vncipher $out6,$out6,v26
1011 vncipher $out7,$out7,v26
1013 vncipher $out1,$out1,v27
1014 vncipher $out2,$out2,v27
1015 vncipher $out3,$out3,v27
1016 vncipher $out4,$out4,v27
1017 vncipher $out5,$out5,v27
1018 vncipher $out6,$out6,v27
1019 vncipher $out7,$out7,v27
1021 vncipher $out1,$out1,v28
1022 vncipher $out2,$out2,v28
1023 vncipher $out3,$out3,v28
1024 vncipher $out4,$out4,v28
1025 vncipher $out5,$out5,v28
1026 vncipher $out6,$out6,v28
1027 vncipher $out7,$out7,v28
1029 vncipher $out1,$out1,v29
1030 vncipher $out2,$out2,v29
1031 vncipher $out3,$out3,v29
1032 vncipher $out4,$out4,v29
1033 vncipher $out5,$out5,v29
1034 vncipher $out6,$out6,v29
1035 vncipher $out7,$out7,v29
1037 vncipher $out1,$out1,v30
1038 vxor $ivec,$ivec,v31 # last round key
1039 vncipher $out2,$out2,v30
1041 vncipher $out3,$out3,v30
1043 vncipher $out4,$out4,v30
1045 vncipher $out5,$out5,v30
1047 vncipher $out6,$out6,v30
1049 vncipher $out7,$out7,v30
1052 cmplwi $len,32 # switch($len)
1057 blt Lcbc_dec8x_three
1066 vncipherlast $out1,$out1,$ivec
1067 vncipherlast $out2,$out2,$in1
1068 vncipherlast $out3,$out3,$in2
1069 vncipherlast $out4,$out4,$in3
1070 vncipherlast $out5,$out5,$in4
1071 vncipherlast $out6,$out6,$in5
1072 vncipherlast $out7,$out7,$in6
1075 le?vperm $out1,$out1,$out1,$inpperm
1076 le?vperm $out2,$out2,$out2,$inpperm
1077 stvx_u $out1,$x00,$out
1078 le?vperm $out3,$out3,$out3,$inpperm
1079 stvx_u $out2,$x10,$out
1080 le?vperm $out4,$out4,$out4,$inpperm
1081 stvx_u $out3,$x20,$out
1082 le?vperm $out5,$out5,$out5,$inpperm
1083 stvx_u $out4,$x30,$out
1084 le?vperm $out6,$out6,$out6,$inpperm
1085 stvx_u $out5,$x40,$out
1086 le?vperm $out7,$out7,$out7,$inpperm
1087 stvx_u $out6,$x50,$out
1088 stvx_u $out7,$x60,$out
1094 vncipherlast $out2,$out2,$ivec
1095 vncipherlast $out3,$out3,$in2
1096 vncipherlast $out4,$out4,$in3
1097 vncipherlast $out5,$out5,$in4
1098 vncipherlast $out6,$out6,$in5
1099 vncipherlast $out7,$out7,$in6
1102 le?vperm $out2,$out2,$out2,$inpperm
1103 le?vperm $out3,$out3,$out3,$inpperm
1104 stvx_u $out2,$x00,$out
1105 le?vperm $out4,$out4,$out4,$inpperm
1106 stvx_u $out3,$x10,$out
1107 le?vperm $out5,$out5,$out5,$inpperm
1108 stvx_u $out4,$x20,$out
1109 le?vperm $out6,$out6,$out6,$inpperm
1110 stvx_u $out5,$x30,$out
1111 le?vperm $out7,$out7,$out7,$inpperm
1112 stvx_u $out6,$x40,$out
1113 stvx_u $out7,$x50,$out
1119 vncipherlast $out3,$out3,$ivec
1120 vncipherlast $out4,$out4,$in3
1121 vncipherlast $out5,$out5,$in4
1122 vncipherlast $out6,$out6,$in5
1123 vncipherlast $out7,$out7,$in6
1126 le?vperm $out3,$out3,$out3,$inpperm
1127 le?vperm $out4,$out4,$out4,$inpperm
1128 stvx_u $out3,$x00,$out
1129 le?vperm $out5,$out5,$out5,$inpperm
1130 stvx_u $out4,$x10,$out
1131 le?vperm $out6,$out6,$out6,$inpperm
1132 stvx_u $out5,$x20,$out
1133 le?vperm $out7,$out7,$out7,$inpperm
1134 stvx_u $out6,$x30,$out
1135 stvx_u $out7,$x40,$out
1141 vncipherlast $out4,$out4,$ivec
1142 vncipherlast $out5,$out5,$in4
1143 vncipherlast $out6,$out6,$in5
1144 vncipherlast $out7,$out7,$in6
1147 le?vperm $out4,$out4,$out4,$inpperm
1148 le?vperm $out5,$out5,$out5,$inpperm
1149 stvx_u $out4,$x00,$out
1150 le?vperm $out6,$out6,$out6,$inpperm
1151 stvx_u $out5,$x10,$out
1152 le?vperm $out7,$out7,$out7,$inpperm
1153 stvx_u $out6,$x20,$out
1154 stvx_u $out7,$x30,$out
1160 vncipherlast $out5,$out5,$ivec
1161 vncipherlast $out6,$out6,$in5
1162 vncipherlast $out7,$out7,$in6
1165 le?vperm $out5,$out5,$out5,$inpperm
1166 le?vperm $out6,$out6,$out6,$inpperm
1167 stvx_u $out5,$x00,$out
1168 le?vperm $out7,$out7,$out7,$inpperm
1169 stvx_u $out6,$x10,$out
1170 stvx_u $out7,$x20,$out
1176 vncipherlast $out6,$out6,$ivec
1177 vncipherlast $out7,$out7,$in6
1180 le?vperm $out6,$out6,$out6,$inpperm
1181 le?vperm $out7,$out7,$out7,$inpperm
1182 stvx_u $out6,$x00,$out
1183 stvx_u $out7,$x10,$out
1189 vncipherlast $out7,$out7,$ivec
1192 le?vperm $out7,$out7,$out7,$inpperm
1197 le?vperm $ivec,$ivec,$ivec,$inpperm
1198 stvx_u $ivec,0,$ivp # write [unaligned] iv
1202 stvx $inpperm,r10,$sp # wipe copies of round keys
1204 stvx $inpperm,r11,$sp
1206 stvx $inpperm,r10,$sp
1208 stvx $inpperm,r11,$sp
1210 stvx $inpperm,r10,$sp
1212 stvx $inpperm,r11,$sp
1214 stvx $inpperm,r10,$sp
1216 stvx $inpperm,r11,$sp
1220 lvx v20,r10,$sp # ABI says so
1242 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1243 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1244 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1245 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1246 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1247 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1248 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1251 .byte 0,12,0x04,0,0x80,6,6,0
1253 .size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1257 #########################################################################
1258 {{{ # CTR procedure[s] #
1259 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1260 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1261 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1266 .globl .${prefix}_ctr32_encrypt_blocks
1268 .${prefix}_ctr32_encrypt_blocks:
1277 vxor $rndkey0,$rndkey0,$rndkey0
1278 le?vspltisb $tmp,0x0f
1280 lvx $ivec,0,$ivp # load [unaligned] iv
1281 lvsl $inpperm,0,$ivp
1282 lvx $inptail,$idx,$ivp
1284 le?vxor $inpperm,$inpperm,$tmp
1285 vperm $ivec,$ivec,$inptail,$inpperm
1286 vsldoi $one,$rndkey0,$one,1
1289 ?lvsl $keyperm,0,$key # prepare for unaligned key
1290 lwz $rounds,240($key)
1292 lvsr $inpperm,0,r11 # prepare for unaligned load
1294 addi $inp,$inp,15 # 15 is not typo
1295 le?vxor $inpperm,$inpperm,$tmp
1297 srwi $rounds,$rounds,1
1299 subi $rounds,$rounds,1
1302 bge _aesp8_ctr32_encrypt8x
1304 ?lvsr $outperm,0,$out # prepare for unaligned store
1305 vspltisb $outmask,-1
1307 ?vperm $outmask,$rndkey0,$outmask,$outperm
1308 le?vxor $outperm,$outperm,$tmp
1312 lvx $rndkey1,$idx,$key
1314 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1315 vxor $inout,$ivec,$rndkey0
1316 lvx $rndkey0,$idx,$key
1322 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1323 vcipher $inout,$inout,$rndkey1
1324 lvx $rndkey1,$idx,$key
1326 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1327 vcipher $inout,$inout,$rndkey0
1328 lvx $rndkey0,$idx,$key
1332 vadduwm $ivec,$ivec,$one
1336 subic. $len,$len,1 # blocks--
1338 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1339 vcipher $inout,$inout,$rndkey1
1340 lvx $rndkey1,$idx,$key
1341 vperm $dat,$dat,$inptail,$inpperm
1343 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1345 vxor $dat,$dat,$rndkey1 # last round key
1346 vcipherlast $inout,$inout,$dat
1348 lvx $rndkey1,$idx,$key
1350 vperm $inout,$inout,$inout,$outperm
1351 vsel $dat,$outhead,$inout,$outmask
1353 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1355 vxor $inout,$ivec,$rndkey0
1356 lvx $rndkey0,$idx,$key
1363 lvx $inout,0,$out # redundant in aligned case
1364 vsel $inout,$outhead,$inout,$outmask
1370 .byte 0,12,0x14,0,0,0,6,0
1373 #########################################################################
1374 {{ # Optimized CTR procedure #
# Register allocation for the 8x-interleaved CTR encrypt path.
# $x00..$x70 are GPRs holding byte offsets 0x00..0x70 for the
# 16-byte-stride lvx_u/stvx_u accesses.
1376 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1377 $x00=0 if ($flavour =~ /osx/);	# NOTE(review): OSX flavour substitutes a literal 0 for r0 -- confirm assembler requirement
1378 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));	# eight plaintext input blocks
1379 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));	# eight counter/keystream states
1380 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
1381 # v26-v31 last 6 round keys
1382 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1383 my ($two,$three,$four)=($outhead,$outperm,$outmask);	# counter increments, aliased over output-permute temps
1387 _aesp8_ctr32_encrypt8x:
1388 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1389 li r10,`$FRAME+8*16+15`
1390 li r11,`$FRAME+8*16+31`
1391 stvx v20,r10,$sp # ABI says so
1414 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1416 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1418 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1420 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1422 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1424 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1426 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1430 subi $rounds,$rounds,3 # -4 in total
1432 lvx $rndkey0,$x00,$key # load key schedule
1436 ?vperm $rndkey0,$rndkey0,v30,$keyperm
1437 addi $key_,$sp,$FRAME+15
1441 ?vperm v24,v30,v31,$keyperm
1444 stvx v24,$x00,$key_ # off-load round[1]
1445 ?vperm v25,v31,v30,$keyperm
1447 stvx v25,$x10,$key_ # off-load round[2]
1448 addi $key_,$key_,0x20
1449 bdnz Load_ctr32_enc_key
1452 ?vperm v24,v30,v31,$keyperm
1454 stvx v24,$x00,$key_ # off-load round[3]
1455 ?vperm v25,v31,v26,$keyperm
1457 stvx v25,$x10,$key_ # off-load round[4]
1458 addi $key_,$sp,$FRAME+15 # rewind $key_
1459 ?vperm v26,v26,v27,$keyperm
1461 ?vperm v27,v27,v28,$keyperm
1463 ?vperm v28,v28,v29,$keyperm
1465 ?vperm v29,v29,v30,$keyperm
1466 lvx $out0,$x70,$key # borrow $out0
1467 ?vperm v30,v30,v31,$keyperm
1468 lvx v24,$x00,$key_ # pre-load round[1]
1469 ?vperm v31,v31,$out0,$keyperm
1470 lvx v25,$x10,$key_ # pre-load round[2]
1472 vadduwm $two,$one,$one
1473 subi $inp,$inp,15 # undo "caller"
1476 vadduwm $out1,$ivec,$one # counter values ...
1477 vadduwm $out2,$ivec,$two
1478 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1480 vadduwm $out3,$out1,$two
1481 vxor $out1,$out1,$rndkey0
1482 le?lvsl $inpperm,0,$idx
1483 vadduwm $out4,$out2,$two
1484 vxor $out2,$out2,$rndkey0
1485 le?vspltisb $tmp,0x0f
1486 vadduwm $out5,$out3,$two
1487 vxor $out3,$out3,$rndkey0
1488 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1489 vadduwm $out6,$out4,$two
1490 vxor $out4,$out4,$rndkey0
1491 vadduwm $out7,$out5,$two
1492 vxor $out5,$out5,$rndkey0
1493 vadduwm $ivec,$out6,$two # next counter value
1494 vxor $out6,$out6,$rndkey0
1495 vxor $out7,$out7,$rndkey0
# Loop_ctr32_enc8x body (fragment): two AES encrypt rounds (keys v24,
# then v25) across eight counter blocks per iteration, reloading the
# next off-loaded key pair from the stack copy at $key_.
1501 vcipher $out0,$out0,v24
1502 vcipher $out1,$out1,v24
1503 vcipher $out2,$out2,v24
1504 vcipher $out3,$out3,v24
1505 vcipher $out4,$out4,v24
1506 vcipher $out5,$out5,v24
1507 vcipher $out6,$out6,v24
1508 vcipher $out7,$out7,v24
1509 Loop_ctr32_enc8x_middle:
1510 lvx v24,$x20,$key_ # round[3]
1511 addi $key_,$key_,0x20
1513 vcipher $out0,$out0,v25
1514 vcipher $out1,$out1,v25
1515 vcipher $out2,$out2,v25
1516 vcipher $out3,$out3,v25
1517 vcipher $out4,$out4,v25
1518 vcipher $out5,$out5,v25
1519 vcipher $out6,$out6,v25
1520 vcipher $out7,$out7,v25
1521 lvx v25,$x10,$key_ # round[4]
1522 bdnz Loop_ctr32_enc8x
1524 subic r11,$len,256 # $len-256, borrow $key_
1525 vcipher $out0,$out0,v24
1526 vcipher $out1,$out1,v24
1527 vcipher $out2,$out2,v24
1528 vcipher $out3,$out3,v24
1529 vcipher $out4,$out4,v24
1530 vcipher $out5,$out5,v24
1531 vcipher $out6,$out6,v24
1532 vcipher $out7,$out7,v24
1534 subfe r0,r0,r0 # borrow?-1:0
1535 vcipher $out0,$out0,v25
1536 vcipher $out1,$out1,v25
1537 vcipher $out2,$out2,v25
1538 vcipher $out3,$out3,v25
1539 vcipher $out4,$out4,v25
1540 vcipher $out5,$out5,v25
1541 vcipher $out6,$out6,v25
1542 vcipher $out7,$out7,v25
1545 addi $key_,$sp,$FRAME+15 # rewind $key_
1546 vcipher $out0,$out0,v26
1547 vcipher $out1,$out1,v26
1548 vcipher $out2,$out2,v26
1549 vcipher $out3,$out3,v26
1550 vcipher $out4,$out4,v26
1551 vcipher $out5,$out5,v26
1552 vcipher $out6,$out6,v26
1553 vcipher $out7,$out7,v26
1554 lvx v24,$x00,$key_ # re-pre-load round[1]
1556 subic $len,$len,129 # $len-=129
1557 vcipher $out0,$out0,v27
1558 addi $len,$len,1 # $len-=128 really
1559 vcipher $out1,$out1,v27
1560 vcipher $out2,$out2,v27
1561 vcipher $out3,$out3,v27
1562 vcipher $out4,$out4,v27
1563 vcipher $out5,$out5,v27
1564 vcipher $out6,$out6,v27
1565 vcipher $out7,$out7,v27
1566 lvx v25,$x10,$key_ # re-pre-load round[2]
1568 vcipher $out0,$out0,v28
1569 lvx_u $in0,$x00,$inp # load input
1570 vcipher $out1,$out1,v28
1571 lvx_u $in1,$x10,$inp
1572 vcipher $out2,$out2,v28
1573 lvx_u $in2,$x20,$inp
1574 vcipher $out3,$out3,v28
1575 lvx_u $in3,$x30,$inp
1576 vcipher $out4,$out4,v28
1577 lvx_u $in4,$x40,$inp
1578 vcipher $out5,$out5,v28
1579 lvx_u $in5,$x50,$inp
1580 vcipher $out6,$out6,v28
1581 lvx_u $in6,$x60,$inp
1582 vcipher $out7,$out7,v28
1583 lvx_u $in7,$x70,$inp
1586 vcipher $out0,$out0,v29
1587 le?vperm $in0,$in0,$in0,$inpperm
1588 vcipher $out1,$out1,v29
1589 le?vperm $in1,$in1,$in1,$inpperm
1590 vcipher $out2,$out2,v29
1591 le?vperm $in2,$in2,$in2,$inpperm
1592 vcipher $out3,$out3,v29
1593 le?vperm $in3,$in3,$in3,$inpperm
1594 vcipher $out4,$out4,v29
1595 le?vperm $in4,$in4,$in4,$inpperm
1596 vcipher $out5,$out5,v29
1597 le?vperm $in5,$in5,$in5,$inpperm
1598 vcipher $out6,$out6,v29
1599 le?vperm $in6,$in6,$in6,$inpperm
1600 vcipher $out7,$out7,v29
1601 le?vperm $in7,$in7,$in7,$inpperm
1603 add $inp,$inp,r0 # $inp is adjusted in such
1604 # way that at exit from the
1605 # loop inX-in7 are loaded
1607 subfe. r0,r0,r0 # borrow?-1:0
1608 vcipher $out0,$out0,v30
1609 vxor $in0,$in0,v31 # xor with last round key
1610 vcipher $out1,$out1,v30
1612 vcipher $out2,$out2,v30
1614 vcipher $out3,$out3,v30
1616 vcipher $out4,$out4,v30
1618 vcipher $out5,$out5,v30
1620 vcipher $out6,$out6,v30
1622 vcipher $out7,$out7,v30
1625 bne Lctr32_enc8x_break # did $len-129 borrow?
1627 vcipherlast $in0,$out0,$in0
1628 vcipherlast $in1,$out1,$in1
1629 vadduwm $out1,$ivec,$one # counter values ...
1630 vcipherlast $in2,$out2,$in2
1631 vadduwm $out2,$ivec,$two
1632 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1633 vcipherlast $in3,$out3,$in3
1634 vadduwm $out3,$out1,$two
1635 vxor $out1,$out1,$rndkey0
1636 vcipherlast $in4,$out4,$in4
1637 vadduwm $out4,$out2,$two
1638 vxor $out2,$out2,$rndkey0
1639 vcipherlast $in5,$out5,$in5
1640 vadduwm $out5,$out3,$two
1641 vxor $out3,$out3,$rndkey0
1642 vcipherlast $in6,$out6,$in6
1643 vadduwm $out6,$out4,$two
1644 vxor $out4,$out4,$rndkey0
1645 vcipherlast $in7,$out7,$in7
1646 vadduwm $out7,$out5,$two
1647 vxor $out5,$out5,$rndkey0
1648 le?vperm $in0,$in0,$in0,$inpperm
1649 vadduwm $ivec,$out6,$two # next counter value
1650 vxor $out6,$out6,$rndkey0
1651 le?vperm $in1,$in1,$in1,$inpperm
1652 vxor $out7,$out7,$rndkey0
1655 vcipher $out0,$out0,v24
1656 stvx_u $in0,$x00,$out
1657 le?vperm $in2,$in2,$in2,$inpperm
1658 vcipher $out1,$out1,v24
1659 stvx_u $in1,$x10,$out
1660 le?vperm $in3,$in3,$in3,$inpperm
1661 vcipher $out2,$out2,v24
1662 stvx_u $in2,$x20,$out
1663 le?vperm $in4,$in4,$in4,$inpperm
1664 vcipher $out3,$out3,v24
1665 stvx_u $in3,$x30,$out
1666 le?vperm $in5,$in5,$in5,$inpperm
1667 vcipher $out4,$out4,v24
1668 stvx_u $in4,$x40,$out
1669 le?vperm $in6,$in6,$in6,$inpperm
1670 vcipher $out5,$out5,v24
1671 stvx_u $in5,$x50,$out
1672 le?vperm $in7,$in7,$in7,$inpperm
1673 vcipher $out6,$out6,v24
1674 stvx_u $in6,$x60,$out
1675 vcipher $out7,$out7,v24
1676 stvx_u $in7,$x70,$out
1679 b Loop_ctr32_enc8x_middle
1684 blt Lctr32_enc8x_one
1686 beq Lctr32_enc8x_two
1688 blt Lctr32_enc8x_three
1690 beq Lctr32_enc8x_four
1692 blt Lctr32_enc8x_five
1694 beq Lctr32_enc8x_six
1696 blt Lctr32_enc8x_seven
1699 vcipherlast $out0,$out0,$in0
1700 vcipherlast $out1,$out1,$in1
1701 vcipherlast $out2,$out2,$in2
1702 vcipherlast $out3,$out3,$in3
1703 vcipherlast $out4,$out4,$in4
1704 vcipherlast $out5,$out5,$in5
1705 vcipherlast $out6,$out6,$in6
1706 vcipherlast $out7,$out7,$in7
1708 le?vperm $out0,$out0,$out0,$inpperm
1709 le?vperm $out1,$out1,$out1,$inpperm
1710 stvx_u $out0,$x00,$out
1711 le?vperm $out2,$out2,$out2,$inpperm
1712 stvx_u $out1,$x10,$out
1713 le?vperm $out3,$out3,$out3,$inpperm
1714 stvx_u $out2,$x20,$out
1715 le?vperm $out4,$out4,$out4,$inpperm
1716 stvx_u $out3,$x30,$out
1717 le?vperm $out5,$out5,$out5,$inpperm
1718 stvx_u $out4,$x40,$out
1719 le?vperm $out6,$out6,$out6,$inpperm
1720 stvx_u $out5,$x50,$out
1721 le?vperm $out7,$out7,$out7,$inpperm
1722 stvx_u $out6,$x60,$out
1723 stvx_u $out7,$x70,$out
1729 vcipherlast $out0,$out0,$in1
1730 vcipherlast $out1,$out1,$in2
1731 vcipherlast $out2,$out2,$in3
1732 vcipherlast $out3,$out3,$in4
1733 vcipherlast $out4,$out4,$in5
1734 vcipherlast $out5,$out5,$in6
1735 vcipherlast $out6,$out6,$in7
1737 le?vperm $out0,$out0,$out0,$inpperm
1738 le?vperm $out1,$out1,$out1,$inpperm
1739 stvx_u $out0,$x00,$out
1740 le?vperm $out2,$out2,$out2,$inpperm
1741 stvx_u $out1,$x10,$out
1742 le?vperm $out3,$out3,$out3,$inpperm
1743 stvx_u $out2,$x20,$out
1744 le?vperm $out4,$out4,$out4,$inpperm
1745 stvx_u $out3,$x30,$out
1746 le?vperm $out5,$out5,$out5,$inpperm
1747 stvx_u $out4,$x40,$out
1748 le?vperm $out6,$out6,$out6,$inpperm
1749 stvx_u $out5,$x50,$out
1750 stvx_u $out6,$x60,$out
1756 vcipherlast $out0,$out0,$in2
1757 vcipherlast $out1,$out1,$in3
1758 vcipherlast $out2,$out2,$in4
1759 vcipherlast $out3,$out3,$in5
1760 vcipherlast $out4,$out4,$in6
1761 vcipherlast $out5,$out5,$in7
1763 le?vperm $out0,$out0,$out0,$inpperm
1764 le?vperm $out1,$out1,$out1,$inpperm
1765 stvx_u $out0,$x00,$out
1766 le?vperm $out2,$out2,$out2,$inpperm
1767 stvx_u $out1,$x10,$out
1768 le?vperm $out3,$out3,$out3,$inpperm
1769 stvx_u $out2,$x20,$out
1770 le?vperm $out4,$out4,$out4,$inpperm
1771 stvx_u $out3,$x30,$out
1772 le?vperm $out5,$out5,$out5,$inpperm
1773 stvx_u $out4,$x40,$out
1774 stvx_u $out5,$x50,$out
1780 vcipherlast $out0,$out0,$in3
1781 vcipherlast $out1,$out1,$in4
1782 vcipherlast $out2,$out2,$in5
1783 vcipherlast $out3,$out3,$in6
1784 vcipherlast $out4,$out4,$in7
1786 le?vperm $out0,$out0,$out0,$inpperm
1787 le?vperm $out1,$out1,$out1,$inpperm
1788 stvx_u $out0,$x00,$out
1789 le?vperm $out2,$out2,$out2,$inpperm
1790 stvx_u $out1,$x10,$out
1791 le?vperm $out3,$out3,$out3,$inpperm
1792 stvx_u $out2,$x20,$out
1793 le?vperm $out4,$out4,$out4,$inpperm
1794 stvx_u $out3,$x30,$out
1795 stvx_u $out4,$x40,$out
1801 vcipherlast $out0,$out0,$in4
1802 vcipherlast $out1,$out1,$in5
1803 vcipherlast $out2,$out2,$in6
1804 vcipherlast $out3,$out3,$in7
1806 le?vperm $out0,$out0,$out0,$inpperm
1807 le?vperm $out1,$out1,$out1,$inpperm
1808 stvx_u $out0,$x00,$out
1809 le?vperm $out2,$out2,$out2,$inpperm
1810 stvx_u $out1,$x10,$out
1811 le?vperm $out3,$out3,$out3,$inpperm
1812 stvx_u $out2,$x20,$out
1813 stvx_u $out3,$x30,$out
1819 vcipherlast $out0,$out0,$in5
1820 vcipherlast $out1,$out1,$in6
1821 vcipherlast $out2,$out2,$in7
1823 le?vperm $out0,$out0,$out0,$inpperm
1824 le?vperm $out1,$out1,$out1,$inpperm
1825 stvx_u $out0,$x00,$out
1826 le?vperm $out2,$out2,$out2,$inpperm
1827 stvx_u $out1,$x10,$out
1828 stvx_u $out2,$x20,$out
1834 vcipherlast $out0,$out0,$in6
1835 vcipherlast $out1,$out1,$in7
1837 le?vperm $out0,$out0,$out0,$inpperm
1838 le?vperm $out1,$out1,$out1,$inpperm
1839 stvx_u $out0,$x00,$out
1840 stvx_u $out1,$x10,$out
1846 vcipherlast $out0,$out0,$in7
1848 le?vperm $out0,$out0,$out0,$inpperm
1855 stvx $inpperm,r10,$sp # wipe copies of round keys
1857 stvx $inpperm,r11,$sp
1859 stvx $inpperm,r10,$sp
1861 stvx $inpperm,r11,$sp
1863 stvx $inpperm,r10,$sp
1865 stvx $inpperm,r11,$sp
1867 stvx $inpperm,r10,$sp
1869 stvx $inpperm,r11,$sp
1873 lvx v20,r10,$sp # ABI says so
1895 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1896 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1897 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1898 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1899 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1900 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1901 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1904 .byte 0,12,0x04,0,0x80,6,6,0
1906 .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1910 #########################################################################
# NOTE(review): register assignments for the scalar (one-block-at-a-time)
# XTS code paths that follow.  GPRs r3..r10 are the incoming C argument
# registers; vector registers v0..v12 hold the working state.
1911 {{{ # XTS procedures #
1912 # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
1913 # const AES_KEY *key1, const AES_KEY *key2, #
1914 # [const] unsigned char iv[16]); #
1915 # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
1916 # input tweak value is assumed to be encrypted already, and last tweak #
1917 # value, one suitable for consecutive call on same chunk of data, is #
1918 # written back to original buffer. In addition, in "tweak chaining" #
1919 # mode only complete input blocks are processed. #
# inp=r3, out=r4, len=r5, key1=r6, key2=r7, ivp=r8, rounds=r9, idx=r10.
1921 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
1922 my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
1923 my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
# $seven/$eighty7 are built at run time (vspltisb/vslb/vor/vsldoi) into the
# 0x87-based constant used by the vsrab/vand/vxor sequences that step the
# tweak to the next block; $tweak1 is the extra tweak used around
# ciphertext stealing in the decrypt path.
1924 my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
# $taillen deliberately aliases $key2 (r7): once key2 has been consumed to
# produce the encrypted tweak, its register is recycled to hold the
# partial-block tail length.
1925 my $taillen = $key2;
# Swap the registers behind $inp and $idx ($inp becomes r10, $idx becomes
# r3) so the names below match how the assembly actually uses them.
1927 ($inp,$idx) = ($idx,$inp); # reassign
1930 .globl .${prefix}_xts_encrypt
1932 .${prefix}_xts_encrypt:
1933 mr $inp,r3 # reassign
1939 mfspr r12,256 # save vrsave
1943 vspltisb $seven,0x07 # 0x070707..07
1944 le?lvsl $leperm,r11,r11
1945 le?vspltisb $tmp,0x0f
1946 le?vxor $leperm,$leperm,$seven
1949 lvx $tweak,0,$ivp # load [unaligned] iv
1950 lvsl $inpperm,0,$ivp
1951 lvx $inptail,$idx,$ivp
1952 le?vxor $inpperm,$inpperm,$tmp
1953 vperm $tweak,$tweak,$inptail,$inpperm
1956 lvsr $inpperm,0,r11 # prepare for unaligned load
1958 addi $inp,$inp,15 # 15 is not typo
1959 le?vxor $inpperm,$inpperm,$tmp
1961 ${UCMP}i $key2,0 # key2==NULL?
1962 beq Lxts_enc_no_key2
1964 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
1965 lwz $rounds,240($key2)
1966 srwi $rounds,$rounds,1
1967 subi $rounds,$rounds,1
1970 lvx $rndkey0,0,$key2
1971 lvx $rndkey1,$idx,$key2
1973 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1974 vxor $tweak,$tweak,$rndkey0
1975 lvx $rndkey0,$idx,$key2
1980 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1981 vcipher $tweak,$tweak,$rndkey1
1982 lvx $rndkey1,$idx,$key2
1984 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1985 vcipher $tweak,$tweak,$rndkey0
1986 lvx $rndkey0,$idx,$key2
1990 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1991 vcipher $tweak,$tweak,$rndkey1
1992 lvx $rndkey1,$idx,$key2
1993 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1994 vcipherlast $tweak,$tweak,$rndkey0
1996 li $ivp,0 # don't chain the tweak
2001 and $len,$len,$idx # in "tweak chaining"
2002 # mode only complete
2003 # blocks are processed
2008 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2009 lwz $rounds,240($key1)
2010 srwi $rounds,$rounds,1
2011 subi $rounds,$rounds,1
2014 vslb $eighty7,$seven,$seven # 0x808080..80
2015 vor $eighty7,$eighty7,$seven # 0x878787..87
2016 vspltisb $tmp,1 # 0x010101..01
2017 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2020 bge _aesp8_xts_encrypt6x
2022 andi. $taillen,$len,15
2024 subi $taillen,$taillen,16
2029 lvx $rndkey0,0,$key1
2030 lvx $rndkey1,$idx,$key1
2032 vperm $inout,$inout,$inptail,$inpperm
2033 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2034 vxor $inout,$inout,$tweak
2035 vxor $inout,$inout,$rndkey0
2036 lvx $rndkey0,$idx,$key1
2043 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2044 vcipher $inout,$inout,$rndkey1
2045 lvx $rndkey1,$idx,$key1
2047 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2048 vcipher $inout,$inout,$rndkey0
2049 lvx $rndkey0,$idx,$key1
2053 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2054 vcipher $inout,$inout,$rndkey1
2055 lvx $rndkey1,$idx,$key1
2057 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2058 vxor $rndkey0,$rndkey0,$tweak
2059 vcipherlast $output,$inout,$rndkey0
2061 le?vperm $tmp,$output,$output,$leperm
2063 le?stvx_u $tmp,0,$out
2064 be?stvx_u $output,0,$out
2073 lvx $rndkey0,0,$key1
2074 lvx $rndkey1,$idx,$key1
2082 vsrab $tmp,$tweak,$seven # next tweak value
2083 vaddubm $tweak,$tweak,$tweak
2084 vsldoi $tmp,$tmp,$tmp,15
2085 vand $tmp,$tmp,$eighty7
2086 vxor $tweak,$tweak,$tmp
2088 vperm $inout,$inout,$inptail,$inpperm
2089 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2090 vxor $inout,$inout,$tweak
2091 vxor $output,$output,$rndkey0 # just in case $len<16
2092 vxor $inout,$inout,$rndkey0
2093 lvx $rndkey0,$idx,$key1
2100 vxor $output,$output,$tweak
2101 lvsr $inpperm,0,$len # $inpperm is no longer needed
2102 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2104 vperm $inptail,$inptail,$tmp,$inpperm
2105 vsel $inout,$inout,$output,$inptail
2114 bdnz Loop_xts_enc_steal
2117 b Loop_xts_enc # one more time...
2123 vsrab $tmp,$tweak,$seven # next tweak value
2124 vaddubm $tweak,$tweak,$tweak
2125 vsldoi $tmp,$tmp,$tmp,15
2126 vand $tmp,$tmp,$eighty7
2127 vxor $tweak,$tweak,$tmp
2129 le?vperm $tweak,$tweak,$tweak,$leperm
2130 stvx_u $tweak,0,$ivp
2133 mtspr 256,r12 # restore vrsave
2137 .byte 0,12,0x04,0,0x80,6,6,0
2139 .size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2141 .globl .${prefix}_xts_decrypt
2143 .${prefix}_xts_decrypt:
2144 mr $inp,r3 # reassign
2150 mfspr r12,256 # save vrsave
2159 vspltisb $seven,0x07 # 0x070707..07
2160 le?lvsl $leperm,r11,r11
2161 le?vspltisb $tmp,0x0f
2162 le?vxor $leperm,$leperm,$seven
2165 lvx $tweak,0,$ivp # load [unaligned] iv
2166 lvsl $inpperm,0,$ivp
2167 lvx $inptail,$idx,$ivp
2168 le?vxor $inpperm,$inpperm,$tmp
2169 vperm $tweak,$tweak,$inptail,$inpperm
2172 lvsr $inpperm,0,r11 # prepare for unaligned load
2174 addi $inp,$inp,15 # 15 is not typo
2175 le?vxor $inpperm,$inpperm,$tmp
2177 ${UCMP}i $key2,0 # key2==NULL?
2178 beq Lxts_dec_no_key2
2180 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2181 lwz $rounds,240($key2)
2182 srwi $rounds,$rounds,1
2183 subi $rounds,$rounds,1
2186 lvx $rndkey0,0,$key2
2187 lvx $rndkey1,$idx,$key2
2189 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2190 vxor $tweak,$tweak,$rndkey0
2191 lvx $rndkey0,$idx,$key2
2196 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2197 vcipher $tweak,$tweak,$rndkey1
2198 lvx $rndkey1,$idx,$key2
2200 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2201 vcipher $tweak,$tweak,$rndkey0
2202 lvx $rndkey0,$idx,$key2
2206 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2207 vcipher $tweak,$tweak,$rndkey1
2208 lvx $rndkey1,$idx,$key2
2209 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2210 vcipherlast $tweak,$tweak,$rndkey0
2212 li $ivp,0 # don't chain the tweak
2218 add $len,$len,$idx # in "tweak chaining"
2219 # mode only complete
2220 # blocks are processed
2225 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2226 lwz $rounds,240($key1)
2227 srwi $rounds,$rounds,1
2228 subi $rounds,$rounds,1
2231 vslb $eighty7,$seven,$seven # 0x808080..80
2232 vor $eighty7,$eighty7,$seven # 0x878787..87
2233 vspltisb $tmp,1 # 0x010101..01
2234 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2237 bge _aesp8_xts_decrypt6x
2239 lvx $rndkey0,0,$key1
2240 lvx $rndkey1,$idx,$key1
2242 vperm $inout,$inout,$inptail,$inpperm
2243 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2244 vxor $inout,$inout,$tweak
2245 vxor $inout,$inout,$rndkey0
2246 lvx $rndkey0,$idx,$key1
2256 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2257 vncipher $inout,$inout,$rndkey1
2258 lvx $rndkey1,$idx,$key1
2260 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2261 vncipher $inout,$inout,$rndkey0
2262 lvx $rndkey0,$idx,$key1
2266 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2267 vncipher $inout,$inout,$rndkey1
2268 lvx $rndkey1,$idx,$key1
2270 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2271 vxor $rndkey0,$rndkey0,$tweak
2272 vncipherlast $output,$inout,$rndkey0
2274 le?vperm $tmp,$output,$output,$leperm
2276 le?stvx_u $tmp,0,$out
2277 be?stvx_u $output,0,$out
2286 lvx $rndkey0,0,$key1
2287 lvx $rndkey1,$idx,$key1
2290 vsrab $tmp,$tweak,$seven # next tweak value
2291 vaddubm $tweak,$tweak,$tweak
2292 vsldoi $tmp,$tmp,$tmp,15
2293 vand $tmp,$tmp,$eighty7
2294 vxor $tweak,$tweak,$tmp
2296 vperm $inout,$inout,$inptail,$inpperm
2297 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2298 vxor $inout,$inout,$tweak
2299 vxor $inout,$inout,$rndkey0
2300 lvx $rndkey0,$idx,$key1
2308 vsrab $tmp,$tweak,$seven # next tweak value
2309 vaddubm $tweak1,$tweak,$tweak
2310 vsldoi $tmp,$tmp,$tmp,15
2311 vand $tmp,$tmp,$eighty7
2312 vxor $tweak1,$tweak1,$tmp
2317 vxor $inout,$inout,$tweak # :-(
2318 vxor $inout,$inout,$tweak1 # :-)
2321 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2322 vncipher $inout,$inout,$rndkey1
2323 lvx $rndkey1,$idx,$key1
2325 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2326 vncipher $inout,$inout,$rndkey0
2327 lvx $rndkey0,$idx,$key1
2329 bdnz Loop_xts_dec_short
2331 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2332 vncipher $inout,$inout,$rndkey1
2333 lvx $rndkey1,$idx,$key1
2335 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2336 vxor $rndkey0,$rndkey0,$tweak1
2337 vncipherlast $output,$inout,$rndkey0
2339 le?vperm $tmp,$output,$output,$leperm
2341 le?stvx_u $tmp,0,$out
2342 be?stvx_u $output,0,$out
2347 lvx $rndkey0,0,$key1
2348 lvx $rndkey1,$idx,$key1
2350 vperm $inout,$inout,$inptail,$inpperm
2351 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2353 lvsr $inpperm,0,$len # $inpperm is no longer needed
2354 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2356 vperm $inptail,$inptail,$tmp,$inpperm
2357 vsel $inout,$inout,$output,$inptail
2359 vxor $rndkey0,$rndkey0,$tweak
2360 vxor $inout,$inout,$rndkey0
2361 lvx $rndkey0,$idx,$key1
2370 bdnz Loop_xts_dec_steal
2373 b Loop_xts_dec # one more time...
2379 vsrab $tmp,$tweak,$seven # next tweak value
2380 vaddubm $tweak,$tweak,$tweak
2381 vsldoi $tmp,$tmp,$tmp,15
2382 vand $tmp,$tmp,$eighty7
2383 vxor $tweak,$tweak,$tmp
2385 le?vperm $tweak,$tweak,$tweak,$leperm
2386 stvx_u $tweak,0,$ivp
2389 mtspr 256,r12 # restore vrsave
2393 .byte 0,12,0x04,0,0x80,6,6,0
2395 .size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2397 #########################################################################
2398 {{ # Optimized XTS procedures #
# NOTE(review): register map for the 6x-interleaved XTS loops.  x00..x70
# are displacement registers for the eight 16-byte lanes (r0, r3 and the
# callee-saved r26..r31, which the 6x prologues save on the stack).
2400 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
# On Darwin ("osx" flavour) emit a literal 0 instead of r0 — presumably
# because r0 as an index operand reads as zero / is rejected by that
# assembler syntax; TODO(review): confirm against the ppc-xlate.pl rules.
2401 $x00=0 if ($flavour =~ /osx/);
# v0..v5 input blocks, v7/v12..v16 output blocks, v17..v22 per-lane tweaks;
# chosen so none of them collide with the v24..v31 round-key buffer below.
2402 my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2403 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2404 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2405 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
2406 # v26-v31 last 6 round keys
# $keyperm reuses $out0's register: the key-permute vector is only needed
# while loading the schedule, before $out0 carries ciphertext.
2407 my ($keyperm)=($out0); # aliases with "caller", redundant assignment
2412 _aesp8_xts_encrypt6x:
2413 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2415 li r7,`$FRAME+8*16+15`
2416 li r3,`$FRAME+8*16+31`
2417 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2418 stvx v20,r7,$sp # ABI says so
2441 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2443 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2445 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2447 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2449 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2451 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2453 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2457 subi $rounds,$rounds,3 # -4 in total
2459 lvx $rndkey0,$x00,$key1 # load key schedule
2461 addi $key1,$key1,0x20
2463 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2464 addi $key_,$sp,$FRAME+15
2468 ?vperm v24,v30,v31,$keyperm
2470 addi $key1,$key1,0x20
2471 stvx v24,$x00,$key_ # off-load round[1]
2472 ?vperm v25,v31,v30,$keyperm
2474 stvx v25,$x10,$key_ # off-load round[2]
2475 addi $key_,$key_,0x20
2476 bdnz Load_xts_enc_key
2479 ?vperm v24,v30,v31,$keyperm
2481 stvx v24,$x00,$key_ # off-load round[3]
2482 ?vperm v25,v31,v26,$keyperm
2484 stvx v25,$x10,$key_ # off-load round[4]
2485 addi $key_,$sp,$FRAME+15 # rewind $key_
2486 ?vperm v26,v26,v27,$keyperm
2488 ?vperm v27,v27,v28,$keyperm
2490 ?vperm v28,v28,v29,$keyperm
2492 ?vperm v29,v29,v30,$keyperm
2493 lvx $twk5,$x70,$key1 # borrow $twk5
2494 ?vperm v30,v30,v31,$keyperm
2495 lvx v24,$x00,$key_ # pre-load round[1]
2496 ?vperm v31,v31,$twk5,$keyperm
2497 lvx v25,$x10,$key_ # pre-load round[2]
2499 vperm $in0,$inout,$inptail,$inpperm
2500 subi $inp,$inp,31 # undo "caller"
2501 vxor $twk0,$tweak,$rndkey0
2502 vsrab $tmp,$tweak,$seven # next tweak value
2503 vaddubm $tweak,$tweak,$tweak
2504 vsldoi $tmp,$tmp,$tmp,15
2505 vand $tmp,$tmp,$eighty7
2506 vxor $out0,$in0,$twk0
2507 vxor $tweak,$tweak,$tmp
2509 lvx_u $in1,$x10,$inp
2510 vxor $twk1,$tweak,$rndkey0
2511 vsrab $tmp,$tweak,$seven # next tweak value
2512 vaddubm $tweak,$tweak,$tweak
2513 vsldoi $tmp,$tmp,$tmp,15
2514 le?vperm $in1,$in1,$in1,$leperm
2515 vand $tmp,$tmp,$eighty7
2516 vxor $out1,$in1,$twk1
2517 vxor $tweak,$tweak,$tmp
2519 lvx_u $in2,$x20,$inp
2520 andi. $taillen,$len,15
2521 vxor $twk2,$tweak,$rndkey0
2522 vsrab $tmp,$tweak,$seven # next tweak value
2523 vaddubm $tweak,$tweak,$tweak
2524 vsldoi $tmp,$tmp,$tmp,15
2525 le?vperm $in2,$in2,$in2,$leperm
2526 vand $tmp,$tmp,$eighty7
2527 vxor $out2,$in2,$twk2
2528 vxor $tweak,$tweak,$tmp
2530 lvx_u $in3,$x30,$inp
2531 sub $len,$len,$taillen
2532 vxor $twk3,$tweak,$rndkey0
2533 vsrab $tmp,$tweak,$seven # next tweak value
2534 vaddubm $tweak,$tweak,$tweak
2535 vsldoi $tmp,$tmp,$tmp,15
2536 le?vperm $in3,$in3,$in3,$leperm
2537 vand $tmp,$tmp,$eighty7
2538 vxor $out3,$in3,$twk3
2539 vxor $tweak,$tweak,$tmp
2541 lvx_u $in4,$x40,$inp
2543 vxor $twk4,$tweak,$rndkey0
2544 vsrab $tmp,$tweak,$seven # next tweak value
2545 vaddubm $tweak,$tweak,$tweak
2546 vsldoi $tmp,$tmp,$tmp,15
2547 le?vperm $in4,$in4,$in4,$leperm
2548 vand $tmp,$tmp,$eighty7
2549 vxor $out4,$in4,$twk4
2550 vxor $tweak,$tweak,$tmp
2552 lvx_u $in5,$x50,$inp
2554 vxor $twk5,$tweak,$rndkey0
2555 vsrab $tmp,$tweak,$seven # next tweak value
2556 vaddubm $tweak,$tweak,$tweak
2557 vsldoi $tmp,$tmp,$tmp,15
2558 le?vperm $in5,$in5,$in5,$leperm
2559 vand $tmp,$tmp,$eighty7
2560 vxor $out5,$in5,$twk5
2561 vxor $tweak,$tweak,$tmp
2563 vxor v31,v31,$rndkey0
2569 vcipher $out0,$out0,v24
2570 vcipher $out1,$out1,v24
2571 vcipher $out2,$out2,v24
2572 vcipher $out3,$out3,v24
2573 vcipher $out4,$out4,v24
2574 vcipher $out5,$out5,v24
2575 lvx v24,$x20,$key_ # round[3]
2576 addi $key_,$key_,0x20
2578 vcipher $out0,$out0,v25
2579 vcipher $out1,$out1,v25
2580 vcipher $out2,$out2,v25
2581 vcipher $out3,$out3,v25
2582 vcipher $out4,$out4,v25
2583 vcipher $out5,$out5,v25
2584 lvx v25,$x10,$key_ # round[4]
2587 subic $len,$len,96 # $len-=96
2588 vxor $in0,$twk0,v31 # xor with last round key
2589 vcipher $out0,$out0,v24
2590 vcipher $out1,$out1,v24
2591 vsrab $tmp,$tweak,$seven # next tweak value
2592 vxor $twk0,$tweak,$rndkey0
2593 vaddubm $tweak,$tweak,$tweak
2594 vcipher $out2,$out2,v24
2595 vcipher $out3,$out3,v24
2596 vsldoi $tmp,$tmp,$tmp,15
2597 vcipher $out4,$out4,v24
2598 vcipher $out5,$out5,v24
2600 subfe. r0,r0,r0 # borrow?-1:0
2601 vand $tmp,$tmp,$eighty7
2602 vcipher $out0,$out0,v25
2603 vcipher $out1,$out1,v25
2604 vxor $tweak,$tweak,$tmp
2605 vcipher $out2,$out2,v25
2606 vcipher $out3,$out3,v25
2608 vsrab $tmp,$tweak,$seven # next tweak value
2609 vxor $twk1,$tweak,$rndkey0
2610 vcipher $out4,$out4,v25
2611 vcipher $out5,$out5,v25
2614 vaddubm $tweak,$tweak,$tweak
2615 vsldoi $tmp,$tmp,$tmp,15
2616 vcipher $out0,$out0,v26
2617 vcipher $out1,$out1,v26
2618 vand $tmp,$tmp,$eighty7
2619 vcipher $out2,$out2,v26
2620 vcipher $out3,$out3,v26
2621 vxor $tweak,$tweak,$tmp
2622 vcipher $out4,$out4,v26
2623 vcipher $out5,$out5,v26
2625 add $inp,$inp,r0 # $inp is adjusted in such
2626 # way that at exit from the
2627 # loop inX-in5 are loaded
2630 vsrab $tmp,$tweak,$seven # next tweak value
2631 vxor $twk2,$tweak,$rndkey0
2632 vaddubm $tweak,$tweak,$tweak
2633 vcipher $out0,$out0,v27
2634 vcipher $out1,$out1,v27
2635 vsldoi $tmp,$tmp,$tmp,15
2636 vcipher $out2,$out2,v27
2637 vcipher $out3,$out3,v27
2638 vand $tmp,$tmp,$eighty7
2639 vcipher $out4,$out4,v27
2640 vcipher $out5,$out5,v27
2642 addi $key_,$sp,$FRAME+15 # rewind $key_
2643 vxor $tweak,$tweak,$tmp
2644 vcipher $out0,$out0,v28
2645 vcipher $out1,$out1,v28
2647 vsrab $tmp,$tweak,$seven # next tweak value
2648 vxor $twk3,$tweak,$rndkey0
2649 vcipher $out2,$out2,v28
2650 vcipher $out3,$out3,v28
2651 vaddubm $tweak,$tweak,$tweak
2652 vsldoi $tmp,$tmp,$tmp,15
2653 vcipher $out4,$out4,v28
2654 vcipher $out5,$out5,v28
2655 lvx v24,$x00,$key_ # re-pre-load round[1]
2656 vand $tmp,$tmp,$eighty7
2658 vcipher $out0,$out0,v29
2659 vcipher $out1,$out1,v29
2660 vxor $tweak,$tweak,$tmp
2661 vcipher $out2,$out2,v29
2662 vcipher $out3,$out3,v29
2664 vsrab $tmp,$tweak,$seven # next tweak value
2665 vxor $twk4,$tweak,$rndkey0
2666 vcipher $out4,$out4,v29
2667 vcipher $out5,$out5,v29
2668 lvx v25,$x10,$key_ # re-pre-load round[2]
2669 vaddubm $tweak,$tweak,$tweak
2670 vsldoi $tmp,$tmp,$tmp,15
2672 vcipher $out0,$out0,v30
2673 vcipher $out1,$out1,v30
2674 vand $tmp,$tmp,$eighty7
2675 vcipher $out2,$out2,v30
2676 vcipher $out3,$out3,v30
2677 vxor $tweak,$tweak,$tmp
2678 vcipher $out4,$out4,v30
2679 vcipher $out5,$out5,v30
2681 vsrab $tmp,$tweak,$seven # next tweak value
2682 vxor $twk5,$tweak,$rndkey0
2684 vcipherlast $out0,$out0,$in0
2685 lvx_u $in0,$x00,$inp # load next input block
2686 vaddubm $tweak,$tweak,$tweak
2687 vsldoi $tmp,$tmp,$tmp,15
2688 vcipherlast $out1,$out1,$in1
2689 lvx_u $in1,$x10,$inp
2690 vcipherlast $out2,$out2,$in2
2691 le?vperm $in0,$in0,$in0,$leperm
2692 lvx_u $in2,$x20,$inp
2693 vand $tmp,$tmp,$eighty7
2694 vcipherlast $out3,$out3,$in3
2695 le?vperm $in1,$in1,$in1,$leperm
2696 lvx_u $in3,$x30,$inp
2697 vcipherlast $out4,$out4,$in4
2698 le?vperm $in2,$in2,$in2,$leperm
2699 lvx_u $in4,$x40,$inp
2700 vxor $tweak,$tweak,$tmp
2701 vcipherlast $tmp,$out5,$in5 # last block might be needed
2703 le?vperm $in3,$in3,$in3,$leperm
2704 lvx_u $in5,$x50,$inp
2706 le?vperm $in4,$in4,$in4,$leperm
2707 le?vperm $in5,$in5,$in5,$leperm
2709 le?vperm $out0,$out0,$out0,$leperm
2710 le?vperm $out1,$out1,$out1,$leperm
2711 stvx_u $out0,$x00,$out # store output
2712 vxor $out0,$in0,$twk0
2713 le?vperm $out2,$out2,$out2,$leperm
2714 stvx_u $out1,$x10,$out
2715 vxor $out1,$in1,$twk1
2716 le?vperm $out3,$out3,$out3,$leperm
2717 stvx_u $out2,$x20,$out
2718 vxor $out2,$in2,$twk2
2719 le?vperm $out4,$out4,$out4,$leperm
2720 stvx_u $out3,$x30,$out
2721 vxor $out3,$in3,$twk3
2722 le?vperm $out5,$tmp,$tmp,$leperm
2723 stvx_u $out4,$x40,$out
2724 vxor $out4,$in4,$twk4
2725 le?stvx_u $out5,$x50,$out
2726 be?stvx_u $tmp, $x50,$out
2727 vxor $out5,$in5,$twk5
2731 beq Loop_xts_enc6x # did $len-=96 borrow?
2733 addic. $len,$len,0x60
2740 blt Lxts_enc6x_three
2745 vxor $out0,$in1,$twk0
2746 vxor $out1,$in2,$twk1
2747 vxor $out2,$in3,$twk2
2748 vxor $out3,$in4,$twk3
2749 vxor $out4,$in5,$twk4
2753 le?vperm $out0,$out0,$out0,$leperm
2754 vmr $twk0,$twk5 # unused tweak
2755 le?vperm $out1,$out1,$out1,$leperm
2756 stvx_u $out0,$x00,$out # store output
2757 le?vperm $out2,$out2,$out2,$leperm
2758 stvx_u $out1,$x10,$out
2759 le?vperm $out3,$out3,$out3,$leperm
2760 stvx_u $out2,$x20,$out
2761 vxor $tmp,$out4,$twk5 # last block prep for stealing
2762 le?vperm $out4,$out4,$out4,$leperm
2763 stvx_u $out3,$x30,$out
2764 stvx_u $out4,$x40,$out
2766 bne Lxts_enc6x_steal
2771 vxor $out0,$in2,$twk0
2772 vxor $out1,$in3,$twk1
2773 vxor $out2,$in4,$twk2
2774 vxor $out3,$in5,$twk3
2775 vxor $out4,$out4,$out4
2779 le?vperm $out0,$out0,$out0,$leperm
2780 vmr $twk0,$twk4 # unused tweak
2781 le?vperm $out1,$out1,$out1,$leperm
2782 stvx_u $out0,$x00,$out # store output
2783 le?vperm $out2,$out2,$out2,$leperm
2784 stvx_u $out1,$x10,$out
2785 vxor $tmp,$out3,$twk4 # last block prep for stealing
2786 le?vperm $out3,$out3,$out3,$leperm
2787 stvx_u $out2,$x20,$out
2788 stvx_u $out3,$x30,$out
2790 bne Lxts_enc6x_steal
2795 vxor $out0,$in3,$twk0
2796 vxor $out1,$in4,$twk1
2797 vxor $out2,$in5,$twk2
2798 vxor $out3,$out3,$out3
2799 vxor $out4,$out4,$out4
2803 le?vperm $out0,$out0,$out0,$leperm
2804 vmr $twk0,$twk3 # unused tweak
2805 le?vperm $out1,$out1,$out1,$leperm
2806 stvx_u $out0,$x00,$out # store output
2807 vxor $tmp,$out2,$twk3 # last block prep for stealing
2808 le?vperm $out2,$out2,$out2,$leperm
2809 stvx_u $out1,$x10,$out
2810 stvx_u $out2,$x20,$out
2812 bne Lxts_enc6x_steal
2817 vxor $out0,$in4,$twk0
2818 vxor $out1,$in5,$twk1
2819 vxor $out2,$out2,$out2
2820 vxor $out3,$out3,$out3
2821 vxor $out4,$out4,$out4
2825 le?vperm $out0,$out0,$out0,$leperm
2826 vmr $twk0,$twk2 # unused tweak
2827 vxor $tmp,$out1,$twk2 # last block prep for stealing
2828 le?vperm $out1,$out1,$out1,$leperm
2829 stvx_u $out0,$x00,$out # store output
2830 stvx_u $out1,$x10,$out
2832 bne Lxts_enc6x_steal
2837 vxor $out0,$in5,$twk0
2840 vcipher $out0,$out0,v24
2841 lvx v24,$x20,$key_ # round[3]
2842 addi $key_,$key_,0x20
2844 vcipher $out0,$out0,v25
2845 lvx v25,$x10,$key_ # round[4]
2848 add $inp,$inp,$taillen
2850 vcipher $out0,$out0,v24
2853 vcipher $out0,$out0,v25
2855 lvsr $inpperm,0,$taillen
2856 vcipher $out0,$out0,v26
2859 vcipher $out0,$out0,v27
2861 addi $key_,$sp,$FRAME+15 # rewind $key_
2862 vcipher $out0,$out0,v28
2863 lvx v24,$x00,$key_ # re-pre-load round[1]
2865 vcipher $out0,$out0,v29
2866 lvx v25,$x10,$key_ # re-pre-load round[2]
2867 vxor $twk0,$twk0,v31
2869 le?vperm $in0,$in0,$in0,$leperm
2870 vcipher $out0,$out0,v30
2872 vperm $in0,$in0,$in0,$inpperm
2873 vcipherlast $out0,$out0,$twk0
2875 vmr $twk0,$twk1 # unused tweak
2876 vxor $tmp,$out0,$twk1 # last block prep for stealing
2877 le?vperm $out0,$out0,$out0,$leperm
2878 stvx_u $out0,$x00,$out # store output
2880 bne Lxts_enc6x_steal
2888 add $inp,$inp,$taillen
2891 lvsr $inpperm,0,$taillen # $in5 is no more
2892 le?vperm $in0,$in0,$in0,$leperm
2893 vperm $in0,$in0,$in0,$inpperm
2894 vxor $tmp,$tmp,$twk0
2896 vxor $in0,$in0,$twk0
2897 vxor $out0,$out0,$out0
2899 vperm $out0,$out0,$out1,$inpperm
2900 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2905 Loop_xts_enc6x_steal:
2908 bdnz Loop_xts_enc6x_steal
2912 b Loop_xts_enc1x # one more time...
2919 vxor $tweak,$twk0,$rndkey0
2920 le?vperm $tweak,$tweak,$tweak,$leperm
2921 stvx_u $tweak,0,$ivp
2927 stvx $seven,r10,$sp # wipe copies of round keys
2945 lvx v20,r10,$sp # ABI says so
2967 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2968 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2969 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2970 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2971 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2972 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2973 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2976 .byte 0,12,0x04,1,0x80,6,6,0
2981 vcipher $out0,$out0,v24
2982 vcipher $out1,$out1,v24
2983 vcipher $out2,$out2,v24
2984 vcipher $out3,$out3,v24
2985 vcipher $out4,$out4,v24
2986 lvx v24,$x20,$key_ # round[3]
2987 addi $key_,$key_,0x20
2989 vcipher $out0,$out0,v25
2990 vcipher $out1,$out1,v25
2991 vcipher $out2,$out2,v25
2992 vcipher $out3,$out3,v25
2993 vcipher $out4,$out4,v25
2994 lvx v25,$x10,$key_ # round[4]
2995 bdnz _aesp8_xts_enc5x
2997 add $inp,$inp,$taillen
2999 vcipher $out0,$out0,v24
3000 vcipher $out1,$out1,v24
3001 vcipher $out2,$out2,v24
3002 vcipher $out3,$out3,v24
3003 vcipher $out4,$out4,v24
3006 vcipher $out0,$out0,v25
3007 vcipher $out1,$out1,v25
3008 vcipher $out2,$out2,v25
3009 vcipher $out3,$out3,v25
3010 vcipher $out4,$out4,v25
3011 vxor $twk0,$twk0,v31
3013 vcipher $out0,$out0,v26
3014 lvsr $inpperm,r0,$taillen # $in5 is no more
3015 vcipher $out1,$out1,v26
3016 vcipher $out2,$out2,v26
3017 vcipher $out3,$out3,v26
3018 vcipher $out4,$out4,v26
3021 vcipher $out0,$out0,v27
3023 vcipher $out1,$out1,v27
3024 vcipher $out2,$out2,v27
3025 vcipher $out3,$out3,v27
3026 vcipher $out4,$out4,v27
3029 addi $key_,$sp,$FRAME+15 # rewind $key_
3030 vcipher $out0,$out0,v28
3031 vcipher $out1,$out1,v28
3032 vcipher $out2,$out2,v28
3033 vcipher $out3,$out3,v28
3034 vcipher $out4,$out4,v28
3035 lvx v24,$x00,$key_ # re-pre-load round[1]
3038 vcipher $out0,$out0,v29
3039 le?vperm $in0,$in0,$in0,$leperm
3040 vcipher $out1,$out1,v29
3041 vcipher $out2,$out2,v29
3042 vcipher $out3,$out3,v29
3043 vcipher $out4,$out4,v29
3044 lvx v25,$x10,$key_ # re-pre-load round[2]
3047 vcipher $out0,$out0,v30
3048 vperm $in0,$in0,$in0,$inpperm
3049 vcipher $out1,$out1,v30
3050 vcipher $out2,$out2,v30
3051 vcipher $out3,$out3,v30
3052 vcipher $out4,$out4,v30
3054 vcipherlast $out0,$out0,$twk0
3055 vcipherlast $out1,$out1,$in1
3056 vcipherlast $out2,$out2,$in2
3057 vcipherlast $out3,$out3,$in3
3058 vcipherlast $out4,$out4,$in4
3061 .byte 0,12,0x14,0,0,0,0,0
3064 _aesp8_xts_decrypt6x:
3065 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3067 li r7,`$FRAME+8*16+15`
3068 li r3,`$FRAME+8*16+31`
3069 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3070 stvx v20,r7,$sp # ABI says so
3093 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3095 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3097 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3099 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3101 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3103 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3105 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3109 subi $rounds,$rounds,3 # -4 in total
3111 lvx $rndkey0,$x00,$key1 # load key schedule
3113 addi $key1,$key1,0x20
3115 ?vperm $rndkey0,$rndkey0,v30,$keyperm