2 # Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
17 # This module implements support for AES instructions as per PowerISA
18 # specification version 2.07, first implemented by POWER8 processor.
19 # The module is endian-agnostic in the sense that it supports both big-
20 # and little-endian cases. Data alignment in parallelizable modes is
21 # handled with VSX loads and stores, which implies MSR.VSX flag being
22 # set. It should also be noted that ISA specification doesn't prohibit
23 # alignment exceptions for these instructions on page boundaries.
24 # Initially alignment was handled in pure AltiVec/VMX way [when data
25 # is aligned programmatically, which in turn guarantees exception-
26 # free execution], but it turned out to hamper performance when vcipher
27 # instructions are interleaved. It's reckoned that eventual
28 # misalignment penalties at page boundaries are on average lower
29 # than additional overhead in pure AltiVec approach.
33 # Added XTS subroutine; a 9x improvement on little- and a 12x improvement
34 # on big-endian systems was measured.
36 ######################################################################
37 # Current large-block performance in cycles per byte processed with
38 # 128-bit key (less is better).
40 # CBC en-/decrypt CTR XTS
41 # POWER8[le] 3.96/0.72 0.74 1.1
42 # POWER8[be] 3.75/0.65 0.66 1.0
43 # POWER9[le] 4.02/0.86 0.84 1.05
44 # POWER9[be] 3.99/0.78 0.79 0.97
46 # $output is the last argument if it looks like a file (it has an extension)
47 # $flavour is the first argument if it doesn't look like a file
48 $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
49 $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
51 if ($flavour =~ /64/) {
59 } elsif ($flavour =~ /32/) {
67 } else { die "nonsense $flavour"; }
69 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
71 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
72 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
73 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
74 die "can't locate ppc-xlate.pl";
76 open STDOUT,"| $^X $xlate $flavour \"$output\""
77 or die "can't call $xlate: $!";
85 #########################################################################
86 {{{ # Key setup procedures #
87 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
88 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
89 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
98 .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
99 .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
100 .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
102 .long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
106 mflr $ptr #vvvvv "distance between . and rcon
111 .byte 0,12,0x14,0,0,0,0,0
112 .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
114 .globl .${prefix}_set_encrypt_key
116 .${prefix}_set_encrypt_key:
119 $PUSH r11,$LRSAVE($sp)
123 beq- Lenc_key_abort # if ($inp==0) return -1;
125 beq- Lenc_key_abort # if ($out==0) return -1;
143 addi $inp,$inp,15 # 15 is not typo
144 lvsr $key,0,r9 # borrow $key
148 le?vspltisb $mask,0x0f # borrow $mask
150 le?vxor $key,$key,$mask # adjust for byte swap
153 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
155 vxor $zero,$zero,$zero
158 ?lvsr $outperm,0,$out
161 ?vperm $outmask,$zero,$outmask,$outperm
171 vperm $key,$in0,$in0,$mask # rotate-n-splat
172 vsldoi $tmp,$zero,$in0,12 # >>32
173 vperm $outtail,$in0,$in0,$outperm # rotate
174 vsel $stage,$outhead,$outtail,$outmask
175 vmr $outhead,$outtail
176 vcipherlast $key,$key,$rcon
181 vsldoi $tmp,$zero,$tmp,12 # >>32
183 vsldoi $tmp,$zero,$tmp,12 # >>32
185 vadduwm $rcon,$rcon,$rcon
189 lvx $rcon,0,$ptr # last two round keys
191 vperm $key,$in0,$in0,$mask # rotate-n-splat
192 vsldoi $tmp,$zero,$in0,12 # >>32
193 vperm $outtail,$in0,$in0,$outperm # rotate
194 vsel $stage,$outhead,$outtail,$outmask
195 vmr $outhead,$outtail
196 vcipherlast $key,$key,$rcon
201 vsldoi $tmp,$zero,$tmp,12 # >>32
203 vsldoi $tmp,$zero,$tmp,12 # >>32
205 vadduwm $rcon,$rcon,$rcon
208 vperm $key,$in0,$in0,$mask # rotate-n-splat
209 vsldoi $tmp,$zero,$in0,12 # >>32
210 vperm $outtail,$in0,$in0,$outperm # rotate
211 vsel $stage,$outhead,$outtail,$outmask
212 vmr $outhead,$outtail
213 vcipherlast $key,$key,$rcon
218 vsldoi $tmp,$zero,$tmp,12 # >>32
220 vsldoi $tmp,$zero,$tmp,12 # >>32
223 vperm $outtail,$in0,$in0,$outperm # rotate
224 vsel $stage,$outhead,$outtail,$outmask
225 vmr $outhead,$outtail
228 addi $inp,$out,15 # 15 is not typo
238 vperm $outtail,$in0,$in0,$outperm # rotate
239 vsel $stage,$outhead,$outtail,$outmask
240 vmr $outhead,$outtail
243 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
244 vspltisb $key,8 # borrow $key
246 vsububm $mask,$mask,$key # adjust the mask
249 vperm $key,$in1,$in1,$mask # roate-n-splat
250 vsldoi $tmp,$zero,$in0,12 # >>32
251 vcipherlast $key,$key,$rcon
254 vsldoi $tmp,$zero,$tmp,12 # >>32
256 vsldoi $tmp,$zero,$tmp,12 # >>32
259 vsldoi $stage,$zero,$in1,8
262 vsldoi $in1,$zero,$in1,12 # >>32
263 vadduwm $rcon,$rcon,$rcon
267 vsldoi $stage,$stage,$in0,8
269 vperm $key,$in1,$in1,$mask # rotate-n-splat
270 vsldoi $tmp,$zero,$in0,12 # >>32
271 vperm $outtail,$stage,$stage,$outperm # rotate
272 vsel $stage,$outhead,$outtail,$outmask
273 vmr $outhead,$outtail
274 vcipherlast $key,$key,$rcon
278 vsldoi $stage,$in0,$in1,8
280 vsldoi $tmp,$zero,$tmp,12 # >>32
281 vperm $outtail,$stage,$stage,$outperm # rotate
282 vsel $stage,$outhead,$outtail,$outmask
283 vmr $outhead,$outtail
285 vsldoi $tmp,$zero,$tmp,12 # >>32
292 vsldoi $in1,$zero,$in1,12 # >>32
293 vadduwm $rcon,$rcon,$rcon
297 vperm $outtail,$in0,$in0,$outperm # rotate
298 vsel $stage,$outhead,$outtail,$outmask
299 vmr $outhead,$outtail
301 addi $inp,$out,15 # 15 is not typo
314 vperm $outtail,$in0,$in0,$outperm # rotate
315 vsel $stage,$outhead,$outtail,$outmask
316 vmr $outhead,$outtail
319 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
323 vperm $key,$in1,$in1,$mask # rotate-n-splat
324 vsldoi $tmp,$zero,$in0,12 # >>32
325 vperm $outtail,$in1,$in1,$outperm # rotate
326 vsel $stage,$outhead,$outtail,$outmask
327 vmr $outhead,$outtail
328 vcipherlast $key,$key,$rcon
333 vsldoi $tmp,$zero,$tmp,12 # >>32
335 vsldoi $tmp,$zero,$tmp,12 # >>32
337 vadduwm $rcon,$rcon,$rcon
339 vperm $outtail,$in0,$in0,$outperm # rotate
340 vsel $stage,$outhead,$outtail,$outmask
341 vmr $outhead,$outtail
343 addi $inp,$out,15 # 15 is not typo
347 vspltw $key,$in0,3 # just splat
348 vsldoi $tmp,$zero,$in1,12 # >>32
352 vsldoi $tmp,$zero,$tmp,12 # >>32
354 vsldoi $tmp,$zero,$tmp,12 # >>32
362 lvx $in1,0,$inp # redundant in aligned case
363 vsel $in1,$outhead,$in1,$outmask
373 .byte 0,12,0x14,1,0,0,3,0
375 .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
377 .globl .${prefix}_set_decrypt_key
379 .${prefix}_set_decrypt_key:
380 $STU $sp,-$FRAME($sp)
382 $PUSH r10,$FRAME+$LRSAVE($sp)
390 subi $inp,$out,240 # first round key
391 srwi $rounds,$rounds,1
392 add $out,$inp,$cnt # last round key
416 xor r3,r3,r3 # return value
421 .byte 0,12,4,1,0x80,0,3,0
423 .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
426 #########################################################################
427 {{{ # Single block en- and decrypt procedures #
430 my $n = $dir eq "de" ? "n" : "";
431 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
434 .globl .${prefix}_${dir}crypt
436 .${prefix}_${dir}crypt:
437 lwz $rounds,240($key)
440 li $idx,15 # 15 is not typo
446 lvsl v2,0,$inp # inpperm
448 ?lvsl v3,0,r11 # outperm
451 vperm v0,v0,v1,v2 # align [and byte swap in LE]
453 ?lvsl v5,0,$key # keyperm
454 srwi $rounds,$rounds,1
457 subi $rounds,$rounds,1
458 ?vperm v1,v1,v2,v5 # align round key
480 v${n}cipherlast v0,v0,v1
484 li $idx,15 # 15 is not typo
485 ?vperm v2,v1,v2,v3 # outmask
487 lvx v1,0,$out # outhead
488 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
498 .byte 0,12,0x14,0,0,0,3,0
500 .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
506 #########################################################################
507 {{{ # CBC en- and decrypt procedures #
508 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
509 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
510 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
513 .globl .${prefix}_cbc_encrypt
515 .${prefix}_cbc_encrypt:
519 cmpwi $enc,0 # test direction
525 vxor $rndkey0,$rndkey0,$rndkey0
526 le?vspltisb $tmp,0x0f
528 lvx $ivec,0,$ivp # load [unaligned] iv
530 lvx $inptail,$idx,$ivp
531 le?vxor $inpperm,$inpperm,$tmp
532 vperm $ivec,$ivec,$inptail,$inpperm
535 ?lvsl $keyperm,0,$key # prepare for unaligned key
536 lwz $rounds,240($key)
538 lvsr $inpperm,0,r11 # prepare for unaligned load
540 addi $inp,$inp,15 # 15 is not typo
541 le?vxor $inpperm,$inpperm,$tmp
543 ?lvsr $outperm,0,$out # prepare for unaligned store
546 ?vperm $outmask,$rndkey0,$outmask,$outperm
547 le?vxor $outperm,$outperm,$tmp
549 srwi $rounds,$rounds,1
551 subi $rounds,$rounds,1
559 subi $len,$len,16 # len-=16
562 vperm $inout,$inout,$inptail,$inpperm
563 lvx $rndkey1,$idx,$key
565 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
566 vxor $inout,$inout,$rndkey0
567 lvx $rndkey0,$idx,$key
569 vxor $inout,$inout,$ivec
572 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
573 vcipher $inout,$inout,$rndkey1
574 lvx $rndkey1,$idx,$key
576 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
577 vcipher $inout,$inout,$rndkey0
578 lvx $rndkey0,$idx,$key
582 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
583 vcipher $inout,$inout,$rndkey1
584 lvx $rndkey1,$idx,$key
586 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
587 vcipherlast $ivec,$inout,$rndkey0
590 vperm $tmp,$ivec,$ivec,$outperm
591 vsel $inout,$outhead,$tmp,$outmask
602 bge _aesp8_cbc_decrypt8x
607 subi $len,$len,16 # len-=16
610 vperm $tmp,$tmp,$inptail,$inpperm
611 lvx $rndkey1,$idx,$key
613 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
614 vxor $inout,$tmp,$rndkey0
615 lvx $rndkey0,$idx,$key
619 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
620 vncipher $inout,$inout,$rndkey1
621 lvx $rndkey1,$idx,$key
623 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
624 vncipher $inout,$inout,$rndkey0
625 lvx $rndkey0,$idx,$key
629 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
630 vncipher $inout,$inout,$rndkey1
631 lvx $rndkey1,$idx,$key
633 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
634 vncipherlast $inout,$inout,$rndkey0
637 vxor $inout,$inout,$ivec
639 vperm $tmp,$inout,$inout,$outperm
640 vsel $inout,$outhead,$tmp,$outmask
648 lvx $inout,0,$out # redundant in aligned case
649 vsel $inout,$outhead,$inout,$outmask
652 neg $enc,$ivp # write [unaligned] iv
653 li $idx,15 # 15 is not typo
654 vxor $rndkey0,$rndkey0,$rndkey0
656 le?vspltisb $tmp,0x0f
657 ?lvsl $outperm,0,$enc
658 ?vperm $outmask,$rndkey0,$outmask,$outperm
659 le?vxor $outperm,$outperm,$tmp
661 vperm $ivec,$ivec,$ivec,$outperm
662 vsel $inout,$outhead,$ivec,$outmask
663 lvx $inptail,$idx,$ivp
665 vsel $inout,$ivec,$inptail,$outmask
666 stvx $inout,$idx,$ivp
671 .byte 0,12,0x14,0,0,0,6,0
674 #########################################################################
675 {{ # Optimized CBC decrypt procedure #
677 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
678 $x00=0 if ($flavour =~ /osx/);
679 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
680 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
681 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
682 # v26-v31 last 6 round keys
683 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
687 _aesp8_cbc_decrypt8x:
688 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
689 li r10,`$FRAME+8*16+15`
690 li r11,`$FRAME+8*16+31`
691 stvx v20,r10,$sp # ABI says so
714 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
716 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
718 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
720 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
722 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
724 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
726 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
730 subi $rounds,$rounds,3 # -4 in total
731 subi $len,$len,128 # bias
733 lvx $rndkey0,$x00,$key # load key schedule
737 ?vperm $rndkey0,$rndkey0,v30,$keyperm
738 addi $key_,$sp,$FRAME+15
742 ?vperm v24,v30,v31,$keyperm
745 stvx v24,$x00,$key_ # off-load round[1]
746 ?vperm v25,v31,v30,$keyperm
748 stvx v25,$x10,$key_ # off-load round[2]
749 addi $key_,$key_,0x20
750 bdnz Load_cbc_dec_key
753 ?vperm v24,v30,v31,$keyperm
755 stvx v24,$x00,$key_ # off-load round[3]
756 ?vperm v25,v31,v26,$keyperm
758 stvx v25,$x10,$key_ # off-load round[4]
759 addi $key_,$sp,$FRAME+15 # rewind $key_
760 ?vperm v26,v26,v27,$keyperm
762 ?vperm v27,v27,v28,$keyperm
764 ?vperm v28,v28,v29,$keyperm
766 ?vperm v29,v29,v30,$keyperm
767 lvx $out0,$x70,$key # borrow $out0
768 ?vperm v30,v30,v31,$keyperm
769 lvx v24,$x00,$key_ # pre-load round[1]
770 ?vperm v31,v31,$out0,$keyperm
771 lvx v25,$x10,$key_ # pre-load round[2]
773 #lvx $inptail,0,$inp # "caller" already did this
774 #addi $inp,$inp,15 # 15 is not typo
775 subi $inp,$inp,15 # undo "caller"
778 lvx_u $in0,$x00,$inp # load first 8 "words"
779 le?lvsl $inpperm,0,$idx
780 le?vspltisb $tmp,0x0f
782 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
784 le?vperm $in0,$in0,$in0,$inpperm
786 le?vperm $in1,$in1,$in1,$inpperm
788 le?vperm $in2,$in2,$in2,$inpperm
789 vxor $out0,$in0,$rndkey0
791 le?vperm $in3,$in3,$in3,$inpperm
792 vxor $out1,$in1,$rndkey0
794 le?vperm $in4,$in4,$in4,$inpperm
795 vxor $out2,$in2,$rndkey0
798 le?vperm $in5,$in5,$in5,$inpperm
799 vxor $out3,$in3,$rndkey0
800 le?vperm $in6,$in6,$in6,$inpperm
801 vxor $out4,$in4,$rndkey0
802 le?vperm $in7,$in7,$in7,$inpperm
803 vxor $out5,$in5,$rndkey0
804 vxor $out6,$in6,$rndkey0
805 vxor $out7,$in7,$rndkey0
811 vncipher $out0,$out0,v24
812 vncipher $out1,$out1,v24
813 vncipher $out2,$out2,v24
814 vncipher $out3,$out3,v24
815 vncipher $out4,$out4,v24
816 vncipher $out5,$out5,v24
817 vncipher $out6,$out6,v24
818 vncipher $out7,$out7,v24
819 lvx v24,$x20,$key_ # round[3]
820 addi $key_,$key_,0x20
822 vncipher $out0,$out0,v25
823 vncipher $out1,$out1,v25
824 vncipher $out2,$out2,v25
825 vncipher $out3,$out3,v25
826 vncipher $out4,$out4,v25
827 vncipher $out5,$out5,v25
828 vncipher $out6,$out6,v25
829 vncipher $out7,$out7,v25
830 lvx v25,$x10,$key_ # round[4]
833 subic $len,$len,128 # $len-=128
834 vncipher $out0,$out0,v24
835 vncipher $out1,$out1,v24
836 vncipher $out2,$out2,v24
837 vncipher $out3,$out3,v24
838 vncipher $out4,$out4,v24
839 vncipher $out5,$out5,v24
840 vncipher $out6,$out6,v24
841 vncipher $out7,$out7,v24
843 subfe. r0,r0,r0 # borrow?-1:0
844 vncipher $out0,$out0,v25
845 vncipher $out1,$out1,v25
846 vncipher $out2,$out2,v25
847 vncipher $out3,$out3,v25
848 vncipher $out4,$out4,v25
849 vncipher $out5,$out5,v25
850 vncipher $out6,$out6,v25
851 vncipher $out7,$out7,v25
854 vncipher $out0,$out0,v26
855 vncipher $out1,$out1,v26
856 vncipher $out2,$out2,v26
857 vncipher $out3,$out3,v26
858 vncipher $out4,$out4,v26
859 vncipher $out5,$out5,v26
860 vncipher $out6,$out6,v26
861 vncipher $out7,$out7,v26
863 add $inp,$inp,r0 # $inp is adjusted in such
864 # way that at exit from the
865 # loop inX-in7 are loaded
867 vncipher $out0,$out0,v27
868 vncipher $out1,$out1,v27
869 vncipher $out2,$out2,v27
870 vncipher $out3,$out3,v27
871 vncipher $out4,$out4,v27
872 vncipher $out5,$out5,v27
873 vncipher $out6,$out6,v27
874 vncipher $out7,$out7,v27
876 addi $key_,$sp,$FRAME+15 # rewind $key_
877 vncipher $out0,$out0,v28
878 vncipher $out1,$out1,v28
879 vncipher $out2,$out2,v28
880 vncipher $out3,$out3,v28
881 vncipher $out4,$out4,v28
882 vncipher $out5,$out5,v28
883 vncipher $out6,$out6,v28
884 vncipher $out7,$out7,v28
885 lvx v24,$x00,$key_ # re-pre-load round[1]
887 vncipher $out0,$out0,v29
888 vncipher $out1,$out1,v29
889 vncipher $out2,$out2,v29
890 vncipher $out3,$out3,v29
891 vncipher $out4,$out4,v29
892 vncipher $out5,$out5,v29
893 vncipher $out6,$out6,v29
894 vncipher $out7,$out7,v29
895 lvx v25,$x10,$key_ # re-pre-load round[2]
897 vncipher $out0,$out0,v30
898 vxor $ivec,$ivec,v31 # xor with last round key
899 vncipher $out1,$out1,v30
901 vncipher $out2,$out2,v30
903 vncipher $out3,$out3,v30
905 vncipher $out4,$out4,v30
907 vncipher $out5,$out5,v30
909 vncipher $out6,$out6,v30
911 vncipher $out7,$out7,v30
914 vncipherlast $out0,$out0,$ivec
915 vncipherlast $out1,$out1,$in0
916 lvx_u $in0,$x00,$inp # load next input block
917 vncipherlast $out2,$out2,$in1
919 vncipherlast $out3,$out3,$in2
920 le?vperm $in0,$in0,$in0,$inpperm
922 vncipherlast $out4,$out4,$in3
923 le?vperm $in1,$in1,$in1,$inpperm
925 vncipherlast $out5,$out5,$in4
926 le?vperm $in2,$in2,$in2,$inpperm
928 vncipherlast $out6,$out6,$in5
929 le?vperm $in3,$in3,$in3,$inpperm
931 vncipherlast $out7,$out7,$in6
932 le?vperm $in4,$in4,$in4,$inpperm
935 le?vperm $in5,$in5,$in5,$inpperm
939 le?vperm $out0,$out0,$out0,$inpperm
940 le?vperm $out1,$out1,$out1,$inpperm
941 stvx_u $out0,$x00,$out
942 le?vperm $in6,$in6,$in6,$inpperm
943 vxor $out0,$in0,$rndkey0
944 le?vperm $out2,$out2,$out2,$inpperm
945 stvx_u $out1,$x10,$out
946 le?vperm $in7,$in7,$in7,$inpperm
947 vxor $out1,$in1,$rndkey0
948 le?vperm $out3,$out3,$out3,$inpperm
949 stvx_u $out2,$x20,$out
950 vxor $out2,$in2,$rndkey0
951 le?vperm $out4,$out4,$out4,$inpperm
952 stvx_u $out3,$x30,$out
953 vxor $out3,$in3,$rndkey0
954 le?vperm $out5,$out5,$out5,$inpperm
955 stvx_u $out4,$x40,$out
956 vxor $out4,$in4,$rndkey0
957 le?vperm $out6,$out6,$out6,$inpperm
958 stvx_u $out5,$x50,$out
959 vxor $out5,$in5,$rndkey0
960 le?vperm $out7,$out7,$out7,$inpperm
961 stvx_u $out6,$x60,$out
962 vxor $out6,$in6,$rndkey0
963 stvx_u $out7,$x70,$out
965 vxor $out7,$in7,$rndkey0
968 beq Loop_cbc_dec8x # did $len-=128 borrow?
975 Loop_cbc_dec8x_tail: # up to 7 "words" tail...
976 vncipher $out1,$out1,v24
977 vncipher $out2,$out2,v24
978 vncipher $out3,$out3,v24
979 vncipher $out4,$out4,v24
980 vncipher $out5,$out5,v24
981 vncipher $out6,$out6,v24
982 vncipher $out7,$out7,v24
983 lvx v24,$x20,$key_ # round[3]
984 addi $key_,$key_,0x20
986 vncipher $out1,$out1,v25
987 vncipher $out2,$out2,v25
988 vncipher $out3,$out3,v25
989 vncipher $out4,$out4,v25
990 vncipher $out5,$out5,v25
991 vncipher $out6,$out6,v25
992 vncipher $out7,$out7,v25
993 lvx v25,$x10,$key_ # round[4]
994 bdnz Loop_cbc_dec8x_tail
996 vncipher $out1,$out1,v24
997 vncipher $out2,$out2,v24
998 vncipher $out3,$out3,v24
999 vncipher $out4,$out4,v24
1000 vncipher $out5,$out5,v24
1001 vncipher $out6,$out6,v24
1002 vncipher $out7,$out7,v24
1004 vncipher $out1,$out1,v25
1005 vncipher $out2,$out2,v25
1006 vncipher $out3,$out3,v25
1007 vncipher $out4,$out4,v25
1008 vncipher $out5,$out5,v25
1009 vncipher $out6,$out6,v25
1010 vncipher $out7,$out7,v25
1012 vncipher $out1,$out1,v26
1013 vncipher $out2,$out2,v26
1014 vncipher $out3,$out3,v26
1015 vncipher $out4,$out4,v26
1016 vncipher $out5,$out5,v26
1017 vncipher $out6,$out6,v26
1018 vncipher $out7,$out7,v26
1020 vncipher $out1,$out1,v27
1021 vncipher $out2,$out2,v27
1022 vncipher $out3,$out3,v27
1023 vncipher $out4,$out4,v27
1024 vncipher $out5,$out5,v27
1025 vncipher $out6,$out6,v27
1026 vncipher $out7,$out7,v27
1028 vncipher $out1,$out1,v28
1029 vncipher $out2,$out2,v28
1030 vncipher $out3,$out3,v28
1031 vncipher $out4,$out4,v28
1032 vncipher $out5,$out5,v28
1033 vncipher $out6,$out6,v28
1034 vncipher $out7,$out7,v28
1036 vncipher $out1,$out1,v29
1037 vncipher $out2,$out2,v29
1038 vncipher $out3,$out3,v29
1039 vncipher $out4,$out4,v29
1040 vncipher $out5,$out5,v29
1041 vncipher $out6,$out6,v29
1042 vncipher $out7,$out7,v29
1044 vncipher $out1,$out1,v30
1045 vxor $ivec,$ivec,v31 # last round key
1046 vncipher $out2,$out2,v30
1048 vncipher $out3,$out3,v30
1050 vncipher $out4,$out4,v30
1052 vncipher $out5,$out5,v30
1054 vncipher $out6,$out6,v30
1056 vncipher $out7,$out7,v30
1059 cmplwi $len,32 # switch($len)
1064 blt Lcbc_dec8x_three
1073 vncipherlast $out1,$out1,$ivec
1074 vncipherlast $out2,$out2,$in1
1075 vncipherlast $out3,$out3,$in2
1076 vncipherlast $out4,$out4,$in3
1077 vncipherlast $out5,$out5,$in4
1078 vncipherlast $out6,$out6,$in5
1079 vncipherlast $out7,$out7,$in6
1082 le?vperm $out1,$out1,$out1,$inpperm
1083 le?vperm $out2,$out2,$out2,$inpperm
1084 stvx_u $out1,$x00,$out
1085 le?vperm $out3,$out3,$out3,$inpperm
1086 stvx_u $out2,$x10,$out
1087 le?vperm $out4,$out4,$out4,$inpperm
1088 stvx_u $out3,$x20,$out
1089 le?vperm $out5,$out5,$out5,$inpperm
1090 stvx_u $out4,$x30,$out
1091 le?vperm $out6,$out6,$out6,$inpperm
1092 stvx_u $out5,$x40,$out
1093 le?vperm $out7,$out7,$out7,$inpperm
1094 stvx_u $out6,$x50,$out
1095 stvx_u $out7,$x60,$out
1101 vncipherlast $out2,$out2,$ivec
1102 vncipherlast $out3,$out3,$in2
1103 vncipherlast $out4,$out4,$in3
1104 vncipherlast $out5,$out5,$in4
1105 vncipherlast $out6,$out6,$in5
1106 vncipherlast $out7,$out7,$in6
1109 le?vperm $out2,$out2,$out2,$inpperm
1110 le?vperm $out3,$out3,$out3,$inpperm
1111 stvx_u $out2,$x00,$out
1112 le?vperm $out4,$out4,$out4,$inpperm
1113 stvx_u $out3,$x10,$out
1114 le?vperm $out5,$out5,$out5,$inpperm
1115 stvx_u $out4,$x20,$out
1116 le?vperm $out6,$out6,$out6,$inpperm
1117 stvx_u $out5,$x30,$out
1118 le?vperm $out7,$out7,$out7,$inpperm
1119 stvx_u $out6,$x40,$out
1120 stvx_u $out7,$x50,$out
1126 vncipherlast $out3,$out3,$ivec
1127 vncipherlast $out4,$out4,$in3
1128 vncipherlast $out5,$out5,$in4
1129 vncipherlast $out6,$out6,$in5
1130 vncipherlast $out7,$out7,$in6
1133 le?vperm $out3,$out3,$out3,$inpperm
1134 le?vperm $out4,$out4,$out4,$inpperm
1135 stvx_u $out3,$x00,$out
1136 le?vperm $out5,$out5,$out5,$inpperm
1137 stvx_u $out4,$x10,$out
1138 le?vperm $out6,$out6,$out6,$inpperm
1139 stvx_u $out5,$x20,$out
1140 le?vperm $out7,$out7,$out7,$inpperm
1141 stvx_u $out6,$x30,$out
1142 stvx_u $out7,$x40,$out
1148 vncipherlast $out4,$out4,$ivec
1149 vncipherlast $out5,$out5,$in4
1150 vncipherlast $out6,$out6,$in5
1151 vncipherlast $out7,$out7,$in6
1154 le?vperm $out4,$out4,$out4,$inpperm
1155 le?vperm $out5,$out5,$out5,$inpperm
1156 stvx_u $out4,$x00,$out
1157 le?vperm $out6,$out6,$out6,$inpperm
1158 stvx_u $out5,$x10,$out
1159 le?vperm $out7,$out7,$out7,$inpperm
1160 stvx_u $out6,$x20,$out
1161 stvx_u $out7,$x30,$out
1167 vncipherlast $out5,$out5,$ivec
1168 vncipherlast $out6,$out6,$in5
1169 vncipherlast $out7,$out7,$in6
1172 le?vperm $out5,$out5,$out5,$inpperm
1173 le?vperm $out6,$out6,$out6,$inpperm
1174 stvx_u $out5,$x00,$out
1175 le?vperm $out7,$out7,$out7,$inpperm
1176 stvx_u $out6,$x10,$out
1177 stvx_u $out7,$x20,$out
1183 vncipherlast $out6,$out6,$ivec
1184 vncipherlast $out7,$out7,$in6
1187 le?vperm $out6,$out6,$out6,$inpperm
1188 le?vperm $out7,$out7,$out7,$inpperm
1189 stvx_u $out6,$x00,$out
1190 stvx_u $out7,$x10,$out
1196 vncipherlast $out7,$out7,$ivec
1199 le?vperm $out7,$out7,$out7,$inpperm
1204 le?vperm $ivec,$ivec,$ivec,$inpperm
1205 stvx_u $ivec,0,$ivp # write [unaligned] iv
1209 stvx $inpperm,r10,$sp # wipe copies of round keys
1211 stvx $inpperm,r11,$sp
1213 stvx $inpperm,r10,$sp
1215 stvx $inpperm,r11,$sp
1217 stvx $inpperm,r10,$sp
1219 stvx $inpperm,r11,$sp
1221 stvx $inpperm,r10,$sp
1223 stvx $inpperm,r11,$sp
1227 lvx v20,r10,$sp # ABI says so
1249 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1250 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1251 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1252 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1253 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1254 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1255 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1258 .byte 0,12,0x04,0,0x80,6,6,0
1260 .size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1264 #########################################################################
1265 {{{ # CTR procedure[s] #
1266 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1267 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1268 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1273 .globl .${prefix}_ctr32_encrypt_blocks
1275 .${prefix}_ctr32_encrypt_blocks:
1284 vxor $rndkey0,$rndkey0,$rndkey0
1285 le?vspltisb $tmp,0x0f
1287 lvx $ivec,0,$ivp # load [unaligned] iv
1288 lvsl $inpperm,0,$ivp
1289 lvx $inptail,$idx,$ivp
1291 le?vxor $inpperm,$inpperm,$tmp
1292 vperm $ivec,$ivec,$inptail,$inpperm
1293 vsldoi $one,$rndkey0,$one,1
1296 ?lvsl $keyperm,0,$key # prepare for unaligned key
1297 lwz $rounds,240($key)
1299 lvsr $inpperm,0,r11 # prepare for unaligned load
1301 addi $inp,$inp,15 # 15 is not typo
1302 le?vxor $inpperm,$inpperm,$tmp
1304 srwi $rounds,$rounds,1
1306 subi $rounds,$rounds,1
1309 bge _aesp8_ctr32_encrypt8x
1311 ?lvsr $outperm,0,$out # prepare for unaligned store
1312 vspltisb $outmask,-1
1314 ?vperm $outmask,$rndkey0,$outmask,$outperm
1315 le?vxor $outperm,$outperm,$tmp
1319 lvx $rndkey1,$idx,$key
1321 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1322 vxor $inout,$ivec,$rndkey0
1323 lvx $rndkey0,$idx,$key
1329 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1330 vcipher $inout,$inout,$rndkey1
1331 lvx $rndkey1,$idx,$key
1333 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1334 vcipher $inout,$inout,$rndkey0
1335 lvx $rndkey0,$idx,$key
1339 vadduwm $ivec,$ivec,$one
1343 subic. $len,$len,1 # blocks--
1345 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1346 vcipher $inout,$inout,$rndkey1
1347 lvx $rndkey1,$idx,$key
1348 vperm $dat,$dat,$inptail,$inpperm
1350 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1352 vxor $dat,$dat,$rndkey1 # last round key
1353 vcipherlast $inout,$inout,$dat
1355 lvx $rndkey1,$idx,$key
1357 vperm $inout,$inout,$inout,$outperm
1358 vsel $dat,$outhead,$inout,$outmask
1360 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1362 vxor $inout,$ivec,$rndkey0
1363 lvx $rndkey0,$idx,$key
1370 lvx $inout,0,$out # redundant in aligned case
1371 vsel $inout,$outhead,$inout,$outmask
1377 .byte 0,12,0x14,0,0,0,6,0
1380 #########################################################################
1381 {{ # Optimized CTR procedure #
1383 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1384 $x00=0 if ($flavour =~ /osx/);
1385 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1386 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1387 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
1388 # v26-v31 last 6 round keys
1389 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1390 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1394 _aesp8_ctr32_encrypt8x:
1395 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1396 li r10,`$FRAME+8*16+15`
1397 li r11,`$FRAME+8*16+31`
1398 stvx v20,r10,$sp # ABI says so
1421 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1423 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1425 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1427 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1429 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1431 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1433 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1437 subi $rounds,$rounds,3 # -4 in total
1439 lvx $rndkey0,$x00,$key # load key schedule
1443 ?vperm $rndkey0,$rndkey0,v30,$keyperm
1444 addi $key_,$sp,$FRAME+15
1448 ?vperm v24,v30,v31,$keyperm
1451 stvx v24,$x00,$key_ # off-load round[1]
1452 ?vperm v25,v31,v30,$keyperm
1454 stvx v25,$x10,$key_ # off-load round[2]
1455 addi $key_,$key_,0x20
1456 bdnz Load_ctr32_enc_key
1459 ?vperm v24,v30,v31,$keyperm
1461 stvx v24,$x00,$key_ # off-load round[3]
1462 ?vperm v25,v31,v26,$keyperm
1464 stvx v25,$x10,$key_ # off-load round[4]
1465 addi $key_,$sp,$FRAME+15 # rewind $key_
1466 ?vperm v26,v26,v27,$keyperm
1468 ?vperm v27,v27,v28,$keyperm
1470 ?vperm v28,v28,v29,$keyperm
1472 ?vperm v29,v29,v30,$keyperm
1473 lvx $out0,$x70,$key # borrow $out0
1474 ?vperm v30,v30,v31,$keyperm
1475 lvx v24,$x00,$key_ # pre-load round[1]
1476 ?vperm v31,v31,$out0,$keyperm
1477 lvx v25,$x10,$key_ # pre-load round[2]
1479 vadduwm $two,$one,$one
1480 subi $inp,$inp,15 # undo "caller"
1483 vadduwm $out1,$ivec,$one # counter values ...
1484 vadduwm $out2,$ivec,$two
1485 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1487 vadduwm $out3,$out1,$two
1488 vxor $out1,$out1,$rndkey0
1489 le?lvsl $inpperm,0,$idx
1490 vadduwm $out4,$out2,$two
1491 vxor $out2,$out2,$rndkey0
1492 le?vspltisb $tmp,0x0f
1493 vadduwm $out5,$out3,$two
1494 vxor $out3,$out3,$rndkey0
1495 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1496 vadduwm $out6,$out4,$two
1497 vxor $out4,$out4,$rndkey0
1498 vadduwm $out7,$out5,$two
1499 vxor $out5,$out5,$rndkey0
1500 vadduwm $ivec,$out6,$two # next counter value
1501 vxor $out6,$out6,$rndkey0
1502 vxor $out7,$out7,$rndkey0
1508 vcipher $out0,$out0,v24
1509 vcipher $out1,$out1,v24
1510 vcipher $out2,$out2,v24
1511 vcipher $out3,$out3,v24
1512 vcipher $out4,$out4,v24
1513 vcipher $out5,$out5,v24
1514 vcipher $out6,$out6,v24
1515 vcipher $out7,$out7,v24
1516 Loop_ctr32_enc8x_middle:
1517 lvx v24,$x20,$key_ # round[3]
1518 addi $key_,$key_,0x20
1520 vcipher $out0,$out0,v25
1521 vcipher $out1,$out1,v25
1522 vcipher $out2,$out2,v25
1523 vcipher $out3,$out3,v25
1524 vcipher $out4,$out4,v25
1525 vcipher $out5,$out5,v25
1526 vcipher $out6,$out6,v25
1527 vcipher $out7,$out7,v25
1528 lvx v25,$x10,$key_ # round[4]
1529 bdnz Loop_ctr32_enc8x
1531 subic r11,$len,256 # $len-256, borrow $key_
1532 vcipher $out0,$out0,v24
1533 vcipher $out1,$out1,v24
1534 vcipher $out2,$out2,v24
1535 vcipher $out3,$out3,v24
1536 vcipher $out4,$out4,v24
1537 vcipher $out5,$out5,v24
1538 vcipher $out6,$out6,v24
1539 vcipher $out7,$out7,v24
1541 subfe r0,r0,r0 # borrow?-1:0
1542 vcipher $out0,$out0,v25
1543 vcipher $out1,$out1,v25
1544 vcipher $out2,$out2,v25
1545 vcipher $out3,$out3,v25
1546 vcipher $out4,$out4,v25
1547 vcipher $out5,$out5,v25
1548 vcipher $out6,$out6,v25
1549 vcipher $out7,$out7,v25
1552 addi $key_,$sp,$FRAME+15 # rewind $key_
1553 vcipher $out0,$out0,v26
1554 vcipher $out1,$out1,v26
1555 vcipher $out2,$out2,v26
1556 vcipher $out3,$out3,v26
1557 vcipher $out4,$out4,v26
1558 vcipher $out5,$out5,v26
1559 vcipher $out6,$out6,v26
1560 vcipher $out7,$out7,v26
1561 lvx v24,$x00,$key_ # re-pre-load round[1]
1563 subic $len,$len,129 # $len-=129
1564 vcipher $out0,$out0,v27
1565 addi $len,$len,1 # $len-=128 really
1566 vcipher $out1,$out1,v27
1567 vcipher $out2,$out2,v27
1568 vcipher $out3,$out3,v27
1569 vcipher $out4,$out4,v27
1570 vcipher $out5,$out5,v27
1571 vcipher $out6,$out6,v27
1572 vcipher $out7,$out7,v27
1573 lvx v25,$x10,$key_ # re-pre-load round[2]
1575 vcipher $out0,$out0,v28
1576 lvx_u $in0,$x00,$inp # load input
1577 vcipher $out1,$out1,v28
1578 lvx_u $in1,$x10,$inp
1579 vcipher $out2,$out2,v28
1580 lvx_u $in2,$x20,$inp
1581 vcipher $out3,$out3,v28
1582 lvx_u $in3,$x30,$inp
1583 vcipher $out4,$out4,v28
1584 lvx_u $in4,$x40,$inp
1585 vcipher $out5,$out5,v28
1586 lvx_u $in5,$x50,$inp
1587 vcipher $out6,$out6,v28
1588 lvx_u $in6,$x60,$inp
1589 vcipher $out7,$out7,v28
1590 lvx_u $in7,$x70,$inp
1593 vcipher $out0,$out0,v29
1594 le?vperm $in0,$in0,$in0,$inpperm
1595 vcipher $out1,$out1,v29
1596 le?vperm $in1,$in1,$in1,$inpperm
1597 vcipher $out2,$out2,v29
1598 le?vperm $in2,$in2,$in2,$inpperm
1599 vcipher $out3,$out3,v29
1600 le?vperm $in3,$in3,$in3,$inpperm
1601 vcipher $out4,$out4,v29
1602 le?vperm $in4,$in4,$in4,$inpperm
1603 vcipher $out5,$out5,v29
1604 le?vperm $in5,$in5,$in5,$inpperm
1605 vcipher $out6,$out6,v29
1606 le?vperm $in6,$in6,$in6,$inpperm
1607 vcipher $out7,$out7,v29
1608 le?vperm $in7,$in7,$in7,$inpperm
1610 add $inp,$inp,r0 # $inp is adjusted in such
1611 # way that at exit from the
1612 # loop inX-in7 are loaded
1614 subfe. r0,r0,r0 # borrow?-1:0
1615 vcipher $out0,$out0,v30
1616 vxor $in0,$in0,v31 # xor with last round key
1617 vcipher $out1,$out1,v30
1619 vcipher $out2,$out2,v30
1621 vcipher $out3,$out3,v30
1623 vcipher $out4,$out4,v30
1625 vcipher $out5,$out5,v30
1627 vcipher $out6,$out6,v30
1629 vcipher $out7,$out7,v30
1632 bne Lctr32_enc8x_break # did $len-129 borrow?
1634 vcipherlast $in0,$out0,$in0
1635 vcipherlast $in1,$out1,$in1
1636 vadduwm $out1,$ivec,$one # counter values ...
1637 vcipherlast $in2,$out2,$in2
1638 vadduwm $out2,$ivec,$two
1639 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1640 vcipherlast $in3,$out3,$in3
1641 vadduwm $out3,$out1,$two
1642 vxor $out1,$out1,$rndkey0
1643 vcipherlast $in4,$out4,$in4
1644 vadduwm $out4,$out2,$two
1645 vxor $out2,$out2,$rndkey0
1646 vcipherlast $in5,$out5,$in5
1647 vadduwm $out5,$out3,$two
1648 vxor $out3,$out3,$rndkey0
1649 vcipherlast $in6,$out6,$in6
1650 vadduwm $out6,$out4,$two
1651 vxor $out4,$out4,$rndkey0
1652 vcipherlast $in7,$out7,$in7
1653 vadduwm $out7,$out5,$two
1654 vxor $out5,$out5,$rndkey0
1655 le?vperm $in0,$in0,$in0,$inpperm
1656 vadduwm $ivec,$out6,$two # next counter value
1657 vxor $out6,$out6,$rndkey0
1658 le?vperm $in1,$in1,$in1,$inpperm
1659 vxor $out7,$out7,$rndkey0
1662 vcipher $out0,$out0,v24
1663 stvx_u $in0,$x00,$out
1664 le?vperm $in2,$in2,$in2,$inpperm
1665 vcipher $out1,$out1,v24
1666 stvx_u $in1,$x10,$out
1667 le?vperm $in3,$in3,$in3,$inpperm
1668 vcipher $out2,$out2,v24
1669 stvx_u $in2,$x20,$out
1670 le?vperm $in4,$in4,$in4,$inpperm
1671 vcipher $out3,$out3,v24
1672 stvx_u $in3,$x30,$out
1673 le?vperm $in5,$in5,$in5,$inpperm
1674 vcipher $out4,$out4,v24
1675 stvx_u $in4,$x40,$out
1676 le?vperm $in6,$in6,$in6,$inpperm
1677 vcipher $out5,$out5,v24
1678 stvx_u $in5,$x50,$out
1679 le?vperm $in7,$in7,$in7,$inpperm
1680 vcipher $out6,$out6,v24
1681 stvx_u $in6,$x60,$out
1682 vcipher $out7,$out7,v24
1683 stvx_u $in7,$x70,$out
1686 b Loop_ctr32_enc8x_middle
1691 blt Lctr32_enc8x_one
1693 beq Lctr32_enc8x_two
1695 blt Lctr32_enc8x_three
1697 beq Lctr32_enc8x_four
1699 blt Lctr32_enc8x_five
1701 beq Lctr32_enc8x_six
1703 blt Lctr32_enc8x_seven
1706 vcipherlast $out0,$out0,$in0
1707 vcipherlast $out1,$out1,$in1
1708 vcipherlast $out2,$out2,$in2
1709 vcipherlast $out3,$out3,$in3
1710 vcipherlast $out4,$out4,$in4
1711 vcipherlast $out5,$out5,$in5
1712 vcipherlast $out6,$out6,$in6
1713 vcipherlast $out7,$out7,$in7
1715 le?vperm $out0,$out0,$out0,$inpperm
1716 le?vperm $out1,$out1,$out1,$inpperm
1717 stvx_u $out0,$x00,$out
1718 le?vperm $out2,$out2,$out2,$inpperm
1719 stvx_u $out1,$x10,$out
1720 le?vperm $out3,$out3,$out3,$inpperm
1721 stvx_u $out2,$x20,$out
1722 le?vperm $out4,$out4,$out4,$inpperm
1723 stvx_u $out3,$x30,$out
1724 le?vperm $out5,$out5,$out5,$inpperm
1725 stvx_u $out4,$x40,$out
1726 le?vperm $out6,$out6,$out6,$inpperm
1727 stvx_u $out5,$x50,$out
1728 le?vperm $out7,$out7,$out7,$inpperm
1729 stvx_u $out6,$x60,$out
1730 stvx_u $out7,$x70,$out
1736 vcipherlast $out0,$out0,$in1
1737 vcipherlast $out1,$out1,$in2
1738 vcipherlast $out2,$out2,$in3
1739 vcipherlast $out3,$out3,$in4
1740 vcipherlast $out4,$out4,$in5
1741 vcipherlast $out5,$out5,$in6
1742 vcipherlast $out6,$out6,$in7
1744 le?vperm $out0,$out0,$out0,$inpperm
1745 le?vperm $out1,$out1,$out1,$inpperm
1746 stvx_u $out0,$x00,$out
1747 le?vperm $out2,$out2,$out2,$inpperm
1748 stvx_u $out1,$x10,$out
1749 le?vperm $out3,$out3,$out3,$inpperm
1750 stvx_u $out2,$x20,$out
1751 le?vperm $out4,$out4,$out4,$inpperm
1752 stvx_u $out3,$x30,$out
1753 le?vperm $out5,$out5,$out5,$inpperm
1754 stvx_u $out4,$x40,$out
1755 le?vperm $out6,$out6,$out6,$inpperm
1756 stvx_u $out5,$x50,$out
1757 stvx_u $out6,$x60,$out
1763 vcipherlast $out0,$out0,$in2
1764 vcipherlast $out1,$out1,$in3
1765 vcipherlast $out2,$out2,$in4
1766 vcipherlast $out3,$out3,$in5
1767 vcipherlast $out4,$out4,$in6
1768 vcipherlast $out5,$out5,$in7
1770 le?vperm $out0,$out0,$out0,$inpperm
1771 le?vperm $out1,$out1,$out1,$inpperm
1772 stvx_u $out0,$x00,$out
1773 le?vperm $out2,$out2,$out2,$inpperm
1774 stvx_u $out1,$x10,$out
1775 le?vperm $out3,$out3,$out3,$inpperm
1776 stvx_u $out2,$x20,$out
1777 le?vperm $out4,$out4,$out4,$inpperm
1778 stvx_u $out3,$x30,$out
1779 le?vperm $out5,$out5,$out5,$inpperm
1780 stvx_u $out4,$x40,$out
1781 stvx_u $out5,$x50,$out
1787 vcipherlast $out0,$out0,$in3
1788 vcipherlast $out1,$out1,$in4
1789 vcipherlast $out2,$out2,$in5
1790 vcipherlast $out3,$out3,$in6
1791 vcipherlast $out4,$out4,$in7
1793 le?vperm $out0,$out0,$out0,$inpperm
1794 le?vperm $out1,$out1,$out1,$inpperm
1795 stvx_u $out0,$x00,$out
1796 le?vperm $out2,$out2,$out2,$inpperm
1797 stvx_u $out1,$x10,$out
1798 le?vperm $out3,$out3,$out3,$inpperm
1799 stvx_u $out2,$x20,$out
1800 le?vperm $out4,$out4,$out4,$inpperm
1801 stvx_u $out3,$x30,$out
1802 stvx_u $out4,$x40,$out
1808 vcipherlast $out0,$out0,$in4
1809 vcipherlast $out1,$out1,$in5
1810 vcipherlast $out2,$out2,$in6
1811 vcipherlast $out3,$out3,$in7
1813 le?vperm $out0,$out0,$out0,$inpperm
1814 le?vperm $out1,$out1,$out1,$inpperm
1815 stvx_u $out0,$x00,$out
1816 le?vperm $out2,$out2,$out2,$inpperm
1817 stvx_u $out1,$x10,$out
1818 le?vperm $out3,$out3,$out3,$inpperm
1819 stvx_u $out2,$x20,$out
1820 stvx_u $out3,$x30,$out
1826 vcipherlast $out0,$out0,$in5
1827 vcipherlast $out1,$out1,$in6
1828 vcipherlast $out2,$out2,$in7
1830 le?vperm $out0,$out0,$out0,$inpperm
1831 le?vperm $out1,$out1,$out1,$inpperm
1832 stvx_u $out0,$x00,$out
1833 le?vperm $out2,$out2,$out2,$inpperm
1834 stvx_u $out1,$x10,$out
1835 stvx_u $out2,$x20,$out
1841 vcipherlast $out0,$out0,$in6
1842 vcipherlast $out1,$out1,$in7
1844 le?vperm $out0,$out0,$out0,$inpperm
1845 le?vperm $out1,$out1,$out1,$inpperm
1846 stvx_u $out0,$x00,$out
1847 stvx_u $out1,$x10,$out
1853 vcipherlast $out0,$out0,$in7
1855 le?vperm $out0,$out0,$out0,$inpperm
1862 stvx $inpperm,r10,$sp # wipe copies of round keys
1864 stvx $inpperm,r11,$sp
1866 stvx $inpperm,r10,$sp
1868 stvx $inpperm,r11,$sp
1870 stvx $inpperm,r10,$sp
1872 stvx $inpperm,r11,$sp
1874 stvx $inpperm,r10,$sp
1876 stvx $inpperm,r11,$sp
1880 lvx v20,r10,$sp # ABI says so
1902 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1903 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1904 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1905 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1906 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1907 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1908 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1911 .byte 0,12,0x04,0,0x80,6,6,0
1913 .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1917 #########################################################################
1918 {{{ # XTS procedures #
1919 # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
1920 # const AES_KEY *key1, const AES_KEY *key2, #
1921 # [const] unsigned char iv[16]); #
1922 # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
1923 # input tweak value is assumed to be encrypted already, and last tweak #
1924 # value, one suitable for consecutive call on same chunk of data, is #
1925 # written back to original buffer. In addition, in "tweak chaining" #
1926 # mode only complete input blocks are processed. #
# Register allocation for the scalar (one-block-at-a-time) XTS entry
# points.  GPRs r3..r10 receive the C ABI arguments in the order shown
# in the prototype comment above (inp, out, len, key1, key2, ivp, plus
# two scratch registers for the round counter and an index/offset).
1928 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
# v0..v2: the round-key pair currently being consumed and the data
# block in flight through the cipher rounds.
1929 my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
# v3..v7: result block, unaligned-load tail, and the three permute
# vectors (input alignment, little-endian byte swap, key alignment).
1930 my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
# v8..v12: current tweak, the 0x07..07 and 0x87..01 constants used for
# the GF(2^128) tweak update, a temporary, and the stolen-block tweak.
1931 my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
# $key2 is only needed up front (to encrypt the tweak); afterwards its
# GPR is reused to hold the tail length for ciphertext stealing.
1932 my $taillen = $key2;
# Swap so that $idx names r3 and $inp names r10; the prologue moves the
# real input pointer out of r3 (see "mr $inp,r3 # reassign" below).
1934 ($inp,$idx) = ($idx,$inp); # reassign
1937 .globl .${prefix}_xts_encrypt
1939 .${prefix}_xts_encrypt:
1940 mr $inp,r3 # reassign
1946 mfspr r12,256 # save vrsave
1950 vspltisb $seven,0x07 # 0x070707..07
1951 le?lvsl $leperm,r11,r11
1952 le?vspltisb $tmp,0x0f
1953 le?vxor $leperm,$leperm,$seven
1956 lvx $tweak,0,$ivp # load [unaligned] iv
1957 lvsl $inpperm,0,$ivp
1958 lvx $inptail,$idx,$ivp
1959 le?vxor $inpperm,$inpperm,$tmp
1960 vperm $tweak,$tweak,$inptail,$inpperm
1963 lvsr $inpperm,0,r11 # prepare for unaligned load
1965 addi $inp,$inp,15 # 15 is not typo
1966 le?vxor $inpperm,$inpperm,$tmp
1968 ${UCMP}i $key2,0 # key2==NULL?
1969 beq Lxts_enc_no_key2
1971 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
1972 lwz $rounds,240($key2)
1973 srwi $rounds,$rounds,1
1974 subi $rounds,$rounds,1
1977 lvx $rndkey0,0,$key2
1978 lvx $rndkey1,$idx,$key2
1980 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1981 vxor $tweak,$tweak,$rndkey0
1982 lvx $rndkey0,$idx,$key2
1987 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1988 vcipher $tweak,$tweak,$rndkey1
1989 lvx $rndkey1,$idx,$key2
1991 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1992 vcipher $tweak,$tweak,$rndkey0
1993 lvx $rndkey0,$idx,$key2
1997 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1998 vcipher $tweak,$tweak,$rndkey1
1999 lvx $rndkey1,$idx,$key2
2000 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2001 vcipherlast $tweak,$tweak,$rndkey0
2003 li $ivp,0 # don't chain the tweak
2008 and $len,$len,$idx # in "tweak chaining"
2009 # mode only complete
2010 # blocks are processed
2015 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2016 lwz $rounds,240($key1)
2017 srwi $rounds,$rounds,1
2018 subi $rounds,$rounds,1
2021 vslb $eighty7,$seven,$seven # 0x808080..80
2022 vor $eighty7,$eighty7,$seven # 0x878787..87
2023 vspltisb $tmp,1 # 0x010101..01
2024 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2027 bge _aesp8_xts_encrypt6x
2029 andi. $taillen,$len,15
2031 subi $taillen,$taillen,16
2036 lvx $rndkey0,0,$key1
2037 lvx $rndkey1,$idx,$key1
2039 vperm $inout,$inout,$inptail,$inpperm
2040 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2041 vxor $inout,$inout,$tweak
2042 vxor $inout,$inout,$rndkey0
2043 lvx $rndkey0,$idx,$key1
2050 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2051 vcipher $inout,$inout,$rndkey1
2052 lvx $rndkey1,$idx,$key1
2054 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2055 vcipher $inout,$inout,$rndkey0
2056 lvx $rndkey0,$idx,$key1
2060 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2061 vcipher $inout,$inout,$rndkey1
2062 lvx $rndkey1,$idx,$key1
2064 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2065 vxor $rndkey0,$rndkey0,$tweak
2066 vcipherlast $output,$inout,$rndkey0
2068 le?vperm $tmp,$output,$output,$leperm
2070 le?stvx_u $tmp,0,$out
2071 be?stvx_u $output,0,$out
2080 lvx $rndkey0,0,$key1
2081 lvx $rndkey1,$idx,$key1
# Advance the XTS tweak: multiply by x in GF(2^128) modulo
# x^128 + x^7 + x^2 + x + 1, computed lane-wise without a 128-bit shift.
# vsrab turns each byte into 0x00/0xFF depending on its top (carry) bit;
2089 vsrab $tmp,$tweak,$seven # next tweak value
# vaddubm doubles every byte (the inter-byte carries are lost here and
# reinjected below via $tmp);
2090 vaddubm $tweak,$tweak,$tweak
# rotate the carry masks by one byte so each mask lines up with the
# neighbouring lane that should receive its carry;
2091 vsldoi $tmp,$tmp,$tmp,15
# mask with 0x870101..01 (built at function entry): ordinary lanes
# contribute the carry bit 0x01, the wrap-around lane contributes the
# reduction constant 0x87;
2092 vand $tmp,$tmp,$eighty7
2093 vxor $tweak,$tweak,$tmp
2095 vperm $inout,$inout,$inptail,$inpperm
2096 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2097 vxor $inout,$inout,$tweak
2098 vxor $output,$output,$rndkey0 # just in case $len<16
2099 vxor $inout,$inout,$rndkey0
2100 lvx $rndkey0,$idx,$key1
2107 vxor $output,$output,$tweak
2108 lvsr $inpperm,0,$len # $inpperm is no longer needed
2109 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2111 vperm $inptail,$inptail,$tmp,$inpperm
2112 vsel $inout,$inout,$output,$inptail
2121 bdnz Loop_xts_enc_steal
2124 b Loop_xts_enc # one more time...
2130 vsrab $tmp,$tweak,$seven # next tweak value
2131 vaddubm $tweak,$tweak,$tweak
2132 vsldoi $tmp,$tmp,$tmp,15
2133 vand $tmp,$tmp,$eighty7
2134 vxor $tweak,$tweak,$tmp
2136 le?vperm $tweak,$tweak,$tweak,$leperm
2137 stvx_u $tweak,0,$ivp
2140 mtspr 256,r12 # restore vrsave
2144 .byte 0,12,0x04,0,0x80,6,6,0
2146 .size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2148 .globl .${prefix}_xts_decrypt
2150 .${prefix}_xts_decrypt:
2151 mr $inp,r3 # reassign
2157 mfspr r12,256 # save vrsave
2166 vspltisb $seven,0x07 # 0x070707..07
2167 le?lvsl $leperm,r11,r11
2168 le?vspltisb $tmp,0x0f
2169 le?vxor $leperm,$leperm,$seven
2172 lvx $tweak,0,$ivp # load [unaligned] iv
2173 lvsl $inpperm,0,$ivp
2174 lvx $inptail,$idx,$ivp
2175 le?vxor $inpperm,$inpperm,$tmp
2176 vperm $tweak,$tweak,$inptail,$inpperm
2179 lvsr $inpperm,0,r11 # prepare for unaligned load
2181 addi $inp,$inp,15 # 15 is not typo
2182 le?vxor $inpperm,$inpperm,$tmp
2184 ${UCMP}i $key2,0 # key2==NULL?
2185 beq Lxts_dec_no_key2
2187 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2188 lwz $rounds,240($key2)
2189 srwi $rounds,$rounds,1
2190 subi $rounds,$rounds,1
2193 lvx $rndkey0,0,$key2
2194 lvx $rndkey1,$idx,$key2
2196 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2197 vxor $tweak,$tweak,$rndkey0
2198 lvx $rndkey0,$idx,$key2
2203 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2204 vcipher $tweak,$tweak,$rndkey1
2205 lvx $rndkey1,$idx,$key2
2207 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2208 vcipher $tweak,$tweak,$rndkey0
2209 lvx $rndkey0,$idx,$key2
2213 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2214 vcipher $tweak,$tweak,$rndkey1
2215 lvx $rndkey1,$idx,$key2
2216 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2217 vcipherlast $tweak,$tweak,$rndkey0
2219 li $ivp,0 # don't chain the tweak
2225 add $len,$len,$idx # in "tweak chaining"
2226 # mode only complete
2227 # blocks are processed
2232 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2233 lwz $rounds,240($key1)
2234 srwi $rounds,$rounds,1
2235 subi $rounds,$rounds,1
2238 vslb $eighty7,$seven,$seven # 0x808080..80
2239 vor $eighty7,$eighty7,$seven # 0x878787..87
2240 vspltisb $tmp,1 # 0x010101..01
2241 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2244 bge _aesp8_xts_decrypt6x
2246 lvx $rndkey0,0,$key1
2247 lvx $rndkey1,$idx,$key1
2249 vperm $inout,$inout,$inptail,$inpperm
2250 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2251 vxor $inout,$inout,$tweak
2252 vxor $inout,$inout,$rndkey0
2253 lvx $rndkey0,$idx,$key1
2263 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2264 vncipher $inout,$inout,$rndkey1
2265 lvx $rndkey1,$idx,$key1
2267 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2268 vncipher $inout,$inout,$rndkey0
2269 lvx $rndkey0,$idx,$key1
2273 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2274 vncipher $inout,$inout,$rndkey1
2275 lvx $rndkey1,$idx,$key1
2277 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2278 vxor $rndkey0,$rndkey0,$tweak
2279 vncipherlast $output,$inout,$rndkey0
2281 le?vperm $tmp,$output,$output,$leperm
2283 le?stvx_u $tmp,0,$out
2284 be?stvx_u $output,0,$out
2293 lvx $rndkey0,0,$key1
2294 lvx $rndkey1,$idx,$key1
2297 vsrab $tmp,$tweak,$seven # next tweak value
2298 vaddubm $tweak,$tweak,$tweak
2299 vsldoi $tmp,$tmp,$tmp,15
2300 vand $tmp,$tmp,$eighty7
2301 vxor $tweak,$tweak,$tmp
2303 vperm $inout,$inout,$inptail,$inpperm
2304 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2305 vxor $inout,$inout,$tweak
2306 vxor $inout,$inout,$rndkey0
2307 lvx $rndkey0,$idx,$key1
2315 vsrab $tmp,$tweak,$seven # next tweak value
2316 vaddubm $tweak1,$tweak,$tweak
2317 vsldoi $tmp,$tmp,$tmp,15
2318 vand $tmp,$tmp,$eighty7
2319 vxor $tweak1,$tweak1,$tmp
2324 vxor $inout,$inout,$tweak # :-(
2325 vxor $inout,$inout,$tweak1 # :-)
2328 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2329 vncipher $inout,$inout,$rndkey1
2330 lvx $rndkey1,$idx,$key1
2332 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2333 vncipher $inout,$inout,$rndkey0
2334 lvx $rndkey0,$idx,$key1
2336 bdnz Loop_xts_dec_short
2338 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2339 vncipher $inout,$inout,$rndkey1
2340 lvx $rndkey1,$idx,$key1
2342 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2343 vxor $rndkey0,$rndkey0,$tweak1
2344 vncipherlast $output,$inout,$rndkey0
2346 le?vperm $tmp,$output,$output,$leperm
2348 le?stvx_u $tmp,0,$out
2349 be?stvx_u $output,0,$out
2354 lvx $rndkey0,0,$key1
2355 lvx $rndkey1,$idx,$key1
2357 vperm $inout,$inout,$inptail,$inpperm
2358 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2360 lvsr $inpperm,0,$len # $inpperm is no longer needed
2361 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2363 vperm $inptail,$inptail,$tmp,$inpperm
2364 vsel $inout,$inout,$output,$inptail
2366 vxor $rndkey0,$rndkey0,$tweak
2367 vxor $inout,$inout,$rndkey0
2368 lvx $rndkey0,$idx,$key1
2377 bdnz Loop_xts_dec_steal
2380 b Loop_xts_dec # one more time...
# Compute one more tweak value (multiply by x in GF(2^128) modulo
# x^128 + x^7 + x^2 + x + 1) so the chained tweak written back to the
# IV buffer is suitable for a consecutive call on the same data stream.
# Per-byte carry mask (0x00/0xFF from each byte's top bit):
2386 vsrab $tmp,$tweak,$seven # next tweak value
# Lane-wise doubling; cross-lane carries are restored from $tmp below.
2387 vaddubm $tweak,$tweak,$tweak
# Rotate carry masks one byte so they target the receiving lanes.
2388 vsldoi $tmp,$tmp,$tmp,15
# 0x870101..01: plain carries get 0x01, the wrapped lane gets the
# reduction constant 0x87.
2389 vand $tmp,$tmp,$eighty7
2390 vxor $tweak,$tweak,$tmp
2392 le?vperm $tweak,$tweak,$tweak,$leperm
2393 stvx_u $tweak,0,$ivp
2396 mtspr 256,r12 # restore vrsave
2400 .byte 0,12,0x04,0,0x80,6,6,0
2402 .size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2404 #########################################################################
2405 {{ # Optimized XTS procedures #
# Register allocation for the 6x-interleaved XTS fast path.
# $x00..$x70 are GPR offsets 0x00..0x70 used as index registers for the
# vector load/store instructions (r0 and r3 double as 0x00/0x10 except
# where the assembler flavour forbids it).
2407 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
# OS X assembler cannot encode r0 as an index; use the literal 0 there.
2408 $x00=0 if ($flavour =~ /osx/);
# v0..v5: six input blocks processed per iteration.
2409 my ($in0, $in1, $in2, $in3, $in4, $in5)=map("v$_",(0..5));
# v7, v12..v16: the six blocks being run through the cipher rounds.
2410 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
# v17..v22: the pre-computed tweak for each of the six blocks.
2411 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2412 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
2413 # v26-v31 last 6 round keys
2414 my ($keyperm)=($out0); # aliases with "caller", redundant assignment
2419 _aesp8_xts_encrypt6x:
2420 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2422 li r7,`$FRAME+8*16+15`
2423 li r3,`$FRAME+8*16+31`
2424 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2425 stvx v20,r7,$sp # ABI says so
2448 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2450 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2452 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2454 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2456 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2458 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2460 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2464 # Reverse eighty7 to 0x010101..87
2465 xxlor 2, 32+$eighty7, 32+$eighty7
2466 vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
2467 xxlor 1, 32+$eighty7, 32+$eighty7
2469 # Load XOR contents. 0x0f102132435465768798a9bacbdcedfe
2472 lxvw4x 0, $x40, r6 # load XOR contents
2476 subi $rounds,$rounds,3 # -4 in total
2478 lvx $rndkey0,$x00,$key1 # load key schedule
2480 addi $key1,$key1,0x20
2482 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2483 addi $key_,$sp,$FRAME+15
2487 ?vperm v24,v30,v31,$keyperm
2489 addi $key1,$key1,0x20
2490 stvx v24,$x00,$key_ # off-load round[1]
2491 ?vperm v25,v31,v30,$keyperm
2493 stvx v25,$x10,$key_ # off-load round[2]
2494 addi $key_,$key_,0x20
2495 bdnz Load_xts_enc_key
2498 ?vperm v24,v30,v31,$keyperm
2500 stvx v24,$x00,$key_ # off-load round[3]
2501 ?vperm v25,v31,v26,$keyperm
2503 stvx v25,$x10,$key_ # off-load round[4]
2504 addi $key_,$sp,$FRAME+15 # rewind $key_
2505 ?vperm v26,v26,v27,$keyperm
2507 ?vperm v27,v27,v28,$keyperm
2509 ?vperm v28,v28,v29,$keyperm
2511 ?vperm v29,v29,v30,$keyperm
2512 lvx $twk5,$x70,$key1 # borrow $twk5
2513 ?vperm v30,v30,v31,$keyperm
2514 lvx v24,$x00,$key_ # pre-load round[1]
2515 ?vperm v31,v31,$twk5,$keyperm
2516 lvx v25,$x10,$key_ # pre-load round[2]
2518 # Switch to use the following codes with 0x010101..87 to generate tweak.
2519 # eighty7 = 0x010101..87
2520 # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits
2521 # vand tmp, tmp, eighty7 # last byte with carry
2522 # vaddubm tweak, tweak, tweak # left shift 1 bit (x2)
2524 # vpermxor tweak, tweak, tmp, vsx
2526 vperm $in0,$inout,$inptail,$inpperm
2527 subi $inp,$inp,31 # undo "caller"
2528 vxor $twk0,$tweak,$rndkey0
2529 vsrab $tmp,$tweak,$seven # next tweak value
2530 vaddubm $tweak,$tweak,$tweak
2531 vand $tmp,$tmp,$eighty7
2532 vxor $out0,$in0,$twk0
2534 vpermxor $tweak, $tweak, $tmp, $in1
2536 lvx_u $in1,$x10,$inp
2537 vxor $twk1,$tweak,$rndkey0
2538 vsrab $tmp,$tweak,$seven # next tweak value
2539 vaddubm $tweak,$tweak,$tweak
2540 le?vperm $in1,$in1,$in1,$leperm
2541 vand $tmp,$tmp,$eighty7
2542 vxor $out1,$in1,$twk1
2544 vpermxor $tweak, $tweak, $tmp, $in2
2546 lvx_u $in2,$x20,$inp
2547 andi. $taillen,$len,15
2548 vxor $twk2,$tweak,$rndkey0
2549 vsrab $tmp,$tweak,$seven # next tweak value
2550 vaddubm $tweak,$tweak,$tweak
2551 le?vperm $in2,$in2,$in2,$leperm
2552 vand $tmp,$tmp,$eighty7
2553 vxor $out2,$in2,$twk2
2555 vpermxor $tweak, $tweak, $tmp, $in3
2557 lvx_u $in3,$x30,$inp
2558 sub $len,$len,$taillen
2559 vxor $twk3,$tweak,$rndkey0
2560 vsrab $tmp,$tweak,$seven # next tweak value
2561 vaddubm $tweak,$tweak,$tweak
2562 le?vperm $in3,$in3,$in3,$leperm
2563 vand $tmp,$tmp,$eighty7
2564 vxor $out3,$in3,$twk3
2566 vpermxor $tweak, $tweak, $tmp, $in4
2568 lvx_u $in4,$x40,$inp
2570 vxor $twk4,$tweak,$rndkey0
2571 vsrab $tmp,$tweak,$seven # next tweak value
2572 vaddubm $tweak,$tweak,$tweak
2573 le?vperm $in4,$in4,$in4,$leperm
2574 vand $tmp,$tmp,$eighty7
2575 vxor $out4,$in4,$twk4
2577 vpermxor $tweak, $tweak, $tmp, $in5
2579 lvx_u $in5,$x50,$inp
2581 vxor $twk5,$tweak,$rndkey0
2582 vsrab $tmp,$tweak,$seven # next tweak value
2583 vaddubm $tweak,$tweak,$tweak
2584 le?vperm $in5,$in5,$in5,$leperm
2585 vand $tmp,$tmp,$eighty7
2586 vxor $out5,$in5,$twk5
2588 vpermxor $tweak, $tweak, $tmp, $in0
2590 vxor v31,v31,$rndkey0
2596 vcipher $out0,$out0,v24
2597 vcipher $out1,$out1,v24
2598 vcipher $out2,$out2,v24
2599 vcipher $out3,$out3,v24
2600 vcipher $out4,$out4,v24
2601 vcipher $out5,$out5,v24
2602 lvx v24,$x20,$key_ # round[3]
2603 addi $key_,$key_,0x20
2605 vcipher $out0,$out0,v25
2606 vcipher $out1,$out1,v25
2607 vcipher $out2,$out2,v25
2608 vcipher $out3,$out3,v25
2609 vcipher $out4,$out4,v25
2610 vcipher $out5,$out5,v25
2611 lvx v25,$x10,$key_ # round[4]
2614 xxlor 32+$eighty7, 1, 1 # 0x010101..87
2616 subic $len,$len,96 # $len-=96
2617 vxor $in0,$twk0,v31 # xor with last round key
2618 vcipher $out0,$out0,v24
2619 vcipher $out1,$out1,v24
2620 vsrab $tmp,$tweak,$seven # next tweak value
2621 vxor $twk0,$tweak,$rndkey0
2622 vaddubm $tweak,$tweak,$tweak
2623 vcipher $out2,$out2,v24
2624 vcipher $out3,$out3,v24
2625 vcipher $out4,$out4,v24
2626 vcipher $out5,$out5,v24
2628 subfe. r0,r0,r0 # borrow?-1:0
2629 vand $tmp,$tmp,$eighty7
2630 vcipher $out0,$out0,v25
2631 vcipher $out1,$out1,v25
2633 vpermxor $tweak, $tweak, $tmp, $in1
2634 vcipher $out2,$out2,v25
2635 vcipher $out3,$out3,v25
2637 vsrab $tmp,$tweak,$seven # next tweak value
2638 vxor $twk1,$tweak,$rndkey0
2639 vcipher $out4,$out4,v25
2640 vcipher $out5,$out5,v25
2643 vaddubm $tweak,$tweak,$tweak
2644 vcipher $out0,$out0,v26
2645 vcipher $out1,$out1,v26
2646 vand $tmp,$tmp,$eighty7
2647 vcipher $out2,$out2,v26
2648 vcipher $out3,$out3,v26
2650 vpermxor $tweak, $tweak, $tmp, $in2
2651 vcipher $out4,$out4,v26
2652 vcipher $out5,$out5,v26
2654 add $inp,$inp,r0 # $inp is adjusted in such
2655 # way that at exit from the
2656 # loop inX-in5 are loaded
2659 vsrab $tmp,$tweak,$seven # next tweak value
2660 vxor $twk2,$tweak,$rndkey0
2661 vaddubm $tweak,$tweak,$tweak
2662 vcipher $out0,$out0,v27
2663 vcipher $out1,$out1,v27
2664 vcipher $out2,$out2,v27
2665 vcipher $out3,$out3,v27
2666 vand $tmp,$tmp,$eighty7
2667 vcipher $out4,$out4,v27
2668 vcipher $out5,$out5,v27
2670 addi $key_,$sp,$FRAME+15 # rewind $key_
2672 vpermxor $tweak, $tweak, $tmp, $in3
2673 vcipher $out0,$out0,v28
2674 vcipher $out1,$out1,v28
2676 vsrab $tmp,$tweak,$seven # next tweak value
2677 vxor $twk3,$tweak,$rndkey0
2678 vcipher $out2,$out2,v28
2679 vcipher $out3,$out3,v28
2680 vaddubm $tweak,$tweak,$tweak
2681 vcipher $out4,$out4,v28
2682 vcipher $out5,$out5,v28
2683 lvx v24,$x00,$key_ # re-pre-load round[1]
2684 vand $tmp,$tmp,$eighty7
2686 vcipher $out0,$out0,v29
2687 vcipher $out1,$out1,v29
2689 vpermxor $tweak, $tweak, $tmp, $in4
2690 vcipher $out2,$out2,v29
2691 vcipher $out3,$out3,v29
2693 vsrab $tmp,$tweak,$seven # next tweak value
2694 vxor $twk4,$tweak,$rndkey0
2695 vcipher $out4,$out4,v29
2696 vcipher $out5,$out5,v29
2697 lvx v25,$x10,$key_ # re-pre-load round[2]
2698 vaddubm $tweak,$tweak,$tweak
2700 vcipher $out0,$out0,v30
2701 vcipher $out1,$out1,v30
2702 vand $tmp,$tmp,$eighty7
2703 vcipher $out2,$out2,v30
2704 vcipher $out3,$out3,v30
2706 vpermxor $tweak, $tweak, $tmp, $in5
2707 vcipher $out4,$out4,v30
2708 vcipher $out5,$out5,v30
2710 vsrab $tmp,$tweak,$seven # next tweak value
2711 vxor $twk5,$tweak,$rndkey0
2713 vcipherlast $out0,$out0,$in0
2714 lvx_u $in0,$x00,$inp # load next input block
2715 vaddubm $tweak,$tweak,$tweak
2716 vcipherlast $out1,$out1,$in1
2717 lvx_u $in1,$x10,$inp
2718 vcipherlast $out2,$out2,$in2
2719 le?vperm $in0,$in0,$in0,$leperm
2720 lvx_u $in2,$x20,$inp
2721 vand $tmp,$tmp,$eighty7
2722 vcipherlast $out3,$out3,$in3
2723 le?vperm $in1,$in1,$in1,$leperm
2724 lvx_u $in3,$x30,$inp
2725 vcipherlast $out4,$out4,$in4
2726 le?vperm $in2,$in2,$in2,$leperm
2727 lvx_u $in4,$x40,$inp
2728 xxlor 10, 32+$in0, 32+$in0
2730 vpermxor $tweak, $tweak, $tmp, $in0
2731 xxlor 32+$in0, 10, 10
2732 vcipherlast $tmp,$out5,$in5 # last block might be needed
2734 le?vperm $in3,$in3,$in3,$leperm
2735 lvx_u $in5,$x50,$inp
2737 le?vperm $in4,$in4,$in4,$leperm
2738 le?vperm $in5,$in5,$in5,$leperm
2740 le?vperm $out0,$out0,$out0,$leperm
2741 le?vperm $out1,$out1,$out1,$leperm
2742 stvx_u $out0,$x00,$out # store output
2743 vxor $out0,$in0,$twk0
2744 le?vperm $out2,$out2,$out2,$leperm
2745 stvx_u $out1,$x10,$out
2746 vxor $out1,$in1,$twk1
2747 le?vperm $out3,$out3,$out3,$leperm
2748 stvx_u $out2,$x20,$out
2749 vxor $out2,$in2,$twk2
2750 le?vperm $out4,$out4,$out4,$leperm
2751 stvx_u $out3,$x30,$out
2752 vxor $out3,$in3,$twk3
2753 le?vperm $out5,$tmp,$tmp,$leperm
2754 stvx_u $out4,$x40,$out
2755 vxor $out4,$in4,$twk4
2756 le?stvx_u $out5,$x50,$out
2757 be?stvx_u $tmp, $x50,$out
2758 vxor $out5,$in5,$twk5
2762 beq Loop_xts_enc6x # did $len-=96 borrow?
2764 xxlor 32+$eighty7, 2, 2 # 0x870101..01
2766 addic. $len,$len,0x60
2773 blt Lxts_enc6x_three
2778 vxor $out0,$in1,$twk0
2779 vxor $out1,$in2,$twk1
2780 vxor $out2,$in3,$twk2
2781 vxor $out3,$in4,$twk3
2782 vxor $out4,$in5,$twk4
2786 le?vperm $out0,$out0,$out0,$leperm
2787 vmr $twk0,$twk5 # unused tweak
2788 le?vperm $out1,$out1,$out1,$leperm
2789 stvx_u $out0,$x00,$out # store output
2790 le?vperm $out2,$out2,$out2,$leperm
2791 stvx_u $out1,$x10,$out
2792 le?vperm $out3,$out3,$out3,$leperm
2793 stvx_u $out2,$x20,$out
2794 vxor $tmp,$out4,$twk5 # last block prep for stealing
2795 le?vperm $out4,$out4,$out4,$leperm
2796 stvx_u $out3,$x30,$out
2797 stvx_u $out4,$x40,$out
2799 bne Lxts_enc6x_steal
2804 vxor $out0,$in2,$twk0
2805 vxor $out1,$in3,$twk1
2806 vxor $out2,$in4,$twk2
2807 vxor $out3,$in5,$twk3
2808 vxor $out4,$out4,$out4
2812 le?vperm $out0,$out0,$out0,$leperm
2813 vmr $twk0,$twk4 # unused tweak
2814 le?vperm $out1,$out1,$out1,$leperm
2815 stvx_u $out0,$x00,$out # store output
2816 le?vperm $out2,$out2,$out2,$leperm
2817 stvx_u $out1,$x10,$out
2818 vxor $tmp,$out3,$twk4 # last block prep for stealing
2819 le?vperm $out3,$out3,$out3,$leperm
2820 stvx_u $out2,$x20,$out
2821 stvx_u $out3,$x30,$out
2823 bne Lxts_enc6x_steal
2828 vxor $out0,$in3,$twk0
2829 vxor $out1,$in4,$twk1
2830 vxor $out2,$in5,$twk2
2831 vxor $out3,$out3,$out3
2832 vxor $out4,$out4,$out4
2836 le?vperm $out0,$out0,$out0,$leperm
2837 vmr $twk0,$twk3 # unused tweak
2838 le?vperm $out1,$out1,$out1,$leperm
2839 stvx_u $out0,$x00,$out # store output
2840 vxor $tmp,$out2,$twk3 # last block prep for stealing
2841 le?vperm $out2,$out2,$out2,$leperm
2842 stvx_u $out1,$x10,$out
2843 stvx_u $out2,$x20,$out
2845 bne Lxts_enc6x_steal
2850 vxor $out0,$in4,$twk0
2851 vxor $out1,$in5,$twk1
2852 vxor $out2,$out2,$out2
2853 vxor $out3,$out3,$out3
2854 vxor $out4,$out4,$out4
2858 le?vperm $out0,$out0,$out0,$leperm
2859 vmr $twk0,$twk2 # unused tweak
2860 vxor $tmp,$out1,$twk2 # last block prep for stealing
2861 le?vperm $out1,$out1,$out1,$leperm
2862 stvx_u $out0,$x00,$out # store output
2863 stvx_u $out1,$x10,$out
2865 bne Lxts_enc6x_steal
2870 vxor $out0,$in5,$twk0
2873 vcipher $out0,$out0,v24
2874 lvx v24,$x20,$key_ # round[3]
2875 addi $key_,$key_,0x20
2877 vcipher $out0,$out0,v25
2878 lvx v25,$x10,$key_ # round[4]
2881 add $inp,$inp,$taillen
2883 vcipher $out0,$out0,v24
2886 vcipher $out0,$out0,v25
2888 lvsr $inpperm,0,$taillen
2889 vcipher $out0,$out0,v26
2892 vcipher $out0,$out0,v27
2894 addi $key_,$sp,$FRAME+15 # rewind $key_
2895 vcipher $out0,$out0,v28
2896 lvx v24,$x00,$key_ # re-pre-load round[1]
2898 vcipher $out0,$out0,v29
2899 lvx v25,$x10,$key_ # re-pre-load round[2]
2900 vxor $twk0,$twk0,v31
2902 le?vperm $in0,$in0,$in0,$leperm
2903 vcipher $out0,$out0,v30
2905 vperm $in0,$in0,$in0,$inpperm
2906 vcipherlast $out0,$out0,$twk0
2908 vmr $twk0,$twk1 # unused tweak
2909 vxor $tmp,$out0,$twk1 # last block prep for stealing
2910 le?vperm $out0,$out0,$out0,$leperm
2911 stvx_u $out0,$x00,$out # store output
2913 bne Lxts_enc6x_steal
2921 add $inp,$inp,$taillen
2924 lvsr $inpperm,0,$taillen # $in5 is no more
2925 le?vperm $in0,$in0,$in0,$leperm
2926 vperm $in0,$in0,$in0,$inpperm
2927 vxor $tmp,$tmp,$twk0
2929 vxor $in0,$in0,$twk0
2930 vxor $out0,$out0,$out0
2932 vperm $out0,$out0,$out1,$inpperm
2933 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2938 Loop_xts_enc6x_steal:
2941 bdnz Loop_xts_enc6x_steal
2945 b Loop_xts_enc1x # one more time...
2952 vxor $tweak,$twk0,$rndkey0
2953 le?vperm $tweak,$tweak,$tweak,$leperm
2954 stvx_u $tweak,0,$ivp
2960 stvx $seven,r10,$sp # wipe copies of round keys
2978 lvx v20,r10,$sp # ABI says so
3000 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3001 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3002 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3003 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3004 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3005 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3006 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3009 .byte 0,12,0x04,1,0x80,6,6,0
3014 vcipher $out0,$out0,v24
3015 vcipher $out1,$out1,v24
3016 vcipher $out2,$out2,v24
3017 vcipher $out3,$out3,v24
3018 vcipher $out4,$out4,v24
3019 lvx v24,$x20,$key_ # round[3]
3020 addi $key_,$key_,0x20
3022 vcipher $out0,$out0,v25
3023 vcipher $out1,$out1,v25
3024 vcipher $out2,$out2,v25
3025 vcipher $out3,$out3,v25
3026 vcipher $out4,$out4,v25
3027 lvx v25,$x10,$key_ # round[4]
3028 bdnz _aesp8_xts_enc5x
3030 add $inp,$inp,$taillen
3032 vcipher $out0,$out0,v24
3033 vcipher $out1,$out1,v24
3034 vcipher $out2,$out2,v24
3035 vcipher $out3,$out3,v24
3036 vcipher $out4,$out4,v24
3039 vcipher $out0,$out0,v25
3040 vcipher $out1,$out1,v25
3041 vcipher $out2,$out2,v25
3042 vcipher $out3,$out3,v25
3043 vcipher $out4,$out4,v25
3044 vxor $twk0,$twk0,v31
3046 vcipher $out0,$out0,v26
3047 lvsr $inpperm,0,$taillen # $in5 is no more
3048 vcipher $out1,$out1,v26
3049 vcipher $out2,$out2,v26
3050 vcipher $out3,$out3,v26
3051 vcipher $out4,$out4,v26
3054 vcipher $out0,$out0,v27
3056 vcipher $out1,$out1,v27
3057 vcipher $out2,$out2,v27
3058 vcipher $out3,$out3,v27
3059 vcipher $out4,$out4,v27
3062 addi $key_,$sp,$FRAME+15 # rewind $key_
3063 vcipher $out0,$out0,v28
3064 vcipher $out1,$out1,v28
3065 vcipher $out2,$out2,v28
3066 vcipher $out3,$out3,v28
3067 vcipher $out4,$out4,v28
3068 lvx v24,$x00,$key_ # re-pre-load round[1]
3071 vcipher $out0,$out0,v29
3072 le?vperm $in0,$in0,$in0,$leperm
3073 vcipher $out1,$out1,v29
3074 vcipher $out2,$out2,v29
3075 vcipher $out3,$out3,v29
3076 vcipher $out4,$out4,v29
3077 lvx v25,$x10,$key_ # re-pre-load round[2]
3080 vcipher $out0,$out0,v30
3081 vperm $in0,$in0,$in0,$inpperm
3082 vcipher $out1,$out1,v30
3083 vcipher $out2,$out2,v30
3084 vcipher $out3,$out3,v30
3085 vcipher $out4,$out4,v30
3087 vcipherlast $out0,$out0,$twk0
3088 vcipherlast $out1,$out1,$in1
3089 vcipherlast $out2,$out2,$in2
3090 vcipherlast $out3,$out3,$in3
3091 vcipherlast $out4,$out4,$in4
3094 .byte 0,12,0x14,0,0,0,0,0
3097 _aesp8_xts_decrypt6x:
3098 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3100 li r7,`$FRAME+8*16+15`
3101 li r3,`$FRAME+8*16+31`
3102 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3103 stvx v20,r7,$sp # ABI says so
3126 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3128 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3130 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3132 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3134 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3136 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3138 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
# Constant prep and key-schedule staging.  $eighty7 is rotated into
# its 0x010101..87 form (used by vpermxor tweak updates) and both
# orderings are parked in VSX regs vs1/vs2 so the loop can swap
# between them with xxlor.  Round keys are ?vperm-fixed for
# endianness and copied to an aligned buffer at $FRAME+15($sp) so the
# inner loop can stream them with plain lvx.
3142 # Reverse eighty7 to 0x010101..87
3143 xxlor 2, 32+$eighty7, 32+$eighty7 # stash original ordering in vs2
3144 vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
3145 xxlor 1, 32+$eighty7, 32+$eighty7 # stash rotated ordering in vs1
3147 # Load XOR contents. 0xf102132435465768798a9bacbdcedfe
3150 lxvw4x 0, $x40, r6 # load XOR contents
3154 subi $rounds,$rounds,3 # -4 in total
3156 lvx $rndkey0,$x00,$key1 # load key schedule
3158 addi $key1,$key1,0x20
3160 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3161 addi $key_,$sp,$FRAME+15 # aligned on-stack key buffer
# Key off-load loop (label Load_xts_dec_key and parts of its body are
# elided from this view): alternate v30/v31 as the sliding window
# over the schedule, storing two rounds per iteration.
3165 ?vperm v24,v30,v31,$keyperm
3167 addi $key1,$key1,0x20
3168 stvx v24,$x00,$key_ # off-load round[1]
3169 ?vperm v25,v31,v30,$keyperm
3171 stvx v25,$x10,$key_ # off-load round[2]
3172 addi $key_,$key_,0x20
3173 bdnz Load_xts_dec_key
3176 ?vperm v24,v30,v31,$keyperm
3178 stvx v24,$x00,$key_ # off-load round[3]
3179 ?vperm v25,v31,v26,$keyperm
3181 stvx v25,$x10,$key_ # off-load round[4]
3182 addi $key_,$sp,$FRAME+15 # rewind $key_
# Last few rounds stay resident in v26..v31 for the whole function.
3183 ?vperm v26,v26,v27,$keyperm
3185 ?vperm v27,v27,v28,$keyperm
3187 ?vperm v28,v28,v29,$keyperm
3189 ?vperm v29,v29,v30,$keyperm
3190 lvx $twk5,$x70,$key1 # borrow $twk5
3191 ?vperm v30,v30,v31,$keyperm
3192 lvx v24,$x00,$key_ # pre-load round[1]
3193 ?vperm v31,v31,$twk5,$keyperm
3194 lvx v25,$x10,$key_ # pre-load round[2]
# Compute the first six tweak masks ($twkN = tweak ^ round-key0) and
# whiten six input blocks ($outN = inN ^ twkN).  Between blocks the
# tweak advances by the XTS multiply-by-alpha in GF(2^128):
# vsrab/vaddubm produce the carry mask and the doubled value, vand
# with $eighty7 selects the reduction bytes, and vpermxor folds them
# in.  The last vpermxor operand ($inN) supplies the permute control
# — presumably pre-loaded from the XOR-contents table above (the
# xxlor setup is elided from this view; confirm).
3196 vperm $in0,$inout,$inptail,$inpperm
3197 subi $inp,$inp,31 # undo "caller"
3198 vxor $twk0,$tweak,$rndkey0
3199 vsrab $tmp,$tweak,$seven # next tweak value
3200 vaddubm $tweak,$tweak,$tweak
3201 vand $tmp,$tmp,$eighty7
3202 vxor $out0,$in0,$twk0
3204 vpermxor $tweak, $tweak, $tmp, $in1
3206 lvx_u $in1,$x10,$inp
3207 vxor $twk1,$tweak,$rndkey0
3208 vsrab $tmp,$tweak,$seven # next tweak value
3209 vaddubm $tweak,$tweak,$tweak
3210 le?vperm $in1,$in1,$in1,$leperm
3211 vand $tmp,$tmp,$eighty7
3212 vxor $out1,$in1,$twk1
3214 vpermxor $tweak, $tweak, $tmp, $in2
3216 lvx_u $in2,$x20,$inp
3217 andi. $taillen,$len,15 # partial trailing block?
3218 vxor $twk2,$tweak,$rndkey0
3219 vsrab $tmp,$tweak,$seven # next tweak value
3220 vaddubm $tweak,$tweak,$tweak
3221 le?vperm $in2,$in2,$in2,$leperm
3222 vand $tmp,$tmp,$eighty7
3223 vxor $out2,$in2,$twk2
3225 vpermxor $tweak, $tweak, $tmp, $in3
3227 lvx_u $in3,$x30,$inp
3228 sub $len,$len,$taillen # process whole blocks here
3229 vxor $twk3,$tweak,$rndkey0
3230 vsrab $tmp,$tweak,$seven # next tweak value
3231 vaddubm $tweak,$tweak,$tweak
3232 le?vperm $in3,$in3,$in3,$leperm
3233 vand $tmp,$tmp,$eighty7
3234 vxor $out3,$in3,$twk3
3236 vpermxor $tweak, $tweak, $tmp, $in4
3238 lvx_u $in4,$x40,$inp
3240 vxor $twk4,$tweak,$rndkey0
3241 vsrab $tmp,$tweak,$seven # next tweak value
3242 vaddubm $tweak,$tweak,$tweak
3243 le?vperm $in4,$in4,$in4,$leperm
3244 vand $tmp,$tmp,$eighty7
3245 vxor $out4,$in4,$twk4
3247 vpermxor $tweak, $tweak, $tmp, $in5
3249 lvx_u $in5,$x50,$inp
3251 vxor $twk5,$tweak,$rndkey0
3252 vsrab $tmp,$tweak,$seven # next tweak value
3253 vaddubm $tweak,$tweak,$tweak
3254 le?vperm $in5,$in5,$in5,$leperm
3255 vand $tmp,$tmp,$eighty7
3256 vxor $out5,$in5,$twk5
3258 vpermxor $tweak, $tweak, $tmp, $in0
3260 vxor v31,v31,$rndkey0 # pre-fold rndkey0 into last round key
# Loop_xts_dec6x main body: vncipher rounds over six parallel blocks,
# aggressively interleaved with computation of the NEXT six tweaks
# and with input loads/output stores, so the tweak arithmetic and
# memory traffic hide AES instruction latency.  Instruction ORDER
# here is the optimization — do not reorder.
3266 vncipher $out0,$out0,v24
3267 vncipher $out1,$out1,v24
3268 vncipher $out2,$out2,v24
3269 vncipher $out3,$out3,v24
3270 vncipher $out4,$out4,v24
3271 vncipher $out5,$out5,v24
3272 lvx v24,$x20,$key_ # round[3]
3273 addi $key_,$key_,0x20
3275 vncipher $out0,$out0,v25
3276 vncipher $out1,$out1,v25
3277 vncipher $out2,$out2,v25
3278 vncipher $out3,$out3,v25
3279 vncipher $out4,$out4,v25
3280 vncipher $out5,$out5,v25
3281 lvx v25,$x10,$key_ # round[4]
3284 xxlor 32+$eighty7, 1, 1 # switch to rotated 0x010101..87 form
# $len-=96 with carry record; subfe below turns the borrow into a
# -1/0 mask used to rewind $inp on the final iteration.
3286 subic $len,$len,96 # $len-=96
3287 vxor $in0,$twk0,v31 # xor with last round key
3288 vncipher $out0,$out0,v24
3289 vncipher $out1,$out1,v24
3290 vsrab $tmp,$tweak,$seven # next tweak value
3291 vxor $twk0,$tweak,$rndkey0
3292 vaddubm $tweak,$tweak,$tweak
3293 vncipher $out2,$out2,v24
3294 vncipher $out3,$out3,v24
3295 vncipher $out4,$out4,v24
3296 vncipher $out5,$out5,v24
3298 subfe. r0,r0,r0 # borrow?-1:0
3299 vand $tmp,$tmp,$eighty7
3300 vncipher $out0,$out0,v25
3301 vncipher $out1,$out1,v25
3303 vpermxor $tweak, $tweak, $tmp, $in1
3304 vncipher $out2,$out2,v25
3305 vncipher $out3,$out3,v25
3307 vsrab $tmp,$tweak,$seven # next tweak value
3308 vxor $twk1,$tweak,$rndkey0
3309 vncipher $out4,$out4,v25
3310 vncipher $out5,$out5,v25
3313 vaddubm $tweak,$tweak,$tweak
3314 vncipher $out0,$out0,v26
3315 vncipher $out1,$out1,v26
3316 vand $tmp,$tmp,$eighty7
3317 vncipher $out2,$out2,v26
3318 vncipher $out3,$out3,v26
3320 vpermxor $tweak, $tweak, $tmp, $in2
3321 vncipher $out4,$out4,v26
3322 vncipher $out5,$out5,v26
3324 add $inp,$inp,r0 # $inp is adjusted in such
3325 # way that at exit from the
3326 # loop inX-in5 are loaded
3329 vsrab $tmp,$tweak,$seven # next tweak value
3330 vxor $twk2,$tweak,$rndkey0
3331 vaddubm $tweak,$tweak,$tweak
3332 vncipher $out0,$out0,v27
3333 vncipher $out1,$out1,v27
3334 vncipher $out2,$out2,v27
3335 vncipher $out3,$out3,v27
3336 vand $tmp,$tmp,$eighty7
3337 vncipher $out4,$out4,v27
3338 vncipher $out5,$out5,v27
3340 addi $key_,$sp,$FRAME+15 # rewind $key_
3342 vpermxor $tweak, $tweak, $tmp, $in3
3343 vncipher $out0,$out0,v28
3344 vncipher $out1,$out1,v28
3346 vsrab $tmp,$tweak,$seven # next tweak value
3347 vxor $twk3,$tweak,$rndkey0
3348 vncipher $out2,$out2,v28
3349 vncipher $out3,$out3,v28
3350 vaddubm $tweak,$tweak,$tweak
3351 vncipher $out4,$out4,v28
3352 vncipher $out5,$out5,v28
3353 lvx v24,$x00,$key_ # re-pre-load round[1]
3354 vand $tmp,$tmp,$eighty7
3356 vncipher $out0,$out0,v29
3357 vncipher $out1,$out1,v29
3359 vpermxor $tweak, $tweak, $tmp, $in4
3360 vncipher $out2,$out2,v29
3361 vncipher $out3,$out3,v29
3363 vsrab $tmp,$tweak,$seven # next tweak value
3364 vxor $twk4,$tweak,$rndkey0
3365 vncipher $out4,$out4,v29
3366 vncipher $out5,$out5,v29
3367 lvx v25,$x10,$key_ # re-pre-load round[2]
3368 vaddubm $tweak,$tweak,$tweak
3370 vncipher $out0,$out0,v30
3371 vncipher $out1,$out1,v30
3372 vand $tmp,$tmp,$eighty7
3373 vncipher $out2,$out2,v30
3374 vncipher $out3,$out3,v30
3376 vpermxor $tweak, $tweak, $tmp, $in5
3377 vncipher $out4,$out4,v30
3378 vncipher $out5,$out5,v30
3380 vsrab $tmp,$tweak,$seven # next tweak value
3381 vxor $twk5,$tweak,$rndkey0
# Final round per block, interleaved with loads of the NEXT six
# input blocks and the per-block output masking/stores below.
3383 vncipherlast $out0,$out0,$in0
3384 lvx_u $in0,$x00,$inp # load next input block
3385 vaddubm $tweak,$tweak,$tweak
3386 vncipherlast $out1,$out1,$in1
3387 lvx_u $in1,$x10,$inp
3388 vncipherlast $out2,$out2,$in2
3389 le?vperm $in0,$in0,$in0,$leperm
3390 lvx_u $in2,$x20,$inp
3391 vand $tmp,$tmp,$eighty7
3392 vncipherlast $out3,$out3,$in3
3393 le?vperm $in1,$in1,$in1,$leperm
3394 lvx_u $in3,$x30,$inp
3395 vncipherlast $out4,$out4,$in4
3396 le?vperm $in2,$in2,$in2,$leperm
3397 lvx_u $in4,$x40,$inp
# Preserve the freshly-loaded $in0 across vpermxor, which consumes
# $in0 as its permute-control operand (vs10 is scratch).
3398 xxlor 10, 32+$in0, 32+$in0
3400 vpermxor $tweak, $tweak, $tmp, $in0
3401 xxlor 32+$in0, 10, 10
3402 vncipherlast $out5,$out5,$in5
3403 le?vperm $in3,$in3,$in3,$leperm
3404 lvx_u $in5,$x50,$inp
3406 le?vperm $in4,$in4,$in4,$leperm
3407 le?vperm $in5,$in5,$in5,$leperm
3409 le?vperm $out0,$out0,$out0,$leperm
3410 le?vperm $out1,$out1,$out1,$leperm
3411 stvx_u $out0,$x00,$out # store output
3412 vxor $out0,$in0,$twk0 # whiten next batch
3413 le?vperm $out2,$out2,$out2,$leperm
3414 stvx_u $out1,$x10,$out
3415 vxor $out1,$in1,$twk1
3416 le?vperm $out3,$out3,$out3,$leperm
3417 stvx_u $out2,$x20,$out
3418 vxor $out2,$in2,$twk2
3419 le?vperm $out4,$out4,$out4,$leperm
3420 stvx_u $out3,$x30,$out
3421 vxor $out3,$in3,$twk3
3422 le?vperm $out5,$out5,$out5,$leperm
3423 stvx_u $out4,$x40,$out
3424 vxor $out4,$in4,$twk4
3425 stvx_u $out5,$x50,$out
3426 vxor $out5,$in5,$twk5
3430 beq Loop_xts_dec6x # did $len-=96 borrow?
# Short-tail dispatch: restore the original $eighty7 ordering, undo
# the last $len-=96 and branch by remaining block count.  Each case
# re-whitens the leftover inputs with the live tweaks, runs the
# shared 5x helper (the bl and branch labels are elided from this
# view), stores results, and parks the unused tweak in $twk0 for the
# ciphertext-stealing path ("bne Lxts_dec6x_steal" fires when
# $taillen != 0).
3432 xxlor 32+$eighty7, 2, 2 # restore original 0x87..01 ordering
3434 addic. $len,$len,0x60 # undo final subtract; set CR0/CA
3441 blt Lxts_dec6x_three
# --- five remaining blocks ---
3446 vxor $out0,$in1,$twk0
3447 vxor $out1,$in2,$twk1
3448 vxor $out2,$in3,$twk2
3449 vxor $out3,$in4,$twk3
3450 vxor $out4,$in5,$twk4
3454 le?vperm $out0,$out0,$out0,$leperm
3455 vmr $twk0,$twk5 # unused tweak
3456 vxor $twk1,$tweak,$rndkey0
3457 le?vperm $out1,$out1,$out1,$leperm
3458 stvx_u $out0,$x00,$out # store output
3459 vxor $out0,$in0,$twk1
3460 le?vperm $out2,$out2,$out2,$leperm
3461 stvx_u $out1,$x10,$out
3462 le?vperm $out3,$out3,$out3,$leperm
3463 stvx_u $out2,$x20,$out
3464 le?vperm $out4,$out4,$out4,$leperm
3465 stvx_u $out3,$x30,$out
3466 stvx_u $out4,$x40,$out
3468 bne Lxts_dec6x_steal
# --- four remaining blocks (unused lanes zeroed) ---
3473 vxor $out0,$in2,$twk0
3474 vxor $out1,$in3,$twk1
3475 vxor $out2,$in4,$twk2
3476 vxor $out3,$in5,$twk3
3477 vxor $out4,$out4,$out4
3481 le?vperm $out0,$out0,$out0,$leperm
3482 vmr $twk0,$twk4 # unused tweak
3484 le?vperm $out1,$out1,$out1,$leperm
3485 stvx_u $out0,$x00,$out # store output
3486 vxor $out0,$in0,$twk5
3487 le?vperm $out2,$out2,$out2,$leperm
3488 stvx_u $out1,$x10,$out
3489 le?vperm $out3,$out3,$out3,$leperm
3490 stvx_u $out2,$x20,$out
3491 stvx_u $out3,$x30,$out
3493 bne Lxts_dec6x_steal
# --- three remaining blocks ---
3498 vxor $out0,$in3,$twk0
3499 vxor $out1,$in4,$twk1
3500 vxor $out2,$in5,$twk2
3501 vxor $out3,$out3,$out3
3502 vxor $out4,$out4,$out4
3506 le?vperm $out0,$out0,$out0,$leperm
3507 vmr $twk0,$twk3 # unused tweak
3509 le?vperm $out1,$out1,$out1,$leperm
3510 stvx_u $out0,$x00,$out # store output
3511 vxor $out0,$in0,$twk4
3512 le?vperm $out2,$out2,$out2,$leperm
3513 stvx_u $out1,$x10,$out
3514 stvx_u $out2,$x20,$out
3516 bne Lxts_dec6x_steal
# --- two remaining blocks ---
3521 vxor $out0,$in4,$twk0
3522 vxor $out1,$in5,$twk1
3523 vxor $out2,$out2,$out2
3524 vxor $out3,$out3,$out3
3525 vxor $out4,$out4,$out4
3529 le?vperm $out0,$out0,$out0,$leperm
3530 vmr $twk0,$twk2 # unused tweak
3532 le?vperm $out1,$out1,$out1,$leperm
3533 stvx_u $out0,$x00,$out # store output
3534 vxor $out0,$in0,$twk3
3535 stvx_u $out1,$x10,$out
3537 bne Lxts_dec6x_steal
# Single-block path plus ciphertext stealing.  One block runs the
# full vncipher sequence against the on-stack schedule; for a
# partial tail ($taillen != 0) the LAST full block is decrypted with
# $twk1 (tweak order is swapped for stealing), the partial block is
# rotated into place with lvsr($inpperm), merged via vsel, and the
# combined block gets a final pass with $twk0.  Several loop labels
# and round-loop bodies are elided from this view.
3542 vxor $out0,$in5,$twk0
3545 vncipher $out0,$out0,v24
3546 lvx v24,$x20,$key_ # round[3]
3547 addi $key_,$key_,0x20
3549 vncipher $out0,$out0,v25
3550 lvx v25,$x10,$key_ # round[4]
3554 vncipher $out0,$out0,v24
3558 vncipher $out0,$out0,v25
3561 vncipher $out0,$out0,v26
3564 vncipher $out0,$out0,v27
3566 addi $key_,$sp,$FRAME+15 # rewind $key_
3567 vncipher $out0,$out0,v28
3568 lvx v24,$x00,$key_ # re-pre-load round[1]
3570 vncipher $out0,$out0,v29
3571 lvx v25,$x10,$key_ # re-pre-load round[2]
3572 vxor $twk0,$twk0,v31 # tweak ^ last round key
3574 le?vperm $in0,$in0,$in0,$leperm
3575 vncipher $out0,$out0,v30
3578 vncipherlast $out0,$out0,$twk0
3580 vmr $twk0,$twk1 # unused tweak
3582 le?vperm $out0,$out0,$out0,$leperm
3583 stvx_u $out0,$x00,$out # store output
3585 vxor $out0,$in0,$twk2
3586 bne Lxts_dec6x_steal
# Stealing: decrypt the final full block with $twk1 first.
3595 le?vperm $in0,$in0,$in0,$leperm
3596 vxor $out0,$in0,$twk1
3598 vncipher $out0,$out0,v24
3599 lvx v24,$x20,$key_ # round[3]
3600 addi $key_,$key_,0x20
3602 vncipher $out0,$out0,v25
3603 lvx v25,$x10,$key_ # round[4]
3604 bdnz Lxts_dec6x_steal
3606 add $inp,$inp,$taillen # point at the partial block
3607 vncipher $out0,$out0,v24
3610 vncipher $out0,$out0,v25
3613 vncipher $out0,$out0,v26
3615 lvsr $inpperm,0,$taillen # $in5 is no more
3616 vncipher $out0,$out0,v27
3618 addi $key_,$sp,$FRAME+15 # rewind $key_
3619 vncipher $out0,$out0,v28
3620 lvx v24,$x00,$key_ # re-pre-load round[1]
3622 vncipher $out0,$out0,v29
3623 lvx v25,$x10,$key_ # re-pre-load round[2]
3624 vxor $twk1,$twk1,v31
3626 le?vperm $in0,$in0,$in0,$leperm
3627 vncipher $out0,$out0,v30
3629 vperm $in0,$in0,$in0,$inpperm # rotate partial block
3630 vncipherlast $tmp,$out0,$twk1
3632 le?vperm $out0,$tmp,$tmp,$leperm
3633 le?stvx_u $out0,0,$out
3634 be?stvx_u $tmp,0,$out
# Build a select mask of $taillen ones and splice stolen ciphertext
# bytes into the decrypted block, then whiten with $twk0 for the
# final single-block pass.
3636 vxor $out0,$out0,$out0
3638 vperm $out0,$out0,$out1,$inpperm
3640 vsel $out0,$in0,$tmp,$out0
3639 vxor $out0,$out0,$twk0
3644 Loop_xts_dec6x_steal:
3647 bdnz Loop_xts_dec6x_steal
3651 b Loop_xts_dec1x # one more time...
# Epilogue: write the output tweak back to the caller's IV slot,
# scrub the on-stack round-key copies, restore the non-volatile
# vector regs (from v20 — most restores elided in this view), GPRs
# r26..r31 and LR, then pop the frame.  Trailing .byte row is the
# PPC64 traceback tag.
3658 vxor $tweak,$twk0,$rndkey0 # recover raw tweak from mask
3659 le?vperm $tweak,$tweak,$tweak,$leperm
3660 stvx_u $tweak,0,$ivp
3666 stvx $seven,r10,$sp # wipe copies of round keys
3684 lvx v20,r10,$sp # ABI says so
3706 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3707 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3708 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3709 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3710 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3711 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3712 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3715 .byte 0,12,0x04,1,0x80,6,6,0
# _aesp8_xts_dec5x: shared 5-block decryption helper used by the
# tail paths above — mirror image of the encrypt-side 5x helper,
# looping over the stacked round keys (bdnz) and finishing with
# vncipherlast against the whitening values in $twk0 and $in1..$in4.
3720 vncipher $out0,$out0,v24
3721 vncipher $out1,$out1,v24
3722 vncipher $out2,$out2,v24
3723 vncipher $out3,$out3,v24
3724 vncipher $out4,$out4,v24
3725 lvx v24,$x20,$key_ # round[3]
3726 addi $key_,$key_,0x20
3728 vncipher $out0,$out0,v25
3729 vncipher $out1,$out1,v25
3730 vncipher $out2,$out2,v25
3731 vncipher $out3,$out3,v25
3732 vncipher $out4,$out4,v25
3733 lvx v25,$x10,$key_ # round[4]
3734 bdnz _aesp8_xts_dec5x
# Post-loop rounds with the resident keys v24..v30.
3737 vncipher $out0,$out0,v24
3738 vncipher $out1,$out1,v24
3739 vncipher $out2,$out2,v24
3740 vncipher $out3,$out3,v24
3741 vncipher $out4,$out4,v24
3745 vncipher $out0,$out0,v25
3746 vncipher $out1,$out1,v25
3747 vncipher $out2,$out2,v25
3748 vncipher $out3,$out3,v25
3749 vncipher $out4,$out4,v25
3750 vxor $twk0,$twk0,v31 # fold last round key into tweak mask
3753 vncipher $out0,$out0,v26
3754 vncipher $out1,$out1,v26
3755 vncipher $out2,$out2,v26
3756 vncipher $out3,$out3,v26
3757 vncipher $out4,$out4,v26
3760 vncipher $out0,$out0,v27
3762 vncipher $out1,$out1,v27
3763 vncipher $out2,$out2,v27
3764 vncipher $out3,$out3,v27
3765 vncipher $out4,$out4,v27
3768 addi $key_,$sp,$FRAME+15 # rewind $key_
3769 vncipher $out0,$out0,v28
3770 vncipher $out1,$out1,v28
3771 vncipher $out2,$out2,v28
3772 vncipher $out3,$out3,v28
3773 vncipher $out4,$out4,v28
3774 lvx v24,$x00,$key_ # re-pre-load round[1]
3777 vncipher $out0,$out0,v29
3778 le?vperm $in0,$in0,$in0,$leperm
3779 vncipher $out1,$out1,v29
3780 vncipher $out2,$out2,v29
3781 vncipher $out3,$out3,v29
3782 vncipher $out4,$out4,v29
3783 lvx v25,$x10,$key_ # re-pre-load round[2]
3786 vncipher $out0,$out0,v30
3787 vncipher $out1,$out1,v30
3788 vncipher $out2,$out2,v30
3789 vncipher $out3,$out3,v30
3790 vncipher $out4,$out4,v30
# Final round fused with XTS output masking.
3792 vncipherlast $out0,$out0,$twk0
3793 vncipherlast $out1,$out1,$in1
3794 vncipherlast $out2,$out2,$in2
3795 vncipherlast $out3,$out3,$in3
3796 vncipherlast $out4,$out4,$in4
# PPC64 traceback tag for this helper.
3800 .byte 0,12,0x14,0,0,0,0,0
# Final pass over the generated $code, line by line: evaluate
# `...`-quoted arithmetic, re-emit the constants table in an
# endian-agnostic .byte form, and rewrite '?'-prefixed instructions
# whose operand order is endian-sensitive.  (Parts of the loop body
# and its closing braces are elided from this view.)
3805 foreach(split("\n",$code)) {
3806 s/\`([^\`]*)\`/eval($1)/geo; # expand `expr` to its value
3808 # constants table endian-specific conversion
3809 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3813 # convert to endian-agnostic format
3815 foreach (split(/,\s*/,$2)) {
3816 my $l = /^0/?oct:int; # leading 0 => octal/hex via oct()
3817 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3820 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3823 # little-endian conversion
3824 if ($flavour =~ /le$/o) {
3825 SWITCH: for($conv) {
3826 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; # nibble-invert
3827 /\?rev/ && do { @bytes=reverse(@bytes); last; }; # byte-reverse
3832 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3835 $consts=0 if (m/Lconsts:/o); # end of table
3837 # instructions prefixed with '?' are endian-specific and need
3838 # to be adjusted accordingly...
3839 if ($flavour =~ /le$/o) { # little-endian
# vperm: swap the two source registers; vsldoi: complement the
# shift (16-n); vspltw: mirror the lane index (3-n).
3844 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3845 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3846 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3847 } else { # big-endian
3856 close STDOUT or die "error closing STDOUT: $!";