# aes/asm/aesp8-ppc.pl: implement "tweak chaining".
# [openssl.git] / crypto / aes / asm / aesp8-ppc.pl
#! /usr/bin/env perl
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies MSR.VSX flag being
# set. It should also be noted that ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in pure AltiVec/VMX way [when data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are in average lower
# than additional overhead in pure AltiVec approach.
#
# May 2016
#
# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
# systems were measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0
# First command-line argument selects the target "flavour" (e.g.
# linux64le, linux32, aix64): it determines pointer size, ABI opcodes
# and endianness of the generated assembly.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Locate the perlasm PPC translator next to this script or in the
# canonical perlasm directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe all generated code through the translator.  NB: low-precedence
# "or" is required here; with "||" the die would bind to the (always
# true) command string and an open() failure would go unnoticed.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";
#########################################################################
{{{	# Key setup procedures						#
# GPRs r3..r8: the three C arguments (userKey, bits, key) plus scratch.
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
# v0..v6: running key-schedule state; v7..v11: vperm/vsel machinery for
# unaligned stores of the output schedule.
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	 vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	 vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in1,$in1,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
# Emits .${prefix}_encrypt or .${prefix}_decrypt depending on $dir
# ("en"/"de"); $n selects vcipher vs vncipher mnemonics.  NB: no ()
# prototype here — the sub takes an argument, and the old empty
# prototype only "worked" because call sites used & to bypass it.
sub gen_block {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
.align	5
.${prefix}_${dir}crypt:
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
gen_block("en");
gen_block("de");
}}}
#########################################################################
{{{	# CBC en- and decrypt procedures				#
# GPRs r3..r10: the six C arguments (in, out, length, key, ivec, enc)
# plus round counter and key-schedule index.
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	${UCMP}i	$len,16
	bltlr-

	cmpwi		$enc,0			# test direction
	lis		r0,0xffe0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1
	beq		Lcbc_dec

Lcbc_enc:
	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	 vperm		$inout,$inout,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	vxor		$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm		$tmp,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_enc

	b		Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge		_aesp8_cbc_decrypt8x
	vmr		$tmp,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	 vperm		$tmp,$tmp,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$tmp,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16

Loop_cbc_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor		$inout,$inout,$ivec
	vmr		$ivec,$tmp
	vperm		$tmp,$inout,$inout,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_dec

Lcbc_done:
	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	neg		$enc,$ivp		# write [unaligned] iv
	li		$idx,15			# 15 is not typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
667 #########################################################################
668 {{      # Optimized CBC decrypt procedure                               #
669 my $key_="r11";
670 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
671     $x00=0 if ($flavour =~ /osx/);
672 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
673 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
674 my $rndkey0="v23";      # v24-v25 rotating buffer for first found keys
675                         # v26-v31 last 6 round keys
676 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
677
678 $code.=<<___;
679 .align  5
680 _aesp8_cbc_decrypt8x:
681         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
682         li              r10,`$FRAME+8*16+15`
683         li              r11,`$FRAME+8*16+31`
684         stvx            v20,r10,$sp             # ABI says so
685         addi            r10,r10,32
686         stvx            v21,r11,$sp
687         addi            r11,r11,32
688         stvx            v22,r10,$sp
689         addi            r10,r10,32
690         stvx            v23,r11,$sp
691         addi            r11,r11,32
692         stvx            v24,r10,$sp
693         addi            r10,r10,32
694         stvx            v25,r11,$sp
695         addi            r11,r11,32
696         stvx            v26,r10,$sp
697         addi            r10,r10,32
698         stvx            v27,r11,$sp
699         addi            r11,r11,32
700         stvx            v28,r10,$sp
701         addi            r10,r10,32
702         stvx            v29,r11,$sp
703         addi            r11,r11,32
704         stvx            v30,r10,$sp
705         stvx            v31,r11,$sp
706         li              r0,-1
707         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
708         li              $x10,0x10
709         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
710         li              $x20,0x20
711         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
712         li              $x30,0x30
713         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
714         li              $x40,0x40
715         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
716         li              $x50,0x50
717         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
718         li              $x60,0x60
719         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
720         li              $x70,0x70
721         mtspr           256,r0
722
723         subi            $rounds,$rounds,3       # -4 in total
724         subi            $len,$len,128           # bias
725
726         lvx             $rndkey0,$x00,$key      # load key schedule
727         lvx             v30,$x10,$key
728         addi            $key,$key,0x20
729         lvx             v31,$x00,$key
730         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
731         addi            $key_,$sp,$FRAME+15
732         mtctr           $rounds
733
734 Load_cbc_dec_key:
735         ?vperm          v24,v30,v31,$keyperm
736         lvx             v30,$x10,$key
737         addi            $key,$key,0x20
738         stvx            v24,$x00,$key_          # off-load round[1]
739         ?vperm          v25,v31,v30,$keyperm
740         lvx             v31,$x00,$key
741         stvx            v25,$x10,$key_          # off-load round[2]
742         addi            $key_,$key_,0x20
743         bdnz            Load_cbc_dec_key
744
745         lvx             v26,$x10,$key
746         ?vperm          v24,v30,v31,$keyperm
747         lvx             v27,$x20,$key
748         stvx            v24,$x00,$key_          # off-load round[3]
749         ?vperm          v25,v31,v26,$keyperm
750         lvx             v28,$x30,$key
751         stvx            v25,$x10,$key_          # off-load round[4]
752         addi            $key_,$sp,$FRAME+15     # rewind $key_
753         ?vperm          v26,v26,v27,$keyperm
754         lvx             v29,$x40,$key
755         ?vperm          v27,v27,v28,$keyperm
756         lvx             v30,$x50,$key
757         ?vperm          v28,v28,v29,$keyperm
758         lvx             v31,$x60,$key
759         ?vperm          v29,v29,v30,$keyperm
760         lvx             $out0,$x70,$key         # borrow $out0
761         ?vperm          v30,v30,v31,$keyperm
762         lvx             v24,$x00,$key_          # pre-load round[1]
763         ?vperm          v31,v31,$out0,$keyperm
764         lvx             v25,$x10,$key_          # pre-load round[2]

	#lvx		$inptail,0,$inp		# "caller" already did this
	#addi		$inp,$inp,15		# 15 is not typo
	subi		$inp,$inp,15		# undo "caller"

	# Load the first eight 16-byte ciphertext blocks with unaligned
	# VSX loads, byte-swap them on little-endian ($inpperm built from
	# lvsl(8) xor 0x0f), and xor each with round key [0] to start the
	# eight interleaved decryption streams in out0-out7.
	 le?li		$idx,8
	lvx_u		$in0,$x00,$inp		# load first 8 "words"
	 le?lvsl	$inpperm,0,$idx
	 le?vspltisb	$tmp,0x0f
	lvx_u		$in1,$x10,$inp
	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u		$in2,$x20,$inp
	 le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in3,$x30,$inp
	 le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in4,$x40,$inp
	 le?vperm	$in2,$in2,$in2,$inpperm
	vxor		$out0,$in0,$rndkey0
	lvx_u		$in5,$x50,$inp
	 le?vperm	$in3,$in3,$in3,$inpperm
	vxor		$out1,$in1,$rndkey0
	lvx_u		$in6,$x60,$inp
	 le?vperm	$in4,$in4,$in4,$inpperm
	vxor		$out2,$in2,$rndkey0
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80
	 le?vperm	$in5,$in5,$in5,$inpperm
	vxor		$out3,$in3,$rndkey0
	 le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out4,$in4,$rndkey0
	 le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out5,$in5,$rndkey0
	vxor		$out6,$in6,$rndkey0
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	b		Loop_cbc_dec8x
.align	5
# Inner round loop: two AES decrypt rounds per iteration across all
# eight streams, streaming rounds [3],[4],... from the stack copy at
# $key_ through the rotating v24/v25 buffer.
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x

	# Remaining rounds are unrolled below, interleaved with branch-free
	# length bookkeeping: subic/subfe compute a -1/0 mask from the
	# borrow of $len-128, which later clamps the input-pointer advance
	# so the final partial pass reloads the last blocks harmlessly.
	subic		$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and		r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	# Pre-fold the last round key (v31) into the CBC chaining values:
	# vncipherlast below then both finishes the cipher and xors in the
	# previous ciphertext block in one instruction per stream.
	vncipher	$out0,$out0,v30
	 vxor		$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	 vxor		$in0,$in0,v31
	vncipher	$out2,$out2,v30
	 vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	 vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	 vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	 vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	 vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	 vxor		$in6,$in6,v31

	# Finish all eight streams (last round + CBC xor with previous
	# ciphertext), while prefetching the next eight input blocks and
	# storing the eight plaintext blocks; the last ciphertext block
	# becomes the chaining value for the next pass.
	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	 lvx_u		$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	 lvx_u		$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	 le?vperm	$in0,$in0,$in0,$inpperm
	 lvx_u		$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	 le?vperm	$in1,$in1,$in1,$inpperm
	 lvx_u		$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	 le?vperm	$in2,$in2,$in2,$inpperm
	 lvx_u		$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	 le?vperm	$in3,$in3,$in3,$inpperm
	 lvx_u		$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	 le?vperm	$in4,$in4,$in4,$inpperm
	 lvx_u		$in6,$x60,$inp
	vmr		$ivec,$in7
	 le?vperm	$in5,$in5,$in5,$inpperm
	 lvx_u		$in7,$x70,$inp
	 addi		$inp,$inp,0x80

	# Store results (byte-swapped back on little-endian) and start the
	# next pass by xoring the freshly loaded blocks with round key [0].
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	 le?vperm	$in6,$in6,$in6,$inpperm
	 vxor		$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	 le?vperm	$in7,$in7,$in7,$inpperm
	 vxor		$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	 vxor		$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	 vxor		$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	 vxor		$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	 vxor		$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	 vxor		$out6,$in6,$rndkey0
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	 vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	beq		Loop_cbc_dec8x		# did $len-=128 borrow?

	# Tail handling: restore the (negative) residue to the true number
	# of remaining bytes; zero means the length was an exact multiple
	# of 128 and we are done.
	addic.		$len,$len,128
	beq		Lcbc_dec8x_done
	nop
	nop

# Tail pass decrypts the last 1..7 blocks. Streams out1-out7 were
# speculatively started in the main pass; the unused leading streams
# are simply never stored (the switch below picks the live ones).
Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	 vxor		$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	 vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	 vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	 vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	 vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	 vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	 vxor		$in6,$in6,v31

	# Dispatch on remaining byte count (16,32,...,112) to the matching
	# 1..7-block finisher.
	cmplwi		$len,32			# switch($len)
	blt		Lcbc_dec8x_one
	nop
	beq		Lcbc_dec8x_two
	cmplwi		$len,64
	blt		Lcbc_dec8x_three
	nop
	beq		Lcbc_dec8x_four
	cmplwi		$len,96
	blt		Lcbc_dec8x_five
	nop
	beq		Lcbc_dec8x_six

# Seven-block tail: finish streams out1-out7, save last ciphertext
# block as the outgoing IV, byte-swap on LE, store 0x70 bytes.
Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x50,$out
	stvx_u		$out7,$x60,$out
	addi		$out,$out,0x70
	b		Lcbc_dec8x_done

.align	5
# Six-block tail: finish streams out2-out7, store 0x60 bytes.
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x40,$out
	stvx_u		$out7,$x50,$out
	addi		$out,$out,0x60
	b		Lcbc_dec8x_done

.align	5
# Five-block tail: finish streams out3-out7, store 0x50 bytes.
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x30,$out
	stvx_u		$out7,$x40,$out
	addi		$out,$out,0x50
	b		Lcbc_dec8x_done

.align	5
# Four-block tail: finish streams out4-out7, store 0x40 bytes.
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x20,$out
	stvx_u		$out7,$x30,$out
	addi		$out,$out,0x40
	b		Lcbc_dec8x_done

.align	5
# Three-block tail: finish streams out5-out7, store 0x30 bytes.
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x10,$out
	stvx_u		$out7,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
# Two-block tail: finish streams out6-out7, store 0x20 bytes.
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x00,$out
	stvx_u		$out7,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
# One-block tail: finish stream out7 only, store 0x10 bytes.
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr		$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out7,0,$out
	addi		$out,$out,0x10

# Common exit: write back the chaining value, scrub the stack copies of
# the round keys, restore callee-saved vector and GP registers per ABI,
# and return.
Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u		$ivec,0,$ivp		# write [unaligned] iv

	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#
# Register allocation for the CTR path: GP args/indices in r3-r10,
# working vectors in v0-v11; $dat aliases $tmp.
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
.align	5
# One-block-at-a-time CTR path; falls through to the 8x subroutine when
# at least 8 blocks were requested. Handles unaligned iv/input/output
# via lvsl/lvsr + vperm/vsel.
.${prefix}_ctr32_encrypt_blocks:
	${UCMP}i	$len,1
	bltlr-

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	 vspltisb	$one,1
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm
	 vsldoi		$one,$rndkey0,$one,1	# counter increment in lane 3

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	srwi		$rounds,$rounds,1	# loop does 2 rounds per pass
	li		$idx,16
	subi		$rounds,$rounds,1	# last 2 rounds handled outside

	${UCMP}i	$len,8
	bge		_aesp8_ctr32_encrypt8x

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	lvx		$rndkey0,0,$key
	mtctr		$rounds
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	b		Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_ctr32_enc

	# Last two rounds: bump the counter, fetch the next input block,
	# fold last round key into the data so vcipherlast completes the
	# cipher and the xor with the plaintext in one step.
	vadduwm		$ivec,$ivec,$one
	 vmr		$dat,$inptail
	 lvx		$inptail,0,$inp
	 addi		$inp,$inp,16
	 subic.		$len,$len,1		# blocks--

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	 vperm		$dat,$dat,$inptail,$inpperm
	 li		$idx,16
	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
	 lvx		$rndkey0,0,$key
	vxor		$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	 lvx		$rndkey1,$idx,$key
	 addi		$idx,$idx,16
	vperm		$inout,$inout,$inout,$outperm
	vsel		$dat,$outhead,$inout,$outmask
	 mtctr		$rounds
	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr		$outhead,$inout
	 vxor		$inout,$ivec,$rndkey0
	 lvx		$rndkey0,$idx,$key
	 addi		$idx,$idx,16
	stvx		$dat,0,$out
	addi		$out,$out,16
	bne		Loop_ctr32_enc

	# Flush the final partial store-ahead vector.
	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CTR procedure					#
# Register allocation for the 8x CTR path; mirrors the 8x CBC layout:
# eight in/out vector pairs, v23 for round[0], v24-v25 rotating buffer,
# v26-v31 resident last round keys. $tmp/$keyperm alias caller vectors.
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
1386 .align  5
1387 _aesp8_ctr32_encrypt8x:
1388         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1389         li              r10,`$FRAME+8*16+15`
1390         li              r11,`$FRAME+8*16+31`
1391         stvx            v20,r10,$sp             # ABI says so
1392         addi            r10,r10,32
1393         stvx            v21,r11,$sp
1394         addi            r11,r11,32
1395         stvx            v22,r10,$sp
1396         addi            r10,r10,32
1397         stvx            v23,r11,$sp
1398         addi            r11,r11,32
1399         stvx            v24,r10,$sp
1400         addi            r10,r10,32
1401         stvx            v25,r11,$sp
1402         addi            r11,r11,32
1403         stvx            v26,r10,$sp
1404         addi            r10,r10,32
1405         stvx            v27,r11,$sp
1406         addi            r11,r11,32
1407         stvx            v28,r10,$sp
1408         addi            r10,r10,32
1409         stvx            v29,r11,$sp
1410         addi            r11,r11,32
1411         stvx            v30,r10,$sp
1412         stvx            v31,r11,$sp
1413         li              r0,-1
1414         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
1415         li              $x10,0x10
1416         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1417         li              $x20,0x20
1418         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1419         li              $x30,0x30
1420         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1421         li              $x40,0x40
1422         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1423         li              $x50,0x50
1424         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1425         li              $x60,0x60
1426         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1427         li              $x70,0x70
1428         mtspr           256,r0
1429
1430         subi            $rounds,$rounds,3       # -4 in total
1431
1432         lvx             $rndkey0,$x00,$key      # load key schedule
1433         lvx             v30,$x10,$key
1434         addi            $key,$key,0x20
1435         lvx             v31,$x00,$key
1436         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
1437         addi            $key_,$sp,$FRAME+15
1438         mtctr           $rounds
1439
1440 Load_ctr32_enc_key:
1441         ?vperm          v24,v30,v31,$keyperm
1442         lvx             v30,$x10,$key
1443         addi            $key,$key,0x20
1444         stvx            v24,$x00,$key_          # off-load round[1]
1445         ?vperm          v25,v31,v30,$keyperm
1446         lvx             v31,$x00,$key
1447         stvx            v25,$x10,$key_          # off-load round[2]
1448         addi            $key_,$key_,0x20
1449         bdnz            Load_ctr32_enc_key
1450
1451         lvx             v26,$x10,$key
1452         ?vperm          v24,v30,v31,$keyperm
1453         lvx             v27,$x20,$key
1454         stvx            v24,$x00,$key_          # off-load round[3]
1455         ?vperm          v25,v31,v26,$keyperm
1456         lvx             v28,$x30,$key
1457         stvx            v25,$x10,$key_          # off-load round[4]
1458         addi            $key_,$sp,$FRAME+15     # rewind $key_
1459         ?vperm          v26,v26,v27,$keyperm
1460         lvx             v29,$x40,$key
1461         ?vperm          v27,v27,v28,$keyperm
1462         lvx             v30,$x50,$key
1463         ?vperm          v28,v28,v29,$keyperm
1464         lvx             v31,$x60,$key
1465         ?vperm          v29,v29,v30,$keyperm
1466         lvx             $out0,$x70,$key         # borrow $out0
1467         ?vperm          v30,v30,v31,$keyperm
1468         lvx             v24,$x00,$key_          # pre-load round[1]
1469         ?vperm          v31,v31,$out0,$keyperm
1470         lvx             v25,$x10,$key_          # pre-load round[2]
1471
1472         vadduwm         $two,$one,$one
1473         subi            $inp,$inp,15            # undo "caller"
1474         $SHL            $len,$len,4
1475
1476         vadduwm         $out1,$ivec,$one        # counter values ...
1477         vadduwm         $out2,$ivec,$two
1478         vxor            $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1479          le?li          $idx,8
1480         vadduwm         $out3,$out1,$two
1481         vxor            $out1,$out1,$rndkey0
1482          le?lvsl        $inpperm,0,$idx
1483         vadduwm         $out4,$out2,$two
1484         vxor            $out2,$out2,$rndkey0
1485          le?vspltisb    $tmp,0x0f
1486         vadduwm         $out5,$out3,$two
1487         vxor            $out3,$out3,$rndkey0
1488          le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
1489         vadduwm         $out6,$out4,$two
1490         vxor            $out4,$out4,$rndkey0
1491         vadduwm         $out7,$out5,$two
1492         vxor            $out5,$out5,$rndkey0
1493         vadduwm         $ivec,$out6,$two        # next counter value
1494         vxor            $out6,$out6,$rndkey0
1495         vxor            $out7,$out7,$rndkey0
1496
1497         mtctr           $rounds
1498         b               Loop_ctr32_enc8x
1499 .align  5
# Main round loop: applies two AES rounds (v24, v25) to all eight blocks per
# iteration, reloading the next pair of round keys from the stack copy at
# $key_.  Re-entry at Loop_ctr32_enc8x_middle (from the batch-finish code
# below) skips the first v24 round, which that code has already applied.
1500 Loop_ctr32_enc8x:
1501         vcipher         $out0,$out0,v24
1502         vcipher         $out1,$out1,v24
1503         vcipher         $out2,$out2,v24
1504         vcipher         $out3,$out3,v24
1505         vcipher         $out4,$out4,v24
1506         vcipher         $out5,$out5,v24
1507         vcipher         $out6,$out6,v24
1508         vcipher         $out7,$out7,v24
1509 Loop_ctr32_enc8x_middle:
1510         lvx             v24,$x20,$key_          # round[3]
1511         addi            $key_,$key_,0x20
1512
1513         vcipher         $out0,$out0,v25
1514         vcipher         $out1,$out1,v25
1515         vcipher         $out2,$out2,v25
1516         vcipher         $out3,$out3,v25
1517         vcipher         $out4,$out4,v25
1518         vcipher         $out5,$out5,v25
1519         vcipher         $out6,$out6,v25
1520         vcipher         $out7,$out7,v25
1521         lvx             v25,$x10,$key_          # round[4]
1522         bdnz            Loop_ctr32_enc8x
1523
# Final AES rounds (v24..v30), interleaved with the next 128 bytes of input
# being loaded, byte-swapped on LE, and xored with the last round key (v31).
# The subic/subfe pairs compute branchless masks: r0 becomes -1 when fewer
# than 256 (resp. 129) bytes remain, and is used to pin $inp so the last
# partial batch re-reads the final input words instead of running past the
# buffer.
1524         subic           r11,$len,256            # $len-256, borrow $key_
1525         vcipher         $out0,$out0,v24
1526         vcipher         $out1,$out1,v24
1527         vcipher         $out2,$out2,v24
1528         vcipher         $out3,$out3,v24
1529         vcipher         $out4,$out4,v24
1530         vcipher         $out5,$out5,v24
1531         vcipher         $out6,$out6,v24
1532         vcipher         $out7,$out7,v24
1533
1534         subfe           r0,r0,r0                # borrow?-1:0
1535         vcipher         $out0,$out0,v25
1536         vcipher         $out1,$out1,v25
1537         vcipher         $out2,$out2,v25
1538         vcipher         $out3,$out3,v25
1539         vcipher         $out4,$out4,v25
1540         vcipher         $out5,$out5,v25
1541         vcipher         $out6,$out6,v25
1542         vcipher         $out7,$out7,v25
1543
1544         and             r0,r0,r11
1545         addi            $key_,$sp,$FRAME+15     # rewind $key_
1546         vcipher         $out0,$out0,v26
1547         vcipher         $out1,$out1,v26
1548         vcipher         $out2,$out2,v26
1549         vcipher         $out3,$out3,v26
1550         vcipher         $out4,$out4,v26
1551         vcipher         $out5,$out5,v26
1552         vcipher         $out6,$out6,v26
1553         vcipher         $out7,$out7,v26
1554         lvx             v24,$x00,$key_          # re-pre-load round[1]
1555
# $len-129 (done as -129 then +1 to set the carry chain as needed): borrow
# below signals that this is the final, possibly partial, batch.
1556         subic           $len,$len,129           # $len-=129
1557         vcipher         $out0,$out0,v27
1558         addi            $len,$len,1             # $len-=128 really
1559         vcipher         $out1,$out1,v27
1560         vcipher         $out2,$out2,v27
1561         vcipher         $out3,$out3,v27
1562         vcipher         $out4,$out4,v27
1563         vcipher         $out5,$out5,v27
1564         vcipher         $out6,$out6,v27
1565         vcipher         $out7,$out7,v27
1566         lvx             v25,$x10,$key_          # re-pre-load round[2]
1567
1568         vcipher         $out0,$out0,v28
1569          lvx_u          $in0,$x00,$inp          # load input
1570         vcipher         $out1,$out1,v28
1571          lvx_u          $in1,$x10,$inp
1572         vcipher         $out2,$out2,v28
1573          lvx_u          $in2,$x20,$inp
1574         vcipher         $out3,$out3,v28
1575          lvx_u          $in3,$x30,$inp
1576         vcipher         $out4,$out4,v28
1577          lvx_u          $in4,$x40,$inp
1578         vcipher         $out5,$out5,v28
1579          lvx_u          $in5,$x50,$inp
1580         vcipher         $out6,$out6,v28
1581          lvx_u          $in6,$x60,$inp
1582         vcipher         $out7,$out7,v28
1583          lvx_u          $in7,$x70,$inp
1584          addi           $inp,$inp,0x80
1585
1586         vcipher         $out0,$out0,v29
1587          le?vperm       $in0,$in0,$in0,$inpperm
1588         vcipher         $out1,$out1,v29
1589          le?vperm       $in1,$in1,$in1,$inpperm
1590         vcipher         $out2,$out2,v29
1591          le?vperm       $in2,$in2,$in2,$inpperm
1592         vcipher         $out3,$out3,v29
1593          le?vperm       $in3,$in3,$in3,$inpperm
1594         vcipher         $out4,$out4,v29
1595          le?vperm       $in4,$in4,$in4,$inpperm
1596         vcipher         $out5,$out5,v29
1597          le?vperm       $in5,$in5,$in5,$inpperm
1598         vcipher         $out6,$out6,v29
1599          le?vperm       $in6,$in6,$in6,$inpperm
1600         vcipher         $out7,$out7,v29
1601          le?vperm       $in7,$in7,$in7,$inpperm
1602
1603         add             $inp,$inp,r0            # $inp is adjusted in such
1604                                                 # way that at exit from the
1605                                                 # loop inX-in7 are loaded
1606                                                 # with last "words"
1607         subfe.          r0,r0,r0                # borrow?-1:0
1608         vcipher         $out0,$out0,v30
1609          vxor           $in0,$in0,v31           # xor with last round key
1610         vcipher         $out1,$out1,v30
1611          vxor           $in1,$in1,v31
1612         vcipher         $out2,$out2,v30
1613          vxor           $in2,$in2,v31
1614         vcipher         $out3,$out3,v30
1615          vxor           $in3,$in3,v31
1616         vcipher         $out4,$out4,v30
1617          vxor           $in4,$in4,v31
1618         vcipher         $out5,$out5,v30
1619          vxor           $in5,$in5,v31
1620         vcipher         $out6,$out6,v30
1621          vxor           $in6,$in6,v31
1622         vcipher         $out7,$out7,v30
1623          vxor           $in7,$in7,v31
1624
1625         bne             Lctr32_enc8x_break      # did $len-129 borrow?
1626
# Full-batch path (>=128 bytes still remain): finish all eight blocks with
# vcipherlast (keystream xor input was folded into $inN by the v31 xors
# above), store the 128 bytes of ciphertext, and — interleaved — set up the
# next batch of counters exactly as in the prologue, then re-enter the round
# loop at Loop_ctr32_enc8x_middle with the first v24 round already applied.
1627         vcipherlast     $in0,$out0,$in0
1628         vcipherlast     $in1,$out1,$in1
1629          vadduwm        $out1,$ivec,$one        # counter values ...
1630         vcipherlast     $in2,$out2,$in2
1631          vadduwm        $out2,$ivec,$two
1632          vxor           $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1633         vcipherlast     $in3,$out3,$in3
1634          vadduwm        $out3,$out1,$two
1635          vxor           $out1,$out1,$rndkey0
1636         vcipherlast     $in4,$out4,$in4
1637          vadduwm        $out4,$out2,$two
1638          vxor           $out2,$out2,$rndkey0
1639         vcipherlast     $in5,$out5,$in5
1640          vadduwm        $out5,$out3,$two
1641          vxor           $out3,$out3,$rndkey0
1642         vcipherlast     $in6,$out6,$in6
1643          vadduwm        $out6,$out4,$two
1644          vxor           $out4,$out4,$rndkey0
1645         vcipherlast     $in7,$out7,$in7
1646          vadduwm        $out7,$out5,$two
1647          vxor           $out5,$out5,$rndkey0
1648         le?vperm        $in0,$in0,$in0,$inpperm
1649          vadduwm        $ivec,$out6,$two        # next counter value
1650          vxor           $out6,$out6,$rndkey0
1651         le?vperm        $in1,$in1,$in1,$inpperm
1652          vxor           $out7,$out7,$rndkey0
1653         mtctr           $rounds
1654
1655          vcipher        $out0,$out0,v24
1656         stvx_u          $in0,$x00,$out
1657         le?vperm        $in2,$in2,$in2,$inpperm
1658          vcipher        $out1,$out1,v24
1659         stvx_u          $in1,$x10,$out
1660         le?vperm        $in3,$in3,$in3,$inpperm
1661          vcipher        $out2,$out2,v24
1662         stvx_u          $in2,$x20,$out
1663         le?vperm        $in4,$in4,$in4,$inpperm
1664          vcipher        $out3,$out3,v24
1665         stvx_u          $in3,$x30,$out
1666         le?vperm        $in5,$in5,$in5,$inpperm
1667          vcipher        $out4,$out4,v24
1668         stvx_u          $in4,$x40,$out
1669         le?vperm        $in6,$in6,$in6,$inpperm
1670          vcipher        $out5,$out5,v24
1671         stvx_u          $in5,$x50,$out
1672         le?vperm        $in7,$in7,$in7,$inpperm
1673          vcipher        $out6,$out6,v24
1674         stvx_u          $in6,$x60,$out
1675          vcipher        $out7,$out7,v24
1676         stvx_u          $in7,$x70,$out
1677         addi            $out,$out,0x80
1678
1679         b               Loop_ctr32_enc8x_middle
1680
1681 .align  5
# Last (possibly partial) batch: $len is now (remaining-128), a multiple of
# -16 in [-0x70, 0].  Dispatch on how many 16-byte blocks actually remain;
# fall through to the eight-block case when $len == 0.
1682 Lctr32_enc8x_break:
1683         cmpwi           $len,-0x60
1684         blt             Lctr32_enc8x_one
1685         nop
1686         beq             Lctr32_enc8x_two
1687         cmpwi           $len,-0x40
1688         blt             Lctr32_enc8x_three
1689         nop
1690         beq             Lctr32_enc8x_four
1691         cmpwi           $len,-0x20
1692         blt             Lctr32_enc8x_five
1693         nop
1694         beq             Lctr32_enc8x_six
1695         cmpwi           $len,0x00
1696         blt             Lctr32_enc8x_seven
1697
# Exactly eight blocks remain: combine keystream with inputs $in0..$in7
# (already xored with the last round key), byte-swap on LE, store 128 bytes.
1698 Lctr32_enc8x_eight:
1699         vcipherlast     $out0,$out0,$in0
1700         vcipherlast     $out1,$out1,$in1
1701         vcipherlast     $out2,$out2,$in2
1702         vcipherlast     $out3,$out3,$in3
1703         vcipherlast     $out4,$out4,$in4
1704         vcipherlast     $out5,$out5,$in5
1705         vcipherlast     $out6,$out6,$in6
1706         vcipherlast     $out7,$out7,$in7
1707
1708         le?vperm        $out0,$out0,$out0,$inpperm
1709         le?vperm        $out1,$out1,$out1,$inpperm
1710         stvx_u          $out0,$x00,$out
1711         le?vperm        $out2,$out2,$out2,$inpperm
1712         stvx_u          $out1,$x10,$out
1713         le?vperm        $out3,$out3,$out3,$inpperm
1714         stvx_u          $out2,$x20,$out
1715         le?vperm        $out4,$out4,$out4,$inpperm
1716         stvx_u          $out3,$x30,$out
1717         le?vperm        $out5,$out5,$out5,$inpperm
1718         stvx_u          $out4,$x40,$out
1719         le?vperm        $out6,$out6,$out6,$inpperm
1720         stvx_u          $out5,$x50,$out
1721         le?vperm        $out7,$out7,$out7,$inpperm
1722         stvx_u          $out6,$x60,$out
1723         stvx_u          $out7,$x70,$out
1724         addi            $out,$out,0x80
1725         b               Lctr32_enc8x_done
1726
1727 .align  5
# Seven blocks remain: the input-pinning trick above left the last seven
# input words in $in1..$in7 (inputs shifted up by one register).
1728 Lctr32_enc8x_seven:
1729         vcipherlast     $out0,$out0,$in1
1730         vcipherlast     $out1,$out1,$in2
1731         vcipherlast     $out2,$out2,$in3
1732         vcipherlast     $out3,$out3,$in4
1733         vcipherlast     $out4,$out4,$in5
1734         vcipherlast     $out5,$out5,$in6
1735         vcipherlast     $out6,$out6,$in7
1736
1737         le?vperm        $out0,$out0,$out0,$inpperm
1738         le?vperm        $out1,$out1,$out1,$inpperm
1739         stvx_u          $out0,$x00,$out
1740         le?vperm        $out2,$out2,$out2,$inpperm
1741         stvx_u          $out1,$x10,$out
1742         le?vperm        $out3,$out3,$out3,$inpperm
1743         stvx_u          $out2,$x20,$out
1744         le?vperm        $out4,$out4,$out4,$inpperm
1745         stvx_u          $out3,$x30,$out
1746         le?vperm        $out5,$out5,$out5,$inpperm
1747         stvx_u          $out4,$x40,$out
1748         le?vperm        $out6,$out6,$out6,$inpperm
1749         stvx_u          $out5,$x50,$out
1750         stvx_u          $out6,$x60,$out
1751         addi            $out,$out,0x70
1752         b               Lctr32_enc8x_done
1753
1754 .align  5
# Six blocks remain: last six input words sit in $in2..$in7.
1755 Lctr32_enc8x_six:
1756         vcipherlast     $out0,$out0,$in2
1757         vcipherlast     $out1,$out1,$in3
1758         vcipherlast     $out2,$out2,$in4
1759         vcipherlast     $out3,$out3,$in5
1760         vcipherlast     $out4,$out4,$in6
1761         vcipherlast     $out5,$out5,$in7
1762
1763         le?vperm        $out0,$out0,$out0,$inpperm
1764         le?vperm        $out1,$out1,$out1,$inpperm
1765         stvx_u          $out0,$x00,$out
1766         le?vperm        $out2,$out2,$out2,$inpperm
1767         stvx_u          $out1,$x10,$out
1768         le?vperm        $out3,$out3,$out3,$inpperm
1769         stvx_u          $out2,$x20,$out
1770         le?vperm        $out4,$out4,$out4,$inpperm
1771         stvx_u          $out3,$x30,$out
1772         le?vperm        $out5,$out5,$out5,$inpperm
1773         stvx_u          $out4,$x40,$out
1774         stvx_u          $out5,$x50,$out
1775         addi            $out,$out,0x60
1776         b               Lctr32_enc8x_done
1777
1778 .align  5
# Five blocks remain: last five input words sit in $in3..$in7.
1779 Lctr32_enc8x_five:
1780         vcipherlast     $out0,$out0,$in3
1781         vcipherlast     $out1,$out1,$in4
1782         vcipherlast     $out2,$out2,$in5
1783         vcipherlast     $out3,$out3,$in6
1784         vcipherlast     $out4,$out4,$in7
1785
1786         le?vperm        $out0,$out0,$out0,$inpperm
1787         le?vperm        $out1,$out1,$out1,$inpperm
1788         stvx_u          $out0,$x00,$out
1789         le?vperm        $out2,$out2,$out2,$inpperm
1790         stvx_u          $out1,$x10,$out
1791         le?vperm        $out3,$out3,$out3,$inpperm
1792         stvx_u          $out2,$x20,$out
1793         le?vperm        $out4,$out4,$out4,$inpperm
1794         stvx_u          $out3,$x30,$out
1795         stvx_u          $out4,$x40,$out
1796         addi            $out,$out,0x50
1797         b               Lctr32_enc8x_done
1798
1799 .align  5
# Four blocks remain: last four input words sit in $in4..$in7.
1800 Lctr32_enc8x_four:
1801         vcipherlast     $out0,$out0,$in4
1802         vcipherlast     $out1,$out1,$in5
1803         vcipherlast     $out2,$out2,$in6
1804         vcipherlast     $out3,$out3,$in7
1805
1806         le?vperm        $out0,$out0,$out0,$inpperm
1807         le?vperm        $out1,$out1,$out1,$inpperm
1808         stvx_u          $out0,$x00,$out
1809         le?vperm        $out2,$out2,$out2,$inpperm
1810         stvx_u          $out1,$x10,$out
1811         le?vperm        $out3,$out3,$out3,$inpperm
1812         stvx_u          $out2,$x20,$out
1813         stvx_u          $out3,$x30,$out
1814         addi            $out,$out,0x40
1815         b               Lctr32_enc8x_done
1816
1817 .align  5
# Three blocks remain: last three input words sit in $in5..$in7.
1818 Lctr32_enc8x_three:
1819         vcipherlast     $out0,$out0,$in5
1820         vcipherlast     $out1,$out1,$in6
1821         vcipherlast     $out2,$out2,$in7
1822
1823         le?vperm        $out0,$out0,$out0,$inpperm
1824         le?vperm        $out1,$out1,$out1,$inpperm
1825         stvx_u          $out0,$x00,$out
1826         le?vperm        $out2,$out2,$out2,$inpperm
1827         stvx_u          $out1,$x10,$out
1828         stvx_u          $out2,$x20,$out
1829         addi            $out,$out,0x30
1830         b               Lctr32_enc8x_done       # was Lcbc_dec8x_done: a
                                                # copy-paste from the CBC path
                                                # that jumped into another
                                                # routine's epilogue; use this
                                                # routine's own cleanup
1831
1832 .align  5
# Two blocks remain: last two input words sit in $in6/$in7.
1833 Lctr32_enc8x_two:
1834         vcipherlast     $out0,$out0,$in6
1835         vcipherlast     $out1,$out1,$in7
1836
1837         le?vperm        $out0,$out0,$out0,$inpperm
1838         le?vperm        $out1,$out1,$out1,$inpperm
1839         stvx_u          $out0,$x00,$out
1840         stvx_u          $out1,$x10,$out
1841         addi            $out,$out,0x20
1842         b               Lctr32_enc8x_done       # was Lcbc_dec8x_done: a
                                                # copy-paste from the CBC path
                                                # that jumped into another
                                                # routine's epilogue; use this
                                                # routine's own cleanup
1843
1844 .align  5
# One block remains: the last input word is in $in7; fall through to cleanup.
1845 Lctr32_enc8x_one:
1846         vcipherlast     $out0,$out0,$in7
1847
1848         le?vperm        $out0,$out0,$out0,$inpperm
1849         stvx_u          $out0,0,$out
1850         addi            $out,$out,0x10
1851
# Common exit: scrub the on-stack round-key copies (overwriting them with
# $inpperm, which holds no key material), restore VRSAVE, the non-volatile
# vector registers v20-v31 (ABI-mandated) and GPRs r26-r31, then pop the
# frame and return.
1852 Lctr32_enc8x_done:
1853         li              r10,`$FRAME+15`
1854         li              r11,`$FRAME+31`
1855         stvx            $inpperm,r10,$sp        # wipe copies of round keys
1856         addi            r10,r10,32
1857         stvx            $inpperm,r11,$sp
1858         addi            r11,r11,32
1859         stvx            $inpperm,r10,$sp
1860         addi            r10,r10,32
1861         stvx            $inpperm,r11,$sp
1862         addi            r11,r11,32
1863         stvx            $inpperm,r10,$sp
1864         addi            r10,r10,32
1865         stvx            $inpperm,r11,$sp
1866         addi            r11,r11,32
1867         stvx            $inpperm,r10,$sp
1868         addi            r10,r10,32
1869         stvx            $inpperm,r11,$sp
1870         addi            r11,r11,32
1871
1872         mtspr           256,$vrsave
1873         lvx             v20,r10,$sp             # ABI says so
1874         addi            r10,r10,32
1875         lvx             v21,r11,$sp
1876         addi            r11,r11,32
1877         lvx             v22,r10,$sp
1878         addi            r10,r10,32
1879         lvx             v23,r11,$sp
1880         addi            r11,r11,32
1881         lvx             v24,r10,$sp
1882         addi            r10,r10,32
1883         lvx             v25,r11,$sp
1884         addi            r11,r11,32
1885         lvx             v26,r10,$sp
1886         addi            r10,r10,32
1887         lvx             v27,r11,$sp
1888         addi            r11,r11,32
1889         lvx             v28,r10,$sp
1890         addi            r10,r10,32
1891         lvx             v29,r11,$sp
1892         addi            r11,r11,32
1893         lvx             v30,r10,$sp
1894         lvx             v31,r11,$sp
1895         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1896         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1897         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1898         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1899         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1900         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1901         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1902         blr
1903         .long           0
1904         .byte           0,12,0x04,0,0x80,6,6,0
1905         .long           0
1906 .size   .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1907 ___
1908 }}      }}}
1909
1910 #########################################################################
1911 {{{     # XTS procedures                                                #
1912 # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,   #
1913 #                             const AES_KEY *key1, const AES_KEY *key2, #
1914 #                             [const] unsigned char iv[16]);            #
1915 # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which   #
1916 # input tweak value is assumed to be encrypted already, and last tweak  #
1917 # value, one suitable for consecutive call on same chunk of data, is    #
1918 # written back to original buffer. In addition, in "tweak chaining"     #
1919 # mode only complete input blocks are processed.                        #
1920
# Register-name bindings for the XTS code below.  GPR r3..r10 carry the C
# argument list; vector registers v0..v12 are scratch.  These names are
# interpolated into the assembly heredocs, so they are load-bearing and must
# not be renamed.
1921 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =     map("r$_",(3..10));
1922 my ($rndkey0,$rndkey1,$inout) =                         map("v$_",(0..2));
1923 my ($output,$inptail,$inpperm,$leperm,$keyperm) =       map("v$_",(3..7));
1924 my ($tweak,$seven,$eighty7,$tmp,$tweak1) =              map("v$_",(8..12));
# $key2 (r7) is consumed early, so its GPR is reused for the tail length.
1925 my $taillen = $key2;
1926
# Swap $inp (r3) and $idx (r10): r3 must stay free for the return value.
1927    ($inp,$idx) = ($idx,$inp);                           # reassign
1928
1929 $code.=<<___;
1930 .globl  .${prefix}_xts_encrypt
1931 .align  5
# XTS encrypt entry: reject $len<16, save VRSAVE, build the LE byte-reversal
# permute ($leperm), load the [possibly unaligned] IV into $tweak, and start
# an unaligned load of the first input block.  If $key2 is non-NULL the IV
# is encrypted with key2 to form the initial tweak; otherwise ("tweak
# chaining" mode) the IV is already the tweak.
1932 .${prefix}_xts_encrypt:
1933         mr              $inp,r3                         # reassign
1934         li              r3,-1
1935         ${UCMP}i        $len,16
1936         bltlr-
1937
1938         lis             r0,0xfff0
1939         mfspr           r12,256                         # save vrsave
1940         li              r11,0
1941         mtspr           256,r0
1942
1943         vspltisb        $seven,0x07                     # 0x070707..07
1944         le?lvsl         $leperm,r11,r11
1945         le?vspltisb     $tmp,0x0f
1946         le?vxor         $leperm,$leperm,$seven
1947
1948         li              $idx,15
1949         lvx             $tweak,0,$ivp                   # load [unaligned] iv
1950         lvsl            $inpperm,0,$ivp
1951         lvx             $inptail,$idx,$ivp
1952         le?vxor         $inpperm,$inpperm,$tmp
1953         vperm           $tweak,$tweak,$inptail,$inpperm
1954
1955         neg             r11,$inp
1956         lvsr            $inpperm,0,r11                  # prepare for unaligned load
1957         lvx             $inout,0,$inp
1958         addi            $inp,$inp,15                    # 15 is not typo
1959         le?vxor         $inpperm,$inpperm,$tmp
1960
1961         ${UCMP}i        $key2,0                         # key2==NULL?
1962         beq             Lxts_enc_no_key2
1963
1964         ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
1965         lwz             $rounds,240($key2)
1966         srwi            $rounds,$rounds,1
1967         subi            $rounds,$rounds,1
1968         li              $idx,16
1969
# Encrypt the IV under key2 (two rounds per loop iteration) to produce the
# initial tweak.
1970         lvx             $rndkey0,0,$key2
1971         lvx             $rndkey1,$idx,$key2
1972         addi            $idx,$idx,16
1973         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1974         vxor            $tweak,$tweak,$rndkey0
1975         lvx             $rndkey0,$idx,$key2
1976         addi            $idx,$idx,16
1977         mtctr           $rounds
1978
1979 Ltweak_xts_enc:
1980         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1981         vcipher         $tweak,$tweak,$rndkey1
1982         lvx             $rndkey1,$idx,$key2
1983         addi            $idx,$idx,16
1984         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1985         vcipher         $tweak,$tweak,$rndkey0
1986         lvx             $rndkey0,$idx,$key2
1987         addi            $idx,$idx,16
1988         bdnz            Ltweak_xts_enc
1989
1990         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1991         vcipher         $tweak,$tweak,$rndkey1
1992         lvx             $rndkey1,$idx,$key2
1993         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1994         vcipherlast     $tweak,$tweak,$rndkey0
1995
1996         li              $ivp,0                          # don't chain the tweak
1997         b               Lxts_enc
1998
# Tweak-chaining entry: round $len down to a whole number of 16-byte blocks
# (no ciphertext stealing in this mode), keeping $ivp non-zero so the final
# tweak is written back for the next call.
1999 Lxts_enc_no_key2:
2000         li              $idx,-16
2001         and             $len,$len,$idx                  # in "tweak chaining"
2002                                                         # mode only complete
2003                                                         # blocks are processed
# Common setup under key1: load round count, build the GF(2^128) doubling
# constant $eighty7 (0x870101..01), and take the 6x path for >=96 bytes.
2004 Lxts_enc:
2005         lvx             $inptail,0,$inp
2006         addi            $inp,$inp,16
2007
2008         ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
2009         lwz             $rounds,240($key1)
2010         srwi            $rounds,$rounds,1
2011         subi            $rounds,$rounds,1
2012         li              $idx,16
2013
2014         vslb            $eighty7,$seven,$seven          # 0x808080..80
2015         vor             $eighty7,$eighty7,$seven        # 0x878787..87
2016         vspltisb        $tmp,1                          # 0x010101..01
2017         vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
2018
2019         ${UCMP}i        $len,96
2020         bge             _aesp8_xts_encrypt6x
2021
# Short-input path: $taillen = (len%16)-16 and the subic/subfe mask pin $inp
# so a trailing partial block re-reads the last full input word (ciphertext
# stealing setup).
2022         andi.           $taillen,$len,15
2023         subic           r0,$len,32
2024         subi            $taillen,$taillen,16
2025         subfe           r0,r0,r0
2026         and             r0,r0,$taillen
2027         add             $inp,$inp,r0
2028
2029         lvx             $rndkey0,0,$key1
2030         lvx             $rndkey1,$idx,$key1
2031         addi            $idx,$idx,16
2032         vperm           $inout,$inout,$inptail,$inpperm
2033         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2034         vxor            $inout,$inout,$tweak
2035         vxor            $inout,$inout,$rndkey0
2036         lvx             $rndkey0,$idx,$key1
2037         addi            $idx,$idx,16
2038         mtctr           $rounds
2039         b               Loop_xts_enc
2040
2041 .align  5
# One block per pass: two AES rounds per iteration under key1; the tweak is
# folded into the final round key so vcipherlast produces tweak-xored
# ciphertext directly.
2042 Loop_xts_enc:
2043         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2044         vcipher         $inout,$inout,$rndkey1
2045         lvx             $rndkey1,$idx,$key1
2046         addi            $idx,$idx,16
2047         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2048         vcipher         $inout,$inout,$rndkey0
2049         lvx             $rndkey0,$idx,$key1
2050         addi            $idx,$idx,16
2051         bdnz            Loop_xts_enc
2052
2053         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2054         vcipher         $inout,$inout,$rndkey1
2055         lvx             $rndkey1,$idx,$key1
2056         li              $idx,16
2057         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2058         vxor            $rndkey0,$rndkey0,$tweak
2059         vcipherlast     $output,$inout,$rndkey0
2060
2061         le?vperm        $tmp,$output,$output,$leperm
2062         be?nop
2063         le?stvx_u       $tmp,0,$out
2064         be?stvx_u       $output,0,$out
2065         addi            $out,$out,16
2066
2067         subic.          $len,$len,16
2068         beq             Lxts_enc_done
2069
2070         vmr             $inout,$inptail
2071         lvx             $inptail,0,$inp
2072         addi            $inp,$inp,16
2073         lvx             $rndkey0,0,$key1
2074         lvx             $rndkey1,$idx,$key1
2075         addi            $idx,$idx,16
2076
2077         subic           r0,$len,32
2078         subfe           r0,r0,r0
2079         and             r0,r0,$taillen
2080         add             $inp,$inp,r0
2081
# Advance the tweak: multiply by alpha in GF(2^128), i.e. shift left one bit
# and conditionally fold in 0x87 (via $eighty7) on carry-out.
2082         vsrab           $tmp,$tweak,$seven              # next tweak value
2083         vaddubm         $tweak,$tweak,$tweak
2084         vsldoi          $tmp,$tmp,$tmp,15
2085         vand            $tmp,$tmp,$eighty7
2086         vxor            $tweak,$tweak,$tmp
2087
2088         vperm           $inout,$inout,$inptail,$inpperm
2089         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2090         vxor            $inout,$inout,$tweak
2091         vxor            $output,$output,$rndkey0        # just in case $len<16
2092         vxor            $inout,$inout,$rndkey0
2093         lvx             $rndkey0,$idx,$key1
2094         addi            $idx,$idx,16
2095
2096         mtctr           $rounds
2097         ${UCMP}i        $len,16
2098         bge             Loop_xts_enc
2099
# Ciphertext stealing: splice the partial tail block with the previous
# ciphertext, shuffle the stolen bytes in memory, then run one more pass.
2100         vxor            $output,$output,$tweak
2101         lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
2102         vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
2103         vspltisb        $tmp,-1
2104         vperm           $inptail,$inptail,$tmp,$inpperm
2105         vsel            $inout,$inout,$output,$inptail
2106
2107         subi            r11,$out,17
2108         subi            $out,$out,16
2109         mtctr           $len
2110         li              $len,16
2111 Loop_xts_enc_steal:
2112         lbzu            r0,1(r11)
2113         stb             r0,16(r11)
2114         bdnz            Loop_xts_enc_steal
2115
2116         mtctr           $rounds
2117         b               Loop_xts_enc                    # one more time...
2118
# Exit: in tweak-chaining mode ($ivp != 0) compute one more tweak value
# (multiply by alpha) and store it back to the IV buffer for the next call;
# then restore VRSAVE and return 0.
2119 Lxts_enc_done:
2120         ${UCMP}i        $ivp,0
2121         beq             Lxts_enc_ret
2122
2123         vsrab           $tmp,$tweak,$seven              # next tweak value
2124         vaddubm         $tweak,$tweak,$tweak
2125         vsldoi          $tmp,$tmp,$tmp,15
2126         vand            $tmp,$tmp,$eighty7
2127         vxor            $tweak,$tweak,$tmp
2128
2129         le?vperm        $tweak,$tweak,$tweak,$leperm
2130         stvx_u          $tweak,0,$ivp
2131
2132 Lxts_enc_ret:
2133         mtspr           256,r12                         # restore vrsave
2134         li              r3,0
2135         blr
2136         .long           0
2137         .byte           0,12,0x04,0,0x80,6,6,0
2138         .long           0
2139 .size   .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2140
2141 .globl  .${prefix}_xts_decrypt
2142 .align  5
# XTS decrypt entry: mirrors xts_encrypt's setup.  Note the extra $len
# adjustment below — when a partial tail exists, one full block is held back
# so the second-to-last block can be processed with the *next* tweak
# (ciphertext-stealing order on decrypt).  The tweak is still computed with
# vcipher (AES *encryption*) under key2, as XTS requires.
2143 .${prefix}_xts_decrypt:
2144         mr              $inp,r3                         # reassign
2145         li              r3,-1
2146         ${UCMP}i        $len,16
2147         bltlr-
2148
2149         lis             r0,0xfff8
2150         mfspr           r12,256                         # save vrsave
2151         li              r11,0
2152         mtspr           256,r0
2153
2154         andi.           r0,$len,15
2155         neg             r0,r0
2156         andi.           r0,r0,16
2157         sub             $len,$len,r0
2158
2159         vspltisb        $seven,0x07                     # 0x070707..07
2160         le?lvsl         $leperm,r11,r11
2161         le?vspltisb     $tmp,0x0f
2162         le?vxor         $leperm,$leperm,$seven
2163
2164         li              $idx,15
2165         lvx             $tweak,0,$ivp                   # load [unaligned] iv
2166         lvsl            $inpperm,0,$ivp
2167         lvx             $inptail,$idx,$ivp
2168         le?vxor         $inpperm,$inpperm,$tmp
2169         vperm           $tweak,$tweak,$inptail,$inpperm
2170
2171         neg             r11,$inp
2172         lvsr            $inpperm,0,r11                  # prepare for unaligned load
2173         lvx             $inout,0,$inp
2174         addi            $inp,$inp,15                    # 15 is not typo
2175         le?vxor         $inpperm,$inpperm,$tmp
2176
2177         ${UCMP}i        $key2,0                         # key2==NULL?
2178         beq             Lxts_dec_no_key2
2179
2180         ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
2181         lwz             $rounds,240($key2)
2182         srwi            $rounds,$rounds,1
2183         subi            $rounds,$rounds,1
2184         li              $idx,16
2185
# Encrypt the IV under key2 to produce the initial tweak (same as encrypt).
2186         lvx             $rndkey0,0,$key2
2187         lvx             $rndkey1,$idx,$key2
2188         addi            $idx,$idx,16
2189         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2190         vxor            $tweak,$tweak,$rndkey0
2191         lvx             $rndkey0,$idx,$key2
2192         addi            $idx,$idx,16
2193         mtctr           $rounds
2194
2195 Ltweak_xts_dec:
2196         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2197         vcipher         $tweak,$tweak,$rndkey1
2198         lvx             $rndkey1,$idx,$key2
2199         addi            $idx,$idx,16
2200         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2201         vcipher         $tweak,$tweak,$rndkey0
2202         lvx             $rndkey0,$idx,$key2
2203         addi            $idx,$idx,16
2204         bdnz            Ltweak_xts_dec
2205
2206         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2207         vcipher         $tweak,$tweak,$rndkey1
2208         lvx             $rndkey1,$idx,$key2
2209         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2210         vcipherlast     $tweak,$tweak,$rndkey0
2211
2212         li              $ivp,0                          # don't chain the tweak
2213         b               Lxts_dec
2214
# Tweak-chaining entry for decrypt: round $len up to a whole number of
# blocks (undoing the hold-back done at entry, since stealing never happens
# in this mode).
2215 Lxts_dec_no_key2:
2216         neg             $idx,$len
2217         andi.           $idx,$idx,15
2218         add             $len,$len,$idx                  # in "tweak chaining"
2219                                                         # mode only complete
2220                                                         # blocks are processed
# Common setup under key1; take the 6x path for >=96 bytes, otherwise first
# block is pre-xored with tweak and round key[0] and CR0 (from andi. above,
# len%16) selects the full-block loop vs. the tail path.
2221 Lxts_dec:
2222         lvx             $inptail,0,$inp
2223         addi            $inp,$inp,16
2224
2225         ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
2226         lwz             $rounds,240($key1)
2227         srwi            $rounds,$rounds,1
2228         subi            $rounds,$rounds,1
2229         li              $idx,16
2230
2231         vslb            $eighty7,$seven,$seven          # 0x808080..80
2232         vor             $eighty7,$eighty7,$seven        # 0x878787..87
2233         vspltisb        $tmp,1                          # 0x010101..01
2234         vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
2235
2236         ${UCMP}i        $len,96
2237         bge             _aesp8_xts_decrypt6x
2238
2239         lvx             $rndkey0,0,$key1
2240         lvx             $rndkey1,$idx,$key1
2241         addi            $idx,$idx,16
2242         vperm           $inout,$inout,$inptail,$inpperm
2243         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2244         vxor            $inout,$inout,$tweak
2245         vxor            $inout,$inout,$rndkey0
2246         lvx             $rndkey0,$idx,$key1
2247         addi            $idx,$idx,16
2248         mtctr           $rounds
2249
2250         ${UCMP}i        $len,16
2251         blt             Ltail_xts_dec
2252         be?b            Loop_xts_dec
2252         be?b            Loop_xts_dec
2253
2254 .align  5
2255 Loop_xts_dec:
2256         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2257         vncipher        $inout,$inout,$rndkey1
2258         lvx             $rndkey1,$idx,$key1
2259         addi            $idx,$idx,16
2260         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2261         vncipher        $inout,$inout,$rndkey0
2262         lvx             $rndkey0,$idx,$key1
2263         addi            $idx,$idx,16
2264         bdnz            Loop_xts_dec
2265
2266         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2267         vncipher        $inout,$inout,$rndkey1
2268         lvx             $rndkey1,$idx,$key1
2269         li              $idx,16
2270         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2271         vxor            $rndkey0,$rndkey0,$tweak
2272         vncipherlast    $output,$inout,$rndkey0
2273
2274         le?vperm        $tmp,$output,$output,$leperm
2275         be?nop
2276         le?stvx_u       $tmp,0,$out
2277         be?stvx_u       $output,0,$out
2278         addi            $out,$out,16
2279
2280         subic.          $len,$len,16
2281         beq             Lxts_dec_done
2282
2283         vmr             $inout,$inptail
2284         lvx             $inptail,0,$inp
2285         addi            $inp,$inp,16
2286         lvx             $rndkey0,0,$key1
2287         lvx             $rndkey1,$idx,$key1
2288         addi            $idx,$idx,16
2289
2290         vsrab           $tmp,$tweak,$seven              # next tweak value
2291         vaddubm         $tweak,$tweak,$tweak
2292         vsldoi          $tmp,$tmp,$tmp,15
2293         vand            $tmp,$tmp,$eighty7
2294         vxor            $tweak,$tweak,$tmp
2295
2296         vperm           $inout,$inout,$inptail,$inpperm
2297         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2298         vxor            $inout,$inout,$tweak
2299         vxor            $inout,$inout,$rndkey0
2300         lvx             $rndkey0,$idx,$key1
2301         addi            $idx,$idx,16
2302
2303         mtctr           $rounds
2304         ${UCMP}i        $len,16
2305         bge             Loop_xts_dec
2306
2307 Ltail_xts_dec:
2308         vsrab           $tmp,$tweak,$seven              # next tweak value
2309         vaddubm         $tweak1,$tweak,$tweak
2310         vsldoi          $tmp,$tmp,$tmp,15
2311         vand            $tmp,$tmp,$eighty7
2312         vxor            $tweak1,$tweak1,$tmp
2313
2314         subi            $inp,$inp,16
2315         add             $inp,$inp,$len
2316
2317         vxor            $inout,$inout,$tweak            # :-(
2318         vxor            $inout,$inout,$tweak1           # :-)
2319
2320 Loop_xts_dec_short:
2321         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2322         vncipher        $inout,$inout,$rndkey1
2323         lvx             $rndkey1,$idx,$key1
2324         addi            $idx,$idx,16
2325         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2326         vncipher        $inout,$inout,$rndkey0
2327         lvx             $rndkey0,$idx,$key1
2328         addi            $idx,$idx,16
2329         bdnz            Loop_xts_dec_short
2330
2331         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2332         vncipher        $inout,$inout,$rndkey1
2333         lvx             $rndkey1,$idx,$key1
2334         li              $idx,16
2335         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2336         vxor            $rndkey0,$rndkey0,$tweak1
2337         vncipherlast    $output,$inout,$rndkey0
2338
2339         le?vperm        $tmp,$output,$output,$leperm
2340         be?nop
2341         le?stvx_u       $tmp,0,$out
2342         be?stvx_u       $output,0,$out
2343
2344         vmr             $inout,$inptail
2345         lvx             $inptail,0,$inp
2346         #addi           $inp,$inp,16
2347         lvx             $rndkey0,0,$key1
2348         lvx             $rndkey1,$idx,$key1
2349         addi            $idx,$idx,16
2350         vperm           $inout,$inout,$inptail,$inpperm
2351         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2352
2353         lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
2354         vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
2355         vspltisb        $tmp,-1
2356         vperm           $inptail,$inptail,$tmp,$inpperm
2357         vsel            $inout,$inout,$output,$inptail
2358
2359         vxor            $rndkey0,$rndkey0,$tweak
2360         vxor            $inout,$inout,$rndkey0
2361         lvx             $rndkey0,$idx,$key1
2362         addi            $idx,$idx,16
2363
2364         subi            r11,$out,1
2365         mtctr           $len
2366         li              $len,16
2367 Loop_xts_dec_steal:
2368         lbzu            r0,1(r11)
2369         stb             r0,16(r11)
2370         bdnz            Loop_xts_dec_steal
2371
2372         mtctr           $rounds
2373         b               Loop_xts_dec                    # one more time...
2374
2375 Lxts_dec_done:
2376         ${UCMP}i        $ivp,0
2377         beq             Lxts_dec_ret
2378
2379         vsrab           $tmp,$tweak,$seven              # next tweak value
2380         vaddubm         $tweak,$tweak,$tweak
2381         vsldoi          $tmp,$tmp,$tmp,15
2382         vand            $tmp,$tmp,$eighty7
2383         vxor            $tweak,$tweak,$tmp
2384
2385         le?vperm        $tweak,$tweak,$tweak,$leperm
2386         stvx_u          $tweak,0,$ivp
2387
2388 Lxts_dec_ret:
2389         mtspr           256,r12                         # restore vrsave
2390         li              r3,0
2391         blr
2392         .long           0
2393         .byte           0,12,0x04,0,0x80,6,6,0
2394         .long           0
2395 .size   .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2396 ___
#########################################################################
{{      # Optimized XTS procedures                                      #
# Register allocation for the 6x interleaved XTS paths below.  The
# register that carried the tweak-key argument is recycled here as the
# pointer into the on-stack copy of the key schedule.
my $key_=$key2;
# GPRs holding the constant byte offsets 0x00-0x70 used with indexed
# vector loads/stores.  On the osx flavour the zero offset is spelled
# as a literal 0 rather than r0 (NOTE(review): presumably an assembler
# register-naming quirk -- confirm before changing).
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));        # input blocks
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));    # working/output blocks
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));      # per-block tweak values
my $rndkey0="v23";      # v24-v25 rotating buffer for first round keys
                        # v26-v31 last 6 round keys
my ($keyperm)=($out0);  # aliases with "caller", redundant assignment
my $taillen=$x70;       # input length modulo 16 (ciphertext-stealing tail)

2410 $code.=<<___;
# _aesp8_xts_encrypt6x: 6x interleaved AES-XTS encryption, entered from
# the XTS front-end once at least 0x60 bytes remain.  On entry the
# caller has set up the data pointers and length, the key-1 schedule
# pointer and round count, the starting (already encrypted) tweak, and
# the permutation/constant vectors (including the 0x870101..01 vector
# used for the GF(2^128) tweak reduction).  If the IV pointer is
# non-zero ("tweak chaining" mode), the next tweak value is written
# back through it on exit.
.align  5
_aesp8_xts_encrypt6x:
	# Prologue: allocate the frame, save LR, the non-volatile vector
	# registers v20-v31 (interleaved through two base registers), the
	# non-volatile GPRs r26-r31 and vrsave, then enable all vector
	# registers via mtspr 256 (vrsave) with an all-ones mask.
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr		r11
	li		r7,`$FRAME+8*16+15`
	li		r3,`$FRAME+8*16+31`
	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx		v20,r7,$sp		# ABI says so
	addi		r7,r7,32
	stvx		v21,r3,$sp
	addi		r3,r3,32
	stvx		v22,r7,$sp
	addi		r7,r7,32
	stvx		v23,r3,$sp
	addi		r3,r3,32
	stvx		v24,r7,$sp
	addi		r7,r7,32
	stvx		v25,r3,$sp
	addi		r3,r3,32
	stvx		v26,r7,$sp
	addi		r7,r7,32
	stvx		v27,r3,$sp
	addi		r3,r3,32
	stvx		v28,r7,$sp
	addi		r7,r7,32
	stvx		v29,r3,$sp
	addi		r3,r3,32
	stvx		v30,r7,$sp
	stvx		v31,r3,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	# Convert the key schedule to the alignment-corrected form and
	# spill the middle rounds to the stack; the first two and
	# last six round keys stay resident in v24-v25 / v26-v31.
	lvx		$rndkey0,$x00,$key1	# load key schedule
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	lvx		v31,$x00,$key1
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_xts_enc_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key1
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_xts_enc_key

	lvx		v26,$x10,$key1
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key1
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key1
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key1
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key1
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key1
	?vperm		v29,v29,v30,$keyperm
	lvx		$twk5,$x70,$key1	# borrow $twk5
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$twk5,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	# Load the first six input blocks and derive their six tweaks.
	# Each step multiplies the running tweak by x in GF(2^128)
	# (shift left by one with 0x87 reduction, done bytewise with
	# vsrab/vaddubm/vsldoi/vand).  Each per-block tweak is XORed
	# with round key zero up front, so a single vxor below applies
	# both the tweak and the initial whitening.
	 vperm		$in0,$inout,$inptail,$inpperm
	 subi		$inp,$inp,31		# undo "caller"
	vxor		$twk0,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	 vxor		$out0,$in0,$twk0
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in1,$x10,$inp
	vxor		$twk1,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in1,$in1,$in1,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out1,$in1,$twk1
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in2,$x20,$inp
	 andi.		$taillen,$len,15
	vxor		$twk2,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in2,$in2,$in2,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out2,$in2,$twk2
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in3,$x30,$inp
	 sub		$len,$len,$taillen
	vxor		$twk3,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in3,$in3,$in3,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out3,$in3,$twk3
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in4,$x40,$inp
	 subi		$len,$len,0x60
	vxor		$twk4,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in4,$in4,$in4,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out4,$in4,$twk4
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in5,$x50,$inp
	 addi		$inp,$inp,0x60
	vxor		$twk5,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in5,$in5,$in5,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out5,$in5,$twk5
	vxor		$tweak,$tweak,$tmp

	vxor		v31,v31,$rndkey0
	mtctr		$rounds
	b		Loop_xts_enc6x

# Inner rounds: six parallel vcipher streams, reloading the spilled
# middle round keys into the v24/v25 rotating pair as it goes.
.align	5
Loop_xts_enc6x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_enc6x

	# Final rounds, interleaved (space-indented stream) with the
	# computation of the next six tweak values for the following
	# iteration.  The last round key v31 was pre-XORed with round
	# key zero, so the vcipherlast operand twkN ^ v31 both applies
	# the tweak and cancels the extra rndkey0 folded into twkN.
	subic		$len,$len,96		# $len-=96
	 vxor		$in0,$twk0,v31		# xor with last round key
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk0,$tweak,$rndkey0
	 vaddubm	$tweak,$tweak,$tweak
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	 vand		$tmp,$tmp,$eighty7
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	 vxor		$in1,$twk1,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk1,$tweak,$rndkey0
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25

	and		r0,r0,$len
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out0,$out0,v26
	vcipher		$out1,$out1,v26
	 vand		$tmp,$tmp,$eighty7
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out4,$out4,v26
	vcipher		$out5,$out5,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in5 are loaded
						# with last "words"
	 vxor		$in2,$twk2,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk2,$tweak,$rndkey0
	 vaddubm	$tweak,$tweak,$tweak
	vcipher		$out0,$out0,v27
	vcipher		$out1,$out1,v27
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	 vand		$tmp,$tmp,$eighty7
	vcipher		$out4,$out4,v27
	vcipher		$out5,$out5,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out0,$out0,v28
	vcipher		$out1,$out1,v28
	 vxor		$in3,$twk3,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk3,$tweak,$rndkey0
	vcipher		$out2,$out2,v28
	vcipher		$out3,$out3,v28
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out4,$out4,v28
	vcipher		$out5,$out5,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	 vand		$tmp,$tmp,$eighty7

	vcipher		$out0,$out0,v29
	vcipher		$out1,$out1,v29
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out2,$out2,v29
	vcipher		$out3,$out3,v29
	 vxor		$in4,$twk4,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk4,$tweak,$rndkey0
	vcipher		$out4,$out4,v29
	vcipher		$out5,$out5,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15

	vcipher		$out0,$out0,v30
	vcipher		$out1,$out1,v30
	 vand		$tmp,$tmp,$eighty7
	vcipher		$out2,$out2,v30
	vcipher		$out3,$out3,v30
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out4,$out4,v30
	vcipher		$out5,$out5,v30
	 vxor		$in5,$twk5,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk5,$tweak,$rndkey0

	vcipherlast	$out0,$out0,$in0
	 lvx_u		$in0,$x00,$inp		# load next input block
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipherlast	$out1,$out1,$in1
	 lvx_u		$in1,$x10,$inp
	vcipherlast	$out2,$out2,$in2
	 le?vperm	$in0,$in0,$in0,$leperm
	 lvx_u		$in2,$x20,$inp
	 vand		$tmp,$tmp,$eighty7
	vcipherlast	$out3,$out3,$in3
	 le?vperm	$in1,$in1,$in1,$leperm
	 lvx_u		$in3,$x30,$inp
	vcipherlast	$out4,$out4,$in4
	 le?vperm	$in2,$in2,$in2,$leperm
	 lvx_u		$in4,$x40,$inp
	 vxor		$tweak,$tweak,$tmp
	vcipherlast	$tmp,$out5,$in5		# last block might be needed
						# in stealing mode
	 le?vperm	$in3,$in3,$in3,$leperm
	 lvx_u		$in5,$x50,$inp
	 addi		$inp,$inp,0x60
	 le?vperm	$in4,$in4,$in4,$leperm
	 le?vperm	$in5,$in5,$in5,$leperm

	# Store the six ciphertext blocks while already whitening the
	# next six inputs with their tweaks.
	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	 vxor		$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	 vxor		$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	 vxor		$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	 vxor		$out3,$in3,$twk3
	le?vperm	$out5,$tmp,$tmp,$leperm
	stvx_u		$out4,$x40,$out
	 vxor		$out4,$in4,$twk4
	le?stvx_u	$out5,$x50,$out
	be?stvx_u	$tmp, $x50,$out
	 vxor		$out5,$in5,$twk5
	addi		$out,$out,0x60

	mtctr		$rounds
	beq		Loop_xts_enc6x		# did $len-=96 borrow?

	# Fewer than six whole blocks remain; dispatch on the residue.
	addic.		$len,$len,0x60
	beq		Lxts_enc6x_zero
	cmpwi		$len,0x20
	blt		Lxts_enc6x_one
	nop
	beq		Lxts_enc6x_two
	cmpwi		$len,0x40
	blt		Lxts_enc6x_three
	nop
	beq		Lxts_enc6x_four

# Each residue handler below encrypts the remaining 1-5 blocks via
# _aesp8_xts_enc5x, keeps the first unused tweak in twk0 (for chaining
# or for the stolen final partial block), and prepares tmp with the
# last full ciphertext block in case ciphertext stealing is needed
# (bne taken when the tail length is non-zero).
Lxts_enc6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	vxor		$tmp,$out4,$twk5	# last block prep for stealing
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	vxor		$tmp,$out3,$twk4	# last block prep for stealing
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$tmp,$out2,$twk3	# last block prep for stealing
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vxor		$tmp,$out1,$twk2	# last block prep for stealing
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

# Single-block path; also re-entered from Lxts_enc6x_steal to encrypt
# the stolen final block.  Interleaves the rounds with loading and
# aligning the partial tail block.
.align	4
Lxts_enc6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_enc1x:
	vcipher		$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_enc1x

	add		$inp,$inp,$taillen
	cmpwi		$taillen,0
	vcipher		$out0,$out0,v24

	subi		$inp,$inp,16
	vcipher		$out0,$out0,v25

	lvsr		$inpperm,0,$taillen
	vcipher		$out0,$out0,v26

	lvx_u		$in0,0,$inp
	vcipher		$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vcipher		$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vcipher		$out0,$out0,v30

	vperm		$in0,$in0,$in0,$inpperm
	vcipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vxor		$tmp,$out0,$twk1	# last block prep for stealing
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

# No whole blocks left: only the ciphertext-stealing tail (if any).
.align	4
Lxts_enc6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_enc6x_done

	add		$inp,$inp,$taillen
	subi		$inp,$inp,16
	lvx_u		$in0,0,$inp
	lvsr		$inpperm,0,$taillen	# $in5 is no more
	le?vperm	$in0,$in0,$in0,$leperm
	vperm		$in0,$in0,$in0,$inpperm
	vxor		$tmp,$tmp,$twk0
# Ciphertext stealing: build the stolen block by merging the partial
# tail (taillen bytes) with the high bytes of the last full ciphertext
# block held in tmp, shift the already-stored final block forward by a
# byte-copy loop, then loop back through Loop_xts_enc1x to encrypt the
# merged block in its place.
Lxts_enc6x_steal:
	vxor		$in0,$in0,$twk0
	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0	# $tmp is last block, remember?

	subi		r30,$out,17
	subi		$out,$out,16
	mtctr		$taillen
Loop_xts_enc6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_enc6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_enc1x		# one more time...

# Epilogue: optionally chain the tweak out through the IV pointer
# (twk0 still carries the rndkey0 fold, undone here), then wipe the
# on-stack key schedule and restore the saved registers.
.align	4
Lxts_enc6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_enc6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_enc6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0
2978
2979 .align  5
2980 _aesp8_xts_enc5x:
2981         vcipher         $out0,$out0,v24
2982         vcipher         $out1,$out1,v24
2983         vcipher         $out2,$out2,v24
2984         vcipher         $out3,$out3,v24
2985         vcipher         $out4,$out4,v24
2986         lvx             v24,$x20,$key_          # round[3]
2987         addi            $key_,$key_,0x20
2988
2989         vcipher         $out0,$out0,v25
2990         vcipher         $out1,$out1,v25
2991         vcipher         $out2,$out2,v25
2992         vcipher         $out3,$out3,v25
2993         vcipher         $out4,$out4,v25
2994         lvx             v25,$x10,$key_          # round[4]
2995         bdnz            _aesp8_xts_enc5x
2996
2997         add             $inp,$inp,$taillen
2998         cmpwi           $taillen,0
2999         vcipher         $out0,$out0,v24
3000         vcipher         $out1,$out1,v24
3001         vcipher         $out2,$out2,v24
3002         vcipher         $out3,$out3,v24
3003         vcipher         $out4,$out4,v24
3004
3005         subi            $inp,$inp,16
3006         vcipher         $out0,$out0,v25
3007         vcipher         $out1,$out1,v25
3008         vcipher         $out2,$out2,v25
3009         vcipher         $out3,$out3,v25
3010         vcipher         $out4,$out4,v25
3011          vxor           $twk0,$twk0,v31
3012
3013         vcipher         $out0,$out0,v26
3014         lvsr            $inpperm,r0,$taillen    # $in5 is no more
3015         vcipher         $out1,$out1,v26
3016         vcipher         $out2,$out2,v26
3017         vcipher         $out3,$out3,v26
3018         vcipher         $out4,$out4,v26
3019          vxor           $in1,$twk1,v31
3020
3021         vcipher         $out0,$out0,v27
3022         lvx_u           $in0,0,$inp
3023         vcipher         $out1,$out1,v27
3024         vcipher         $out2,$out2,v27
3025         vcipher         $out3,$out3,v27
3026         vcipher         $out4,$out4,v27
3027          vxor           $in2,$twk2,v31
3028
3029         addi            $key_,$sp,$FRAME+15     # rewind $key_
3030         vcipher         $out0,$out0,v28
3031         vcipher         $out1,$out1,v28
3032         vcipher         $out2,$out2,v28
3033         vcipher         $out3,$out3,v28
3034         vcipher         $out4,$out4,v28
3035         lvx             v24,$x00,$key_          # re-pre-load round[1]
3036          vxor           $in3,$twk3,v31
3037
3038         vcipher         $out0,$out0,v29
3039         le?vperm        $in0,$in0,$in0,$leperm
3040         vcipher         $out1,$out1,v29
3041         vcipher         $out2,$out2,v29
3042         vcipher         $out3,$out3,v29
3043         vcipher         $out4,$out4,v29
3044         lvx             v25,$x10,$key_          # re-pre-load round[2]
3045          vxor           $in4,$twk4,v31
3046
3047         vcipher         $out0,$out0,v30
3048         vperm           $in0,$in0,$in0,$inpperm
3049         vcipher         $out1,$out1,v30
3050         vcipher         $out2,$out2,v30
3051         vcipher         $out3,$out3,v30
3052         vcipher         $out4,$out4,v30
3053