#! /usr/bin/env perl
# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in a pure AltiVec/VMX way [data
# aligned programmatically, which in turn guarantees exception-free
# execution], but that turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual misalignment
# penalties at page boundaries are on average lower than the additional
# overhead of the pure AltiVec approach.
#
# May 2016
#
# Added XTS subroutine; a 9x improvement on little-endian and a 12x
# improvement on big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#               CBC en-/decrypt CTR     XTS
# POWER8[le]    3.96/0.72       0.74    1.1
# POWER8[be]    3.75/0.65       0.66    1.0
# POWER9[le]    4.02/0.86       0.84    1.05
# POWER9[be]    3.99/0.78       0.79    0.97

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

if ($flavour =~ /64/) {
        $SIZE_T =8;
        $LRSAVE =2*$SIZE_T;
        $STU    ="stdu";
        $POP    ="ld";
        $PUSH   ="std";
        $UCMP   ="cmpld";
        $SHL    ="sldi";
} elsif ($flavour =~ /32/) {
        $SIZE_T =4;
        $LRSAVE =$SIZE_T;
        $STU    ="stwu";
        $POP    ="lwz";
        $PUSH   ="stw";
        $UCMP   ="cmplw";
        $SHL    ="slwi";
} else { die "nonsense $flavour"; }
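
# For example (a hypothetical invocation; flavour names follow the usual
# perlasm convention, e.g. linux64le, linux64, linux32, aix64):
#
#       perl aesp8-ppc.pl linux64le aesp8-ppc.s         # 64-bit little-endian
#       perl aesp8-ppc.pl linux32   aesp8-ppc.s         # 32-bit big-endian
#
# A "64" flavour selects the std/ld/cmpld mnemonic family above, and a
# trailing "le" sets $LITTLE_ENDIAN below.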

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{     # Key setup procedures                                          #
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine        "any"

.text

.align  7
rcon:
.long   0x01000000, 0x01000000, 0x01000000, 0x01000000  ?rev
.long   0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000  ?rev
.long   0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c  ?rev
.long   0,0,0,0                                         ?asis
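# The first two entries above are AES round constants (rcon doubles each
# round in GF(2^8), so after 0x80 it wraps to 0x1b); the third is the
# vperm index pattern used for the "rotate-n-splat" of the last key word
# in the key-schedule loops below; the last entry is an all-zero pad.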
Lconsts:
        mflr    r0
        bcl     20,31,\$+4
        mflr    $ptr            # vvvvv "distance between . and rcon"
        addi    $ptr,$ptr,-0x48
        mtlr    r0
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
.asciz  "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

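# C-level view of the entry point below (a sketch; the actual prototype
# lives with the C callers):
#
#       int aes_p8_set_encrypt_key(const unsigned char *inp, int bits,
#                                  AES_KEY *out);
#
# It returns 0 on success, -1 for NULL pointers and -2 for an
# unsupported key length, matching the checks right after
# Lset_encrypt_key.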
.globl  .${prefix}_set_encrypt_key
.align  5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
        mflr            r11
        $PUSH           r11,$LRSAVE($sp)

        li              $ptr,-1
        ${UCMP}i        $inp,0
        beq-            Lenc_key_abort          # if ($inp==0) return -1;
        ${UCMP}i        $out,0
        beq-            Lenc_key_abort          # if ($out==0) return -1;
        li              $ptr,-2
        cmpwi           $bits,128
        blt-            Lenc_key_abort
        cmpwi           $bits,256
        bgt-            Lenc_key_abort
        andi.           r0,$bits,0x3f
        bne-            Lenc_key_abort

        lis             r0,0xfff0
        mfspr           $vrsave,256
        mtspr           256,r0

        bl              Lconsts
        mtlr            r11

        neg             r9,$inp
        lvx             $in0,0,$inp
        addi            $inp,$inp,15            # 15 is not typo
        lvsr            $key,0,r9               # borrow $key
        li              r8,0x20
        cmpwi           $bits,192
        lvx             $in1,0,$inp
        le?vspltisb     $mask,0x0f              # borrow $mask
        lvx             $rcon,0,$ptr
        le?vxor         $key,$key,$mask         # adjust for byte swap
        lvx             $mask,r8,$ptr
        addi            $ptr,$ptr,0x10
        vperm           $in0,$in0,$in1,$key     # align [and byte swap in LE]
        li              $cnt,8
        vxor            $zero,$zero,$zero
        mtctr           $cnt

        ?lvsr           $outperm,0,$out
        vspltisb        $outmask,-1
        lvx             $outhead,0,$out
        ?vperm          $outmask,$zero,$outmask,$outperm

        blt             Loop128
        addi            $inp,$inp,8
        beq             L192
        addi            $inp,$inp,8
        b               L256

.align  4
Loop128:
        vperm           $key,$in0,$in0,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in0,$in0,$key
        bdnz            Loop128
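# One Loop128 iteration is the classical AES-128 key-schedule step; in
# 32-bit words (a sketch, not part of the emitted code):
#
#       w[4] = w[0] ^ SubWord(RotWord(w[3])) ^ rcon;
#       w[5] = w[1] ^ w[4];
#       w[6] = w[2] ^ w[5];
#       w[7] = w[3] ^ w[6];
#
# The "rotate-n-splat" vperm pre-rotates and replicates the last word,
# on which ShiftRows is a no-op, so a single vcipherlast
# (ShiftRows+SubBytes+xor with rcon) yields SubWord(RotWord(w[3]))^rcon
# in every lane; the vsldoi/vxor chain is the prefix-xor over the four
# words.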

        lvx             $rcon,0,$ptr            # last two round keys

        vperm           $key,$in0,$in0,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in0,$in0,$key

        vperm           $key,$in0,$in0,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vxor            $in0,$in0,$key
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out

        addi            $inp,$out,15            # 15 is not typo
        addi            $out,$out,0x50

        li              $rounds,10
        b               Ldone

.align  4
L192:
        lvx             $tmp,0,$inp
        li              $cnt,4
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out
         addi           $out,$out,16
        vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
        vspltisb        $key,8                  # borrow $key
        mtctr           $cnt
        vsububm         $mask,$mask,$key        # adjust the mask

Loop192:
        vperm           $key,$in1,$in1,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
        vcipherlast     $key,$key,$rcon

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp

         vsldoi         $stage,$zero,$in1,8
        vspltw          $tmp,$in0,3
        vxor            $tmp,$tmp,$in1
        vsldoi          $in1,$zero,$in1,12      # >>32
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in1,$in1,$tmp
        vxor            $in0,$in0,$key
        vxor            $in1,$in1,$key
         vsldoi         $stage,$stage,$in0,8

        vperm           $key,$in1,$in1,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$stage,$stage,$outperm # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

         vsldoi         $stage,$in0,$in1,8
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
         vperm          $outtail,$stage,$stage,$outperm # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
         stvx           $stage,0,$out
         addi           $out,$out,16

        vspltw          $tmp,$in0,3
        vxor            $tmp,$tmp,$in1
        vsldoi          $in1,$zero,$in1,12      # >>32
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in1,$in1,$tmp
        vxor            $in0,$in0,$key
        vxor            $in1,$in1,$key
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out
         addi           $inp,$out,15            # 15 is not typo
         addi           $out,$out,16
        bdnz            Loop192

        li              $rounds,12
        addi            $out,$out,0x20
        b               Ldone

.align  4
L256:
        lvx             $tmp,0,$inp
        li              $cnt,7
        li              $rounds,14
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out
         addi           $out,$out,16
        vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
        mtctr           $cnt

Loop256:
        vperm           $key,$in1,$in1,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$in1,$in1,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in0,$in0,$key
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out
         addi           $inp,$out,15            # 15 is not typo
         addi           $out,$out,16
        bdz             Ldone

        vspltw          $key,$in0,3             # just splat
        vsldoi          $tmp,$zero,$in1,12      # >>32
        vsbox           $key,$key

        vxor            $in1,$in1,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in1,$in1,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in1,$in1,$tmp

        vxor            $in1,$in1,$key
        b               Loop256

.align  4
Ldone:
        lvx             $in1,0,$inp             # redundant in aligned case
        vsel            $in1,$outhead,$in1,$outmask
        stvx            $in1,0,$inp
        li              $ptr,0
        mtspr           256,$vrsave
        stw             $rounds,0($out)

Lenc_key_abort:
        mr              r3,$ptr
        blr
        .long           0
        .byte           0,12,0x14,1,0,0,3,0
        .long           0
.size   .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl  .${prefix}_set_decrypt_key
.align  5
.${prefix}_set_decrypt_key:
        $STU            $sp,-$FRAME($sp)
        mflr            r10
        $PUSH           r10,$FRAME+$LRSAVE($sp)
        bl              Lset_encrypt_key
        mtlr            r10

        cmpwi           r3,0
        bne-            Ldec_key_abort

        slwi            $cnt,$rounds,4
        subi            $inp,$out,240           # first round key
        srwi            $rounds,$rounds,1
        add             $out,$inp,$cnt          # last round key
        mtctr           $rounds

Ldeckey:
        lwz             r0, 0($inp)
        lwz             r6, 4($inp)
        lwz             r7, 8($inp)
        lwz             r8, 12($inp)
        addi            $inp,$inp,16
        lwz             r9, 0($out)
        lwz             r10,4($out)
        lwz             r11,8($out)
        lwz             r12,12($out)
        stw             r0, 0($out)
        stw             r6, 4($out)
        stw             r7, 8($out)
        stw             r8, 12($out)
        subi            $out,$out,16
        stw             r9, -16($inp)
        stw             r10,-12($inp)
        stw             r11,-8($inp)
        stw             r12,-4($inp)
        bdnz            Ldeckey
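# The decryption schedule is the encryption schedule consumed in
# reverse, so the loop above simply swaps the round keys end for end in
# place; plain lwz/stw word accesses keep the swap endian-neutral.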

        xor             r3,r3,r3                # return value
Ldec_key_abort:
        addi            $sp,$sp,$FRAME
        blr
        .long           0
        .byte           0,12,4,1,0x80,0,3,0
        .long           0
.size   .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{     # Single block en- and decrypt procedures                       #
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl  .${prefix}_${dir}crypt
.align  5
.${prefix}_${dir}crypt:
        lwz             $rounds,240($key)
        lis             r0,0xfc00
        mfspr           $vrsave,256
        li              $idx,15                 # 15 is not typo
        mtspr           256,r0

        lvx             v0,0,$inp
        neg             r11,$out
        lvx             v1,$idx,$inp
        lvsl            v2,0,$inp               # inpperm
        le?vspltisb     v4,0x0f
        ?lvsl           v3,0,r11                # outperm
        le?vxor         v2,v2,v4
        li              $idx,16
        vperm           v0,v0,v1,v2             # align [and byte swap in LE]
        lvx             v1,0,$key
        ?lvsl           v5,0,$key               # keyperm
        srwi            $rounds,$rounds,1
        lvx             v2,$idx,$key
        addi            $idx,$idx,16
        subi            $rounds,$rounds,1
        ?vperm          v1,v1,v2,v5             # align round key

        vxor            v0,v0,v1
        lvx             v1,$idx,$key
        addi            $idx,$idx,16
        mtctr           $rounds

Loop_${dir}c:
        ?vperm          v2,v2,v1,v5
        v${n}cipher     v0,v0,v2
        lvx             v2,$idx,$key
        addi            $idx,$idx,16
        ?vperm          v1,v1,v2,v5
        v${n}cipher     v0,v0,v1
        lvx             v1,$idx,$key
        addi            $idx,$idx,16
        bdnz            Loop_${dir}c

        ?vperm          v2,v2,v1,v5
        v${n}cipher     v0,v0,v2
        lvx             v2,$idx,$key
        ?vperm          v1,v1,v2,v5
        v${n}cipherlast v0,v0,v1

        vspltisb        v2,-1
        vxor            v1,v1,v1
        li              $idx,15                 # 15 is not typo
        ?vperm          v2,v1,v2,v3             # outmask
        le?vxor         v3,v3,v4
        lvx             v1,0,$out               # outhead
        vperm           v0,v0,v0,v3             # rotate [and byte swap in LE]
        vsel            v1,v1,v0,v2
        lvx             v4,$idx,$out
        stvx            v1,0,$out
        vsel            v0,v0,v4,v2
        stvx            v0,$idx,$out

        mtspr           256,$vrsave
        blr
        .long           0
        .byte           0,12,0x14,0,0,0,3,0
        .long           0
.size   .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
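# The two calls above emit aes_p8_encrypt and aes_p8_decrypt; their C
# view (a sketch, following the traditional AES_encrypt() convention) is:
#
#       void aes_p8_encrypt(const unsigned char in[16],
#                           unsigned char out[16], const AES_KEY *key);
#
# with aes_p8_decrypt identical except that it expects a schedule from
# aes_p8_set_decrypt_key.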
}}}
#########################################################################
{{{     # CBC en- and decrypt procedures                                #
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=             map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
                                                map("v$_",(4..10));
$code.=<<___;
.globl  .${prefix}_cbc_encrypt
.align  5
.${prefix}_cbc_encrypt:
        ${UCMP}i        $len,16
        bltlr-

        cmpwi           $enc,0                  # test direction
        lis             r0,0xffe0
        mfspr           $vrsave,256
        mtspr           256,r0

        li              $idx,15
        vxor            $rndkey0,$rndkey0,$rndkey0
        le?vspltisb     $tmp,0x0f

        lvx             $ivec,0,$ivp            # load [unaligned] iv
        lvsl            $inpperm,0,$ivp
        lvx             $inptail,$idx,$ivp
        le?vxor         $inpperm,$inpperm,$tmp
        vperm           $ivec,$ivec,$inptail,$inpperm

        neg             r11,$inp
        ?lvsl           $keyperm,0,$key         # prepare for unaligned key
        lwz             $rounds,240($key)

        lvsr            $inpperm,0,r11          # prepare for unaligned load
        lvx             $inptail,0,$inp
        addi            $inp,$inp,15            # 15 is not typo
        le?vxor         $inpperm,$inpperm,$tmp

        ?lvsr           $outperm,0,$out         # prepare for unaligned store
        vspltisb        $outmask,-1
        lvx             $outhead,0,$out
        ?vperm          $outmask,$rndkey0,$outmask,$outperm
        le?vxor         $outperm,$outperm,$tmp

        srwi            $rounds,$rounds,1
        li              $idx,16
        subi            $rounds,$rounds,1
        beq             Lcbc_dec

Lcbc_enc:
        vmr             $inout,$inptail
        lvx             $inptail,0,$inp
        addi            $inp,$inp,16
        mtctr           $rounds
        subi            $len,$len,16            # len-=16

        lvx             $rndkey0,0,$key
         vperm          $inout,$inout,$inptail,$inpperm
        lvx             $rndkey1,$idx,$key
        addi            $idx,$idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vxor            $inout,$inout,$rndkey0
        lvx             $rndkey0,$idx,$key
        addi            $idx,$idx,16
        vxor            $inout,$inout,$ivec

Loop_cbc_enc:
        ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
        vcipher         $inout,$inout,$rndkey1
        lvx             $rndkey1,$idx,$key
        addi            $idx,$idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vcipher         $inout,$inout,$rndkey0
        lvx             $rndkey0,$idx,$key
        addi            $idx,$idx,16
        bdnz            Loop_cbc_enc

        ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
        vcipher         $inout,$inout,$rndkey1
        lvx             $rndkey1,$idx,$key
        li              $idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vcipherlast     $ivec,$inout,$rndkey0
        ${UCMP}i        $len,16

        vperm           $tmp,$ivec,$ivec,$outperm
        vsel            $inout,$outhead,$tmp,$outmask
        vmr             $outhead,$tmp
        stvx            $inout,0,$out
        addi            $out,$out,16
        bge             Lcbc_enc

        b               Lcbc_done
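# CBC encryption is inherently serial, C[i] = Encrypt(P[i] ^ C[i-1]),
# so the loop above handles one block per pass. CBC decryption,
# P[i] = Decrypt(C[i]) ^ C[i-1], carries no such dependency, which is
# why inputs of 128 bytes and more are diverted to the 8x-wide
# _aesp8_cbc_decrypt8x below.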

.align  4
Lcbc_dec:
        ${UCMP}i        $len,128
        bge             _aesp8_cbc_decrypt8x
        vmr             $tmp,$inptail
        lvx             $inptail,0,$inp
        addi            $inp,$inp,16
        mtctr           $rounds
        subi            $len,$len,16            # len-=16

        lvx             $rndkey0,0,$key
         vperm          $tmp,$tmp,$inptail,$inpperm
        lvx             $rndkey1,$idx,$key
        addi            $idx,$idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vxor            $inout,$tmp,$rndkey0
        lvx             $rndkey0,$idx,$key
        addi            $idx,$idx,16

Loop_cbc_dec:
        ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
        vncipher        $inout,$inout,$rndkey1
        lvx             $rndkey1,$idx,$key
        addi            $idx,$idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vncipher        $inout,$inout,$rndkey0
        lvx             $rndkey0,$idx,$key
        addi            $idx,$idx,16
        bdnz            Loop_cbc_dec

        ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
        vncipher        $inout,$inout,$rndkey1
        lvx             $rndkey1,$idx,$key
        li              $idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vncipherlast    $inout,$inout,$rndkey0
        ${UCMP}i        $len,16

        vxor            $inout,$inout,$ivec
        vmr             $ivec,$tmp
        vperm           $tmp,$inout,$inout,$outperm
        vsel            $inout,$outhead,$tmp,$outmask
        vmr             $outhead,$tmp
        stvx            $inout,0,$out
        addi            $out,$out,16
        bge             Lcbc_dec

Lcbc_done:
        addi            $out,$out,-1
        lvx             $inout,0,$out           # redundant in aligned case
        vsel            $inout,$outhead,$inout,$outmask
        stvx            $inout,0,$out

        neg             $enc,$ivp               # write [unaligned] iv
        li              $idx,15                 # 15 is not typo
        vxor            $rndkey0,$rndkey0,$rndkey0
        vspltisb        $outmask,-1
        le?vspltisb     $tmp,0x0f
        ?lvsl           $outperm,0,$enc
        ?vperm          $outmask,$rndkey0,$outmask,$outperm
        le?vxor         $outperm,$outperm,$tmp
        lvx             $outhead,0,$ivp
        vperm           $ivec,$ivec,$ivec,$outperm
        vsel            $inout,$outhead,$ivec,$outmask
        lvx             $inptail,$idx,$ivp
        stvx            $inout,0,$ivp
        vsel            $inout,$ivec,$inptail,$outmask
        stvx            $inout,$idx,$ivp

        mtspr           256,$vrsave
        blr
        .long           0
        .byte           0,12,0x14,0,0,0,6,0
        .long           0
___
#########################################################################
{{      # Optimized CBC decrypt procedure                               #
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";      # v24-v25 rotating buffer for first round keys
                        # v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment

$code.=<<___;
.align  5
_aesp8_cbc_decrypt8x:
        $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
        li              r10,`$FRAME+8*16+15`
        li              r11,`$FRAME+8*16+31`
        stvx            v20,r10,$sp             # ABI says so
        addi            r10,r10,32
        stvx            v21,r11,$sp
        addi            r11,r11,32
        stvx            v22,r10,$sp
        addi            r10,r10,32
        stvx            v23,r11,$sp
        addi            r11,r11,32
        stvx            v24,r10,$sp
        addi            r10,r10,32
        stvx            v25,r11,$sp
        addi            r11,r11,32
        stvx            v26,r10,$sp
        addi            r10,r10,32
        stvx            v27,r11,$sp
        addi            r11,r11,32
        stvx            v28,r10,$sp
        addi            r10,r10,32
        stvx            v29,r11,$sp
        addi            r11,r11,32
        stvx            v30,r10,$sp
        stvx            v31,r11,$sp
        li              r0,-1
        stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
        li              $x10,0x10
        $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
        li              $x20,0x20
        $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
        li              $x30,0x30
        $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
        li              $x40,0x40
        $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
        li              $x50,0x50
        $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
        li              $x60,0x60
        $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
        li              $x70,0x70
        mtspr           256,r0

        subi            $rounds,$rounds,3       # -4 in total
        subi            $len,$len,128           # bias

        lvx             $rndkey0,$x00,$key      # load key schedule
        lvx             v30,$x10,$key
        addi            $key,$key,0x20
        lvx             v31,$x00,$key
        ?vperm          $rndkey0,$rndkey0,v30,$keyperm
        addi            $key_,$sp,$FRAME+15
        mtctr           $rounds

Load_cbc_dec_key:
        ?vperm          v24,v30,v31,$keyperm
        lvx             v30,$x10,$key
        addi            $key,$key,0x20
        stvx            v24,$x00,$key_          # off-load round[1]
        ?vperm          v25,v31,v30,$keyperm
        lvx             v31,$x00,$key
        stvx            v25,$x10,$key_          # off-load round[2]
        addi            $key_,$key_,0x20
        bdnz            Load_cbc_dec_key

        lvx             v26,$x10,$key
        ?vperm          v24,v30,v31,$keyperm
        lvx             v27,$x20,$key
        stvx            v24,$x00,$key_          # off-load round[3]
        ?vperm          v25,v31,v26,$keyperm
        lvx             v28,$x30,$key
        stvx            v25,$x10,$key_          # off-load round[4]
        addi            $key_,$sp,$FRAME+15     # rewind $key_
        ?vperm          v26,v26,v27,$keyperm
        lvx             v29,$x40,$key
        ?vperm          v27,v27,v28,$keyperm
        lvx             v30,$x50,$key
        ?vperm          v28,v28,v29,$keyperm
        lvx             v31,$x60,$key
        ?vperm          v29,v29,v30,$keyperm
        lvx             $out0,$x70,$key         # borrow $out0
        ?vperm          v30,v30,v31,$keyperm
        lvx             v24,$x00,$key_          # pre-load round[1]
        ?vperm          v31,v31,$out0,$keyperm
        lvx             v25,$x10,$key_          # pre-load round[2]
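# Key schedule is now staged: round[0] stays in $rndkey0, the middle
# rounds are parked on the stack and will stream through the v24/v25
# rotating buffer, and the last six round keys live in v26-v31.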

        #lvx            $inptail,0,$inp         # "caller" already did this
        #addi           $inp,$inp,15            # 15 is not typo
        subi            $inp,$inp,15            # undo "caller"

         le?li          $idx,8
        lvx_u           $in0,$x00,$inp          # load first 8 "words"
         le?lvsl        $inpperm,0,$idx
         le?vspltisb    $tmp,0x0f
        lvx_u           $in1,$x10,$inp
         le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
        lvx_u           $in2,$x20,$inp
         le?vperm       $in0,$in0,$in0,$inpperm
        lvx_u           $in3,$x30,$inp
         le?vperm       $in1,$in1,$in1,$inpperm
        lvx_u           $in4,$x40,$inp
         le?vperm       $in2,$in2,$in2,$inpperm
        vxor            $out0,$in0,$rndkey0
        lvx_u           $in5,$x50,$inp
         le?vperm       $in3,$in3,$in3,$inpperm
        vxor            $out1,$in1,$rndkey0
        lvx_u           $in6,$x60,$inp
         le?vperm       $in4,$in4,$in4,$inpperm
        vxor            $out2,$in2,$rndkey0
        lvx_u           $in7,$x70,$inp
        addi            $inp,$inp,0x80
         le?vperm       $in5,$in5,$in5,$inpperm
        vxor            $out3,$in3,$rndkey0
         le?vperm       $in6,$in6,$in6,$inpperm
        vxor            $out4,$in4,$rndkey0
         le?vperm       $in7,$in7,$in7,$inpperm
        vxor            $out5,$in5,$rndkey0
        vxor            $out6,$in6,$rndkey0
        vxor            $out7,$in7,$rndkey0

        mtctr           $rounds
        b               Loop_cbc_dec8x
.align  5
Loop_cbc_dec8x:
        vncipher        $out0,$out0,v24
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
        vncipher        $out6,$out6,v24
        vncipher        $out7,$out7,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25
        vncipher        $out6,$out6,v25
        vncipher        $out7,$out7,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Loop_cbc_dec8x

        subic           $len,$len,128           # $len-=128
        vncipher        $out0,$out0,v24
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
        vncipher        $out6,$out6,v24
        vncipher        $out7,$out7,v24

        subfe.          r0,r0,r0                # borrow?-1:0
        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25
        vncipher        $out6,$out6,v25
        vncipher        $out7,$out7,v25

        and             r0,r0,$len
        vncipher        $out0,$out0,v26
        vncipher        $out1,$out1,v26
        vncipher        $out2,$out2,v26
        vncipher        $out3,$out3,v26
        vncipher        $out4,$out4,v26
        vncipher        $out5,$out5,v26
        vncipher        $out6,$out6,v26
        vncipher        $out7,$out7,v26

        add             $inp,$inp,r0            # $inp is adjusted in such
                                                # way that at exit from the
                                                # loop inX-in7 are loaded
                                                # with last "words"
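# (subic sets CA iff $len>=128, so subfe. yields r0 = borrow ? -1 : 0;
#  the and/add pair then rewinds $inp by the now-negative $len exactly
#  when this is the final, partial iteration.)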
        vncipher        $out0,$out0,v27
        vncipher        $out1,$out1,v27
        vncipher        $out2,$out2,v27
        vncipher        $out3,$out3,v27
        vncipher        $out4,$out4,v27
        vncipher        $out5,$out5,v27
        vncipher        $out6,$out6,v27
        vncipher        $out7,$out7,v27

        addi            $key_,$sp,$FRAME+15     # rewind $key_
        vncipher        $out0,$out0,v28
        vncipher        $out1,$out1,v28
        vncipher        $out2,$out2,v28
        vncipher        $out3,$out3,v28
        vncipher        $out4,$out4,v28
        vncipher        $out5,$out5,v28
        vncipher        $out6,$out6,v28
        vncipher        $out7,$out7,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]

        vncipher        $out0,$out0,v29
        vncipher        $out1,$out1,v29
        vncipher        $out2,$out2,v29
        vncipher        $out3,$out3,v29
        vncipher        $out4,$out4,v29
        vncipher        $out5,$out5,v29
        vncipher        $out6,$out6,v29
        vncipher        $out7,$out7,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]

        vncipher        $out0,$out0,v30
         vxor           $ivec,$ivec,v31         # xor with last round key
        vncipher        $out1,$out1,v30
         vxor           $in0,$in0,v31
        vncipher        $out2,$out2,v30
         vxor           $in1,$in1,v31
        vncipher        $out3,$out3,v30
         vxor           $in2,$in2,v31
        vncipher        $out4,$out4,v30
         vxor           $in3,$in3,v31
        vncipher        $out5,$out5,v30
         vxor           $in4,$in4,v31
        vncipher        $out6,$out6,v30
         vxor           $in5,$in5,v31
        vncipher        $out7,$out7,v30
         vxor           $in6,$in6,v31

        vncipherlast    $out0,$out0,$ivec
        vncipherlast    $out1,$out1,$in0
         lvx_u          $in0,$x00,$inp          # load next input block
        vncipherlast    $out2,$out2,$in1
         lvx_u          $in1,$x10,$inp
        vncipherlast    $out3,$out3,$in2
         le?vperm       $in0,$in0,$in0,$inpperm
         lvx_u          $in2,$x20,$inp
        vncipherlast    $out4,$out4,$in3
         le?vperm       $in1,$in1,$in1,$inpperm
         lvx_u          $in3,$x30,$inp
        vncipherlast    $out5,$out5,$in4
         le?vperm       $in2,$in2,$in2,$inpperm
         lvx_u          $in4,$x40,$inp
        vncipherlast    $out6,$out6,$in5
         le?vperm       $in3,$in3,$in3,$inpperm
         lvx_u          $in5,$x50,$inp
        vncipherlast    $out7,$out7,$in6
         le?vperm       $in4,$in4,$in4,$inpperm
         lvx_u          $in6,$x60,$inp
        vmr             $ivec,$in7
         le?vperm       $in5,$in5,$in5,$inpperm
         lvx_u          $in7,$x70,$inp
         addi           $inp,$inp,0x80

        le?vperm        $out0,$out0,$out0,$inpperm
        le?vperm        $out1,$out1,$out1,$inpperm
        stvx_u          $out0,$x00,$out
         le?vperm       $in6,$in6,$in6,$inpperm
         vxor           $out0,$in0,$rndkey0
        le?vperm        $out2,$out2,$out2,$inpperm
        stvx_u          $out1,$x10,$out
         le?vperm       $in7,$in7,$in7,$inpperm
         vxor           $out1,$in1,$rndkey0
        le?vperm        $out3,$out3,$out3,$inpperm
        stvx_u          $out2,$x20,$out
         vxor           $out2,$in2,$rndkey0
        le?vperm        $out4,$out4,$out4,$inpperm
        stvx_u          $out3,$x30,$out
         vxor           $out3,$in3,$rndkey0
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x40,$out
         vxor           $out4,$in4,$rndkey0
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x50,$out
         vxor           $out5,$in5,$rndkey0
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x60,$out
         vxor           $out6,$in6,$rndkey0
        stvx_u          $out7,$x70,$out
        addi            $out,$out,0x80
         vxor           $out7,$in7,$rndkey0

        mtctr           $rounds
        beq             Loop_cbc_dec8x          # did $len-=128 borrow?

        addic.          $len,$len,128
        beq             Lcbc_dec8x_done
        nop
        nop

Loop_cbc_dec8x_tail:                            # up to 7 "words" tail...
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
        vncipher        $out6,$out6,v24
        vncipher        $out7,$out7,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25
        vncipher        $out6,$out6,v25
        vncipher        $out7,$out7,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Loop_cbc_dec8x_tail

        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
        vncipher        $out6,$out6,v24
        vncipher        $out7,$out7,v24

        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25
        vncipher        $out6,$out6,v25
        vncipher        $out7,$out7,v25

        vncipher        $out1,$out1,v26
        vncipher        $out2,$out2,v26
        vncipher        $out3,$out3,v26
        vncipher        $out4,$out4,v26
        vncipher        $out5,$out5,v26
        vncipher        $out6,$out6,v26
        vncipher        $out7,$out7,v26

        vncipher        $out1,$out1,v27
        vncipher        $out2,$out2,v27
        vncipher        $out3,$out3,v27
        vncipher        $out4,$out4,v27
        vncipher        $out5,$out5,v27
        vncipher        $out6,$out6,v27
        vncipher        $out7,$out7,v27

        vncipher        $out1,$out1,v28
        vncipher        $out2,$out2,v28
        vncipher        $out3,$out3,v28
        vncipher        $out4,$out4,v28
        vncipher        $out5,$out5,v28
        vncipher        $out6,$out6,v28
        vncipher        $out7,$out7,v28

        vncipher        $out1,$out1,v29
        vncipher        $out2,$out2,v29
        vncipher        $out3,$out3,v29
        vncipher        $out4,$out4,v29
        vncipher        $out5,$out5,v29
        vncipher        $out6,$out6,v29
        vncipher        $out7,$out7,v29

        vncipher        $out1,$out1,v30
         vxor           $ivec,$ivec,v31         # last round key
        vncipher        $out2,$out2,v30
         vxor           $in1,$in1,v31
        vncipher        $out3,$out3,v30
         vxor           $in2,$in2,v31
        vncipher        $out4,$out4,v30
         vxor           $in3,$in3,v31
        vncipher        $out5,$out5,v30
         vxor           $in4,$in4,v31
        vncipher        $out6,$out6,v30
         vxor           $in5,$in5,v31
        vncipher        $out7,$out7,v30
         vxor           $in6,$in6,v31

        cmplwi          $len,32                 # switch($len)
        blt             Lcbc_dec8x_one
        nop
        beq             Lcbc_dec8x_two
        cmplwi          $len,64
        blt             Lcbc_dec8x_three
        nop
        beq             Lcbc_dec8x_four
        cmplwi          $len,96
        blt             Lcbc_dec8x_five
        nop
        beq             Lcbc_dec8x_six

Lcbc_dec8x_seven:
        vncipherlast    $out1,$out1,$ivec
        vncipherlast    $out2,$out2,$in1
        vncipherlast    $out3,$out3,$in2
        vncipherlast    $out4,$out4,$in3
        vncipherlast    $out5,$out5,$in4
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out1,$out1,$out1,$inpperm
        le?vperm        $out2,$out2,$out2,$inpperm
        stvx_u          $out1,$x00,$out
        le?vperm        $out3,$out3,$out3,$inpperm
        stvx_u          $out2,$x10,$out
        le?vperm        $out4,$out4,$out4,$inpperm
        stvx_u          $out3,$x20,$out
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x30,$out
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x40,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x50,$out
        stvx_u          $out7,$x60,$out
        addi            $out,$out,0x70
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_six:
        vncipherlast    $out2,$out2,$ivec
        vncipherlast    $out3,$out3,$in2
        vncipherlast    $out4,$out4,$in3
        vncipherlast    $out5,$out5,$in4
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out2,$out2,$out2,$inpperm
        le?vperm        $out3,$out3,$out3,$inpperm
        stvx_u          $out2,$x00,$out
        le?vperm        $out4,$out4,$out4,$inpperm
        stvx_u          $out3,$x10,$out
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x20,$out
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x30,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x40,$out
        stvx_u          $out7,$x50,$out
        addi            $out,$out,0x60
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_five:
        vncipherlast    $out3,$out3,$ivec
        vncipherlast    $out4,$out4,$in3
        vncipherlast    $out5,$out5,$in4
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out3,$out3,$out3,$inpperm
        le?vperm        $out4,$out4,$out4,$inpperm
        stvx_u          $out3,$x00,$out
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x10,$out
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x20,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x30,$out
        stvx_u          $out7,$x40,$out
        addi            $out,$out,0x50
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_four:
        vncipherlast    $out4,$out4,$ivec
        vncipherlast    $out5,$out5,$in4
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out4,$out4,$out4,$inpperm
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x00,$out
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x10,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x20,$out
        stvx_u          $out7,$x30,$out
        addi            $out,$out,0x40
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_three:
        vncipherlast    $out5,$out5,$ivec
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out5,$out5,$out5,$inpperm
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x00,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x10,$out
        stvx_u          $out7,$x20,$out
        addi            $out,$out,0x30
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_two:
        vncipherlast    $out6,$out6,$ivec
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out6,$out6,$out6,$inpperm
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x00,$out
        stvx_u          $out7,$x10,$out
        addi            $out,$out,0x20
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_one:
        vncipherlast    $out7,$out7,$ivec
        vmr             $ivec,$in7

        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out7,0,$out
        addi            $out,$out,0x10

Lcbc_dec8x_done:
        le?vperm        $ivec,$ivec,$ivec,$inpperm
        stvx_u          $ivec,0,$ivp            # write [unaligned] iv

        li              r10,`$FRAME+15`
        li              r11,`$FRAME+31`
        stvx            $inpperm,r10,$sp        # wipe copies of round keys
        addi            r10,r10,32
        stvx            $inpperm,r11,$sp
        addi            r11,r11,32
        stvx            $inpperm,r10,$sp
        addi            r10,r10,32
        stvx            $inpperm,r11,$sp
        addi            r11,r11,32
        stvx            $inpperm,r10,$sp
        addi            r10,r10,32
        stvx            $inpperm,r11,$sp
        addi            r11,r11,32
        stvx            $inpperm,r10,$sp
        addi            r10,r10,32
        stvx            $inpperm,r11,$sp
        addi            r11,r11,32

        mtspr           256,$vrsave
        lvx             v20,r10,$sp             # ABI says so
        addi            r10,r10,32
        lvx             v21,r11,$sp
        addi            r11,r11,32
        lvx             v22,r10,$sp
        addi            r10,r10,32
        lvx             v23,r11,$sp
        addi            r11,r11,32
        lvx             v24,r10,$sp
        addi            r10,r10,32
        lvx             v25,r11,$sp
        addi            r11,r11,32
        lvx             v26,r10,$sp
        addi            r10,r10,32
        lvx             v27,r11,$sp
        addi            r11,r11,32
        lvx             v28,r10,$sp
        addi            r10,r10,32
        lvx             v29,r11,$sp
        addi            r11,r11,32
        lvx             v30,r10,$sp
        lvx             v31,r11,$sp
        $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
        $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
        $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
        $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
        $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
        $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
        addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
        blr
        .long           0
        .byte           0,12,0x04,0,0x80,6,6,0
        .long           0
.size   .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}      }}}

#########################################################################
{{{     # CTR procedure[s]                                              #
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=             map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
                                                map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl  .${prefix}_ctr32_encrypt_blocks
.align  5
.${prefix}_ctr32_encrypt_blocks:
        ${UCMP}i        $len,1
        bltlr-

        lis             r0,0xfff0
        mfspr           $vrsave,256
        mtspr           256,r0

        li              $idx,15
        vxor            $rndkey0,$rndkey0,$rndkey0
        le?vspltisb     $tmp,0x0f

        lvx             $ivec,0,$ivp            # load [unaligned] iv
        lvsl            $inpperm,0,$ivp
        lvx             $inptail,$idx,$ivp
         vspltisb       $one,1
        le?vxor         $inpperm,$inpperm,$tmp
        vperm           $ivec,$ivec,$inptail,$inpperm
         vsldoi         $one,$rndkey0,$one,1
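# $one now holds {0,0,0,1}: vspltisb filled every byte with 0x01 and the
# vsldoi above shifts in zeros so that only the last byte survives. The
# vadduwm in the main loop therefore increments just the low-order
# 32-bit counter word of the IV, as the ctr32 name implies.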

        neg             r11,$inp
        ?lvsl           $keyperm,0,$key         # prepare for unaligned key
        lwz             $rounds,240($key)

        lvsr            $inpperm,0,r11          # prepare for unaligned load
        lvx             $inptail,0,$inp
        addi            $inp,$inp,15            # 15 is not typo
        le?vxor         $inpperm,$inpperm,$tmp

        srwi            $rounds,$rounds,1
        li              $idx,16
        subi            $rounds,$rounds,1

        ${UCMP}i        $len,8
        bge             _aesp8_ctr32_encrypt8x

        ?lvsr           $outperm,0,$out         # prepare for unaligned store
        vspltisb        $outmask,-1
        lvx             $outhead,0,$out
        ?vperm          $outmask,$rndkey0,$outmask,$outperm
        le?vxor         $outperm,$outperm,$tmp

        lvx             $rndkey0,0,$key
        mtctr           $rounds
        lvx             $rndkey1,$idx,$key
        addi            $idx,$idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vxor            $inout,$ivec,$rndkey0
        lvx             $rndkey0,$idx,$key
        addi            $idx,$idx,16
        b               Loop_ctr32_enc

.align  5
Loop_ctr32_enc:
        ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
        vcipher         $inout,$inout,$rndkey1
        lvx             $rndkey1,$idx,$key
        addi            $idx,$idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vcipher         $inout,$inout,$rndkey0
        lvx             $rndkey0,$idx,$key
        addi            $idx,$idx,16
        bdnz            Loop_ctr32_enc

        vadduwm         $ivec,$ivec,$one
         vmr            $dat,$inptail
         lvx            $inptail,0,$inp
         addi           $inp,$inp,16
         subic.         $len,$len,1             # blocks--

        ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
        vcipher         $inout,$inout,$rndkey1
        lvx             $rndkey1,$idx,$key
         vperm          $dat,$dat,$inptail,$inpperm
         li             $idx,16
        ?vperm          $rndkey1,$rndkey0,$rndkey1,$keyperm
         lvx            $rndkey0,0,$key
        vxor            $dat,$dat,$rndkey1      # last round key
        vcipherlast     $inout,$inout,$dat

         lvx            $rndkey1,$idx,$key
         addi           $idx,$idx,16
        vperm           $inout,$inout,$inout,$outperm
        vsel            $dat,$outhead,$inout,$outmask
         mtctr          $rounds
         ?vperm         $rndkey0,$rndkey0,$rndkey1,$keyperm
        vmr             $outhead,$inout
         vxor           $inout,$ivec,$rndkey0
         lvx            $rndkey0,$idx,$key
         addi           $idx,$idx,16
        stvx            $dat,0,$out
        addi            $out,$out,16
        bne             Loop_ctr32_enc

        addi            $out,$out,-1
        lvx             $inout,0,$out           # redundant in aligned case
        vsel            $inout,$outhead,$inout,$outmask
        stvx            $inout,0,$out

        mtspr           256,$vrsave
        blr
        .long           0
        .byte           0,12,0x14,0,0,0,6,0
        .long           0
___
#########################################################################
{{      # Optimized CTR procedure                                       #
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1386 my $rndkey0="v23";      # v24-v25 rotating buffer for first few round keys
1387                         # v26-v31 last 6 round keys
1388 my ($tmp,$keyperm)=($in3,$in4); # aliased with "caller" registers, assignment is redundant
1389 my ($two,$three,$four)=($outhead,$outperm,$outmask);
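# Rough shape of the round-key handling below (a C-like sketch, not the
# generated code): the permuted middle-round keys are parked on the stack and
# v24/v25 rotate through them each pass, while v26-v31 stay resident:
#
#       for (i = 1; i < mid_rounds; i += 2) {
#               state = vcipher(state, stacked_key[i]);         /* via v24 */
#               state = vcipher(state, stacked_key[i + 1]);     /* via v25 */
#       }
#       /* the six resident keys v26..v31 then finish the block */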
1390
1391 $code.=<<___;
1392 .align  5
1393 _aesp8_ctr32_encrypt8x:
1394         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1395         li              r10,`$FRAME+8*16+15`
1396         li              r11,`$FRAME+8*16+31`
1397         stvx            v20,r10,$sp             # ABI says so
1398         addi            r10,r10,32
1399         stvx            v21,r11,$sp
1400         addi            r11,r11,32
1401         stvx            v22,r10,$sp
1402         addi            r10,r10,32
1403         stvx            v23,r11,$sp
1404         addi            r11,r11,32
1405         stvx            v24,r10,$sp
1406         addi            r10,r10,32
1407         stvx            v25,r11,$sp
1408         addi            r11,r11,32
1409         stvx            v26,r10,$sp
1410         addi            r10,r10,32
1411         stvx            v27,r11,$sp
1412         addi            r11,r11,32
1413         stvx            v28,r10,$sp
1414         addi            r10,r10,32
1415         stvx            v29,r11,$sp
1416         addi            r11,r11,32
1417         stvx            v30,r10,$sp
1418         stvx            v31,r11,$sp
1419         li              r0,-1
1420         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
1421         li              $x10,0x10
1422         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1423         li              $x20,0x20
1424         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1425         li              $x30,0x30
1426         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1427         li              $x40,0x40
1428         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1429         li              $x50,0x50
1430         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1431         li              $x60,0x60
1432         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1433         li              $x70,0x70
1434         mtspr           256,r0
1435
1436         subi            $rounds,$rounds,3       # -4 in total
1437
1438         lvx             $rndkey0,$x00,$key      # load key schedule
1439         lvx             v30,$x10,$key
1440         addi            $key,$key,0x20
1441         lvx             v31,$x00,$key
1442         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
1443         addi            $key_,$sp,$FRAME+15
1444         mtctr           $rounds
1445
1446 Load_ctr32_enc_key:
1447         ?vperm          v24,v30,v31,$keyperm
1448         lvx             v30,$x10,$key
1449         addi            $key,$key,0x20
1450         stvx            v24,$x00,$key_          # off-load round[1]
1451         ?vperm          v25,v31,v30,$keyperm
1452         lvx             v31,$x00,$key
1453         stvx            v25,$x10,$key_          # off-load round[2]
1454         addi            $key_,$key_,0x20
1455         bdnz            Load_ctr32_enc_key
1456
1457         lvx             v26,$x10,$key
1458         ?vperm          v24,v30,v31,$keyperm
1459         lvx             v27,$x20,$key
1460         stvx            v24,$x00,$key_          # off-load round[3]
1461         ?vperm          v25,v31,v26,$keyperm
1462         lvx             v28,$x30,$key
1463         stvx            v25,$x10,$key_          # off-load round[4]
1464         addi            $key_,$sp,$FRAME+15     # rewind $key_
1465         ?vperm          v26,v26,v27,$keyperm
1466         lvx             v29,$x40,$key
1467         ?vperm          v27,v27,v28,$keyperm
1468         lvx             v30,$x50,$key
1469         ?vperm          v28,v28,v29,$keyperm
1470         lvx             v31,$x60,$key
1471         ?vperm          v29,v29,v30,$keyperm
1472         lvx             $out0,$x70,$key         # borrow $out0
1473         ?vperm          v30,v30,v31,$keyperm
1474         lvx             v24,$x00,$key_          # pre-load round[1]
1475         ?vperm          v31,v31,$out0,$keyperm
1476         lvx             v25,$x10,$key_          # pre-load round[2]
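                                                # stack now holds the permuted
                                                # middle-round keys, re-read
                                                # via v24/v25 each pass;
                                                # v26-v31 keep the last six
                                                # round keys resident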
1477
1478         vadduwm         $two,$one,$one
1479         subi            $inp,$inp,15            # undo "caller"
1480         $SHL            $len,$len,4
1481
1482         vadduwm         $out1,$ivec,$one        # counter values ...
1483         vadduwm         $out2,$ivec,$two
1484         vxor            $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1485          le?li          $idx,8
1486         vadduwm         $out3,$out1,$two
1487         vxor            $out1,$out1,$rndkey0
1488          le?lvsl        $inpperm,0,$idx
1489         vadduwm         $out4,$out2,$two
1490         vxor            $out2,$out2,$rndkey0
1491          le?vspltisb    $tmp,0x0f
1492         vadduwm         $out5,$out3,$two
1493         vxor            $out3,$out3,$rndkey0
1494          le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
1495         vadduwm         $out6,$out4,$two
1496         vxor            $out4,$out4,$rndkey0
1497         vadduwm         $out7,$out5,$two
1498         vxor            $out5,$out5,$rndkey0
1499         vadduwm         $ivec,$out6,$two        # next counter value
1500         vxor            $out6,$out6,$rndkey0
1501         vxor            $out7,$out7,$rndkey0
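                                                # in C-like terms (a sketch),
                                                # with c = low counter word:
                                                # for (i = 0; i < 8; i++)
                                                #   blk[i] = ctr(c+i) ^ rk[0];
                                                # c += 8;      /* -> $ivec */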
1502
1503         mtctr           $rounds
1504         b               Loop_ctr32_enc8x
1505 .align  5
1506 Loop_ctr32_enc8x:
1507         vcipher         $out0,$out0,v24
1508         vcipher         $out1,$out1,v24
1509         vcipher         $out2,$out2,v24
1510         vcipher         $out3,$out3,v24
1511         vcipher         $out4,$out4,v24
1512         vcipher         $out5,$out5,v24
1513         vcipher         $out6,$out6,v24
1514         vcipher         $out7,$out7,v24
1515 Loop_ctr32_enc8x_middle:
1516         lvx             v24,$x20,$key_          # round[3]
1517         addi            $key_,$key_,0x20
1518
1519         vcipher         $out0,$out0,v25
1520         vcipher         $out1,$out1,v25
1521         vcipher         $out2,$out2,v25
1522         vcipher         $out3,$out3,v25
1523         vcipher         $out4,$out4,v25
1524         vcipher         $out5,$out5,v25
1525         vcipher         $out6,$out6,v25
1526         vcipher         $out7,$out7,v25
1527         lvx             v25,$x10,$key_          # round[4]
1528         bdnz            Loop_ctr32_enc8x
1529
1530         subic           r11,$len,256            # $len-256, borrow $key_
1531         vcipher         $out0,$out0,v24
1532         vcipher         $out1,$out1,v24
1533         vcipher         $out2,$out2,v24
1534         vcipher         $out3,$out3,v24
1535         vcipher         $out4,$out4,v24
1536         vcipher         $out5,$out5,v24
1537         vcipher         $out6,$out6,v24
1538         vcipher         $out7,$out7,v24
1539
1540         subfe           r0,r0,r0                # borrow?-1:0
1541         vcipher         $out0,$out0,v25
1542         vcipher         $out1,$out1,v25
1543         vcipher         $out2,$out2,v25
1544         vcipher         $out3,$out3,v25
1545         vcipher         $out4,$out4,v25
1546         vcipher         $out5,$out5,v25
1547         vcipher         $out6,$out6,v25
1548         vcipher         $out7,$out7,v25
1549
1550         and             r0,r0,r11
1551         addi            $key_,$sp,$FRAME+15     # rewind $key_
1552         vcipher         $out0,$out0,v26
1553         vcipher         $out1,$out1,v26
1554         vcipher         $out2,$out2,v26
1555         vcipher         $out3,$out3,v26
1556         vcipher         $out4,$out4,v26
1557         vcipher         $out5,$out5,v26
1558         vcipher         $out6,$out6,v26
1559         vcipher         $out7,$out7,v26
1560         lvx             v24,$x00,$key_          # re-pre-load round[1]
1561
1562         subic           $len,$len,129           # $len-=129, set CA
1563         vcipher         $out0,$out0,v27
1564         addi            $len,$len,1             # net $len-=128, CA kept
1565         vcipher         $out1,$out1,v27
1566         vcipher         $out2,$out2,v27
1567         vcipher         $out3,$out3,v27
1568         vcipher         $out4,$out4,v27
1569         vcipher         $out5,$out5,v27
1570         vcipher         $out6,$out6,v27
1571         vcipher         $out7,$out7,v27
1572         lvx             v25,$x10,$key_          # re-pre-load round[2]
1573
1574         vcipher         $out0,$out0,v28
1575          lvx_u          $in0,$x00,$inp          # load input
1576         vcipher         $out1,$out1,v28
1577          lvx_u          $in1,$x10,$inp
1578         vcipher         $out2,$out2,v28
1579          lvx_u          $in2,$x20,$inp
1580         vcipher         $out3,$out3,v28
1581          lvx_u          $in3,$x30,$inp
1582         vcipher         $out4,$out4,v28
1583          lvx_u          $in4,$x40,$inp
1584         vcipher         $out5,$out5,v28
1585          lvx_u          $in5,$x50,$inp
1586         vcipher         $out6,$out6,v28
1587          lvx_u          $in6,$x60,$inp
1588         vcipher         $out7,$out7,v28
1589          lvx_u          $in7,$x70,$inp
1590          addi           $inp,$inp,0x80
1591
1592         vcipher         $out0,$out0,v29
1593          le?vperm       $in0,$in0,$in0,$inpperm
1594         vcipher         $out1,$out1,v29
1595          le?vperm       $in1,$in1,$in1,$inpperm
1596         vcipher         $out2,$out2,v29
1597          le?vperm       $in2,$in2,$in2,$inpperm
1598         vcipher         $out3,$out3,v29
1599          le?vperm       $in3,$in3,$in3,$inpperm
1600         vcipher         $out4,$out4,v29
1601          le?vperm       $in4,$in4,$in4,$inpperm
1602         vcipher         $out5,$out5,v29
1603          le?vperm       $in5,$in5,$in5,$inpperm
1604         vcipher         $out6,$out6,v29
1605          le?vperm       $in6,$in6,$in6,$inpperm
1606         vcipher         $out7,$out7,v29
1607          le?vperm       $in7,$in7,$in7,$inpperm
1608
1609         add             $inp,$inp,r0            # $inp is adjusted in such
1610                                                 # a way that at exit from
1611                                                 # the loop inX-in7 hold
1612                                                 # the last input blocks
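                                                # roughly, in C: inp +=
                                                # (left < 256) ? left-256 : 0,
                                                # with left = bytes remaining
                                                # before the -128 above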
1613         subfe.          r0,r0,r0                # borrow?-1:0
1614         vcipher         $out0,$out0,v30
1615          vxor           $in0,$in0,v31           # xor with last round key
1616         vcipher         $out1,$out1,v30
1617          vxor           $in1,$in1,v31
1618         vcipher         $out2,$out2,v30
1619          vxor           $in2,$in2,v31
1620         vcipher         $out3,$out3,v30
1621          vxor           $in3,$in3,v31
1622         vcipher         $out4,$out4,v30
1623          vxor           $in4,$in4,v31
1624         vcipher         $out5,$out5,v30
1625          vxor           $in5,$in5,v31
1626         vcipher         $out6,$out6,v30
1627          vxor           $in6,$in6,v31
1628         vcipher         $out7,$out7,v30
1629          vxor           $in7,$in7,v31
1630
1631         bne             Lctr32_enc8x_break      # did $len-129 borrow?
1632
1633         vcipherlast     $in0,$out0,$in0
1634         vcipherlast     $in1,$out1,$in1
1635          vadduwm        $out1,$ivec,$one        # counter values ...
1636         vcipherlast     $in2,$out2,$in2
1637          vadduwm        $out2,$ivec,$two
1638          vxor           $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1639         vcipherlast     $in3,$out3,$in3
1640          vadduwm        $out3,$out1,$two
1641          vxor           $out1,$out1,$rndkey0
1642         vcipherlast     $in4,$out4,$in4
1643          vadduwm        $out4,$out2,$two
1644          vxor           $out2,$out2,$rndkey0
1645         vcipherlast     $in5,$out5,$in5
1646          vadduwm        $out5,$out3,$two
1647          vxor           $out3,$out3,$rndkey0
1648         vcipherlast     $in6,$out6,$in6
1649          vadduwm        $out6,$out4,$two
1650          vxor           $out4,$out4,$rndkey0
1651         vcipherlast     $in7,$out7,$in7
1652          vadduwm        $out7,$out5,$two
1653          vxor           $out5,$out5,$rndkey0
1654         le?vperm        $in0,$in0,$in0,$inpperm
1655          vadduwm        $ivec,$out6,$two        # next counter value
1656          vxor           $out6,$out6,$rndkey0
1657         le?vperm        $in1,$in1,$in1,$inpperm
1658          vxor           $out7,$out7,$rndkey0
1659         mtctr           $rounds
1660
1661          vcipher        $out0,$out0,v24
1662         stvx_u          $in0,$x00,$out
1663         le?vperm        $in2,$in2,$in2,$inpperm
1664          vcipher        $out1,$out1,v24
1665         stvx_u          $in1,$x10,$out
1666         le?vperm        $in3,$in3,$in3,$inpperm
1667          vcipher        $out2,$out2,v24
1668         stvx_u          $in2,$x20,$out
1669         le?vperm        $in4,$in4,$in4,$inpperm
1670          vcipher        $out3,$out3,v24
1671         stvx_u          $in3,$x30,$out
1672         le?vperm        $in5,$in5,$in5,$inpperm
1673          vcipher        $out4,$out4,v24
1674         stvx_u          $in4,$x40,$out
1675         le?vperm        $in6,$in6,$in6,$inpperm
1676          vcipher        $out5,$out5,v24
1677         stvx_u          $in5,$x50,$out
1678         le?vperm        $in7,$in7,$in7,$inpperm
1679          vcipher        $out6,$out6,v24
1680         stvx_u          $in6,$x60,$out
1681          vcipher        $out7,$out7,v24
1682         stvx_u          $in7,$x70,$out
1683         addi            $out,$out,0x80
1684
1685         b               Loop_ctr32_enc8x_middle
1686
1687 .align  5
1688 Lctr32_enc8x_break:
1689         cmpwi           $len,-0x60
1690         blt             Lctr32_enc8x_one
1691         nop
1692         beq             Lctr32_enc8x_two
1693         cmpwi           $len,-0x40
1694         blt             Lctr32_enc8x_three
1695         nop
1696         beq             Lctr32_enc8x_four
1697         cmpwi           $len,-0x20
1698         blt             Lctr32_enc8x_five
1699         nop
1700         beq             Lctr32_enc8x_six
1701         cmpwi           $len,0x00
1702         blt             Lctr32_enc8x_seven
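        # here $len = remaining_bytes - 128, so the dispatch above maps
        # -0x70 -> one block, -0x60 -> two, ..., -0x10 -> seven, and the
        # fall-through (0x00) -> eight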
1703
1704 Lctr32_enc8x_eight:
1705         vcipherlast     $out0,$out0,$in0
1706         vcipherlast     $out1,$out1,$in1
1707         vcipherlast     $out2,$out2,$in2
1708         vcipherlast     $out3,$out3,$in3
1709         vcipherlast     $out4,$out4,$in4
1710         vcipherlast     $out5,$out5,$in5
1711         vcipherlast     $out6,$out6,$in6
1712         vcipherlast     $out7,$out7,$in7
1713
1714         le?vperm        $out0,$out0,$out0,$inpperm
1715         le?vperm        $out1,$out1,$out1,$inpperm
1716         stvx_u          $out0,$x00,$out
1717         le?vperm        $out2,$out2,$out2,$inpperm
1718         stvx_u          $out1,$x10,$out
1719         le?vperm        $out3,$out3,$out3,$inpperm
1720         stvx_u          $out2,$x20,$out
1721         le?vperm        $out4,$out4,$out4,$inpperm
1722         stvx_u          $out3,$x30,$out
1723         le?vperm        $out5,$out5,$out5,$inpperm
1724         stvx_u          $out4,$x40,$out
1725         le?vperm        $out6,$out6,$out6,$inpperm
1726         stvx_u          $out5,$x50,$out
1727         le?vperm        $out7,$out7,$out7,$inpperm
1728         stvx_u          $out6,$x60,$out
1729         stvx_u          $out7,$x70,$out
1730         addi            $out,$out,0x80
1731         b               Lctr32_enc8x_done
1732
1733 .align  5
1734 Lctr32_enc8x_seven:
1735         vcipherlast     $out0,$out0,$in1
1736         vcipherlast     $out1,$out1,$in2
1737         vcipherlast     $out2,$out2,$in3
1738         vcipherlast     $out3,$out3,$in4
1739         vcipherlast     $out4,$out4,$in5
1740         vcipherlast     $out5,$out5,$in6
1741         vcipherlast     $out6,$out6,$in7
1742
1743         le?vperm        $out0,$out0,$out0,$inpperm
1744         le?vperm        $out1,$out1,$out1,$inpperm
1745         stvx_u          $out0,$x00,$out
1746         le?vperm        $out2,$out2,$out2,$inpperm
1747         stvx_u          $out1,$x10,$out
1748         le?vperm        $out3,$out3,$out3,$inpperm
1749         stvx_u          $out2,$x20,$out
1750         le?vperm        $out4,$out4,$out4,$inpperm
1751         stvx_u          $out3,$x30,$out
1752         le?vperm        $out5,$out5,$out5,$inpperm
1753         stvx_u          $out4,$x40,$out
1754         le?vperm        $out6,$out6,$out6,$inpperm
1755         stvx_u          $out5,$x50,$out
1756         stvx_u          $out6,$x60,$out
1757         addi            $out,$out,0x70
1758         b               Lctr32_enc8x_done
1759
1760 .align  5
1761 Lctr32_enc8x_six:
1762         vcipherlast     $out0,$out0,$in2
1763         vcipherlast     $out1,$out1,$in3
1764         vcipherlast     $out2,$out2,$in4
1765         vcipherlast     $out3,$out3,$in5
1766         vcipherlast     $out4,$out4,$in6
1767         vcipherlast     $out5,$out5,$in7
1768
1769         le?vperm        $out0,$out0,$out0,$inpperm
1770         le?vperm        $out1,$out1,$out1,$inpperm
1771         stvx_u          $out0,$x00,$out
1772         le?vperm        $out2,$out2,$out2,$inpperm
1773         stvx_u          $out1,$x10,$out
1774         le?vperm        $out3,$out3,$out3,$inpperm
1775         stvx_u          $out2,$x20,$out
1776         le?vperm        $out4,$out4,$out4,$inpperm
1777         stvx_u          $out3,$x30,$out
1778         le?vperm        $out5,$out5,$out5,$inpperm
1779         stvx_u          $out4,$x40,$out
1780         stvx_u          $out5,$x50,$out
1781         addi            $out,$out,0x60
1782         b               Lctr32_enc8x_done
1783
1784 .align  5
1785 Lctr32_enc8x_five:
1786         vcipherlast     $out0,$out0,$in3
1787         vcipherlast     $out1,$out1,$in4
1788         vcipherlast     $out2,$out2,$in5
1789         vcipherlast     $out3,$out3,$in6
1790         vcipherlast     $out4,$out4,$in7
1791
1792         le?vperm        $out0,$out0,$out0,$inpperm
1793         le?vperm        $out1,$out1,$out1,$inpperm
1794         stvx_u          $out0,$x00,$out
1795         le?vperm        $out2,$out2,$out2,$inpperm
1796         stvx_u          $out1,$x10,$out
1797         le?vperm        $out3,$out3,$out3,$inpperm
1798         stvx_u          $out2,$x20,$out
1799         le?vperm        $out4,$out4,$out4,$inpperm
1800         stvx_u          $out3,$x30,$out
1801         stvx_u          $out4,$x40,$out
1802         addi            $out,$out,0x50
1803         b               Lctr32_enc8x_done
1804
1805 .align  5
1806 Lctr32_enc8x_four:
1807         vcipherlast     $out0,$out0,$in4
1808         vcipherlast     $out1,$out1,$in5
1809         vcipherlast     $out2,$out2,$in6
1810         vcipherlast     $out3,$out3,$in7
1811
1812         le?vperm        $out0,$out0,$out0,$inpperm
1813         le?vperm        $out1,$out1,$out1,$inpperm
1814         stvx_u          $out0,$x00,$out
1815         le?vperm        $out2,$out2,$out2,$inpperm
1816         stvx_u          $out1,$x10,$out
1817         le?vperm        $out3,$out3,$out3,$inpperm
1818         stvx_u          $out2,$x20,$out
1819         stvx_u          $out3,$x30,$out
1820         addi            $out,$out,0x40
1821         b               Lctr32_enc8x_done
1822
1823 .align  5
1824 Lctr32_enc8x_three:
1825         vcipherlast     $out0,$out0,$in5
1826         vcipherlast     $out1,$out1,$in6
1827         vcipherlast     $out2,$out2,$in7
1828
1829         le?vperm        $out0,$out0,$out0,$inpperm
1830         le?vperm        $out1,$out1,$out1,$inpperm
1831         stvx_u          $out0,$x00,$out
1832         le?vperm        $out2,$out2,$out2,$inpperm
1833         stvx_u          $out1,$x10,$out
1834         stvx_u          $out2,$x20,$out
1835         addi            $out,$out,0x30
1836         b               Lctr32_enc8x_done
1837
1838 .align  5
1839 Lctr32_enc8x_two:
1840         vcipherlast     $out0,$out0,$in6
1841         vcipherlast     $out1,$out1,$in7
1842
1843         le?vperm        $out0,$out0,$out0,$inpperm
1844         le?vperm        $out1,$out1,$out1,$inpperm
1845         stvx_u          $out0,$x00,$out
1846         stvx_u          $out1,$x10,$out
1847         addi            $out,$out,0x20
1848         b               Lctr32_enc8x_done
1849
1850 .align  5
1851 Lctr32_enc8x_one:
1852         vcipherlast     $out0,$out0,$in7
1853
1854         le?vperm        $out0,$out0,$out0,$inpperm
1855         stvx_u          $out0,0,$out
1856         addi            $out,$out,0x10
1857
1858 Lctr32_enc8x_done:
1859         li              r10,`$FRAME+15`
1860         li              r11,`$FRAME+31`
1861         stvx            $inpperm,r10,$sp        # wipe copies of round keys
1862         addi            r10,r10,32
1863         stvx            $inpperm,r11,$sp
1864         addi            r11,r11,32
1865         stvx            $inpperm,r10,$sp
1866         addi            r10,r10,32
1867         stvx            $inpperm,r11,$sp
1868         addi            r11,r11,32
1869         stvx            $inpperm,r10,$sp
1870         addi            r10,r10,32
1871         stvx            $inpperm,r11,$sp
1872         addi            r11,r11,32
1873         stvx            $inpperm,r10,$sp
1874         addi            r10,r10,32
1875         stvx            $inpperm,r11,$sp
1876         addi            r11,r11,32
1877
1878         mtspr           256,$vrsave
1879         lvx             v20,r10,$sp             # ABI says so
1880         addi            r10,r10,32
1881         lvx             v21,r11,$sp
1882         addi            r11,r11,32
1883         lvx             v22,r10,$sp
1884         addi            r10,r10,32
1885         lvx             v23,r11,$sp
1886         addi            r11,r11,32
1887         lvx             v24,r10,$sp
1888         addi            r10,r10,32
1889         lvx             v25,r11,$sp
1890         addi            r11,r11,32
1891         lvx             v26,r10,$sp
1892         addi            r10,r10,32
1893         lvx             v27,r11,$sp
1894         addi            r11,r11,32
1895         lvx             v28,r10,$sp
1896         addi            r10,r10,32
1897         lvx             v29,r11,$sp
1898         addi            r11,r11,32
1899         lvx             v30,r10,$sp
1900         lvx             v31,r11,$sp
1901         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1902         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1903         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1904         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1905         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1906         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1907         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1908         blr
1909         .long           0
1910         .byte           0,12,0x04,0,0x80,6,6,0
1911         .long           0
1912 .size   .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1913 ___
1914 }}      }}}
1915
1916 #########################################################################
1917 {{{     # XTS procedures                                                #
1918 # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,   #
1919 #                             const AES_KEY *key1, const AES_KEY *key2, #
1920 #                             [const] unsigned char iv[16]);            #
1921 # If $key2 is NULL, a "tweak chaining" mode is engaged, in which the    #
1922 # input tweak value is assumed to be already encrypted, and the last    #
1923 # tweak value, suitable for a consecutive call on the same chunk of     #
1924 # data, is written back to the original buffer. In addition, in "tweak  #
1925 # chaining" mode only complete input blocks are processed.              #
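#                                                                        #
# A minimal calling sketch in C (hypothetical caller, not part of this  #
# file):                                                                 #
#                                                                        #
#   aes_p8_xts_encrypt(inp, out, len, &key1, &key2, iv); /* standard */  #
#   aes_p8_xts_encrypt(inp, out, len, &key1, NULL, iv);  /* chaining */  #
#                                                                        #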
1926
1927 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =     map("r$_",(3..10));
1928 my ($rndkey0,$rndkey1,$inout) =                         map("v$_",(0..2));
1929 my ($output,$inptail,$inpperm,$leperm,$keyperm) =       map("v$_",(3..7));
1930 my ($tweak,$seven,$eighty7,$tmp,$tweak1) =              map("v$_",(8..12));
1931 my $taillen = $key2;
1932
1933    ($inp,$idx) = ($idx,$inp);                           # reassign
1934
1935 $code.=<<___;
1936 .globl  .${prefix}_xts_encrypt
1937 .align  5
1938 .${prefix}_xts_encrypt:
1939         mr              $inp,r3                         # reassign
1940         li              r3,-1
1941         ${UCMP}i        $len,16
1942         bltlr-
1943
1944         lis             r0,0xfff0
1945         mfspr           r12,256                         # save vrsave
1946         li              r11,0
1947         mtspr           256,r0
1948
1949         vspltisb        $seven,0x07                     # 0x070707..07
1950         le?lvsl         $leperm,r11,r11
1951         le?vspltisb     $tmp,0x0f
1952         le?vxor         $leperm,$leperm,$seven
1953
1954         li              $idx,15
1955         lvx             $tweak,0,$ivp                   # load [unaligned] iv
1956         lvsl            $inpperm,0,$ivp
1957         lvx             $inptail,$idx,$ivp
1958         le?vxor         $inpperm,$inpperm,$tmp
1959         vperm           $tweak,$tweak,$inptail,$inpperm
1960
1961         neg             r11,$inp
1962         lvsr            $inpperm,0,r11                  # prepare for unaligned load
1963         lvx             $inout,0,$inp
1964         addi            $inp,$inp,15                    # 15 is not a typo
1965         le?vxor         $inpperm,$inpperm,$tmp
1966
1967         ${UCMP}i        $key2,0                         # key2==NULL?
1968         beq             Lxts_enc_no_key2
1969
1970         ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
1971         lwz             $rounds,240($key2)
1972         srwi            $rounds,$rounds,1
1973         subi            $rounds,$rounds,1
1974         li              $idx,16
1975
1976         lvx             $rndkey0,0,$key2
1977         lvx             $rndkey1,$idx,$key2
1978         addi            $idx,$idx,16
1979         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1980         vxor            $tweak,$tweak,$rndkey0
1981         lvx             $rndkey0,$idx,$key2
1982         addi            $idx,$idx,16
1983         mtctr           $rounds
1984
1985 Ltweak_xts_enc:
1986         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1987         vcipher         $tweak,$tweak,$rndkey1
1988         lvx             $rndkey1,$idx,$key2
1989         addi            $idx,$idx,16
1990         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1991         vcipher         $tweak,$tweak,$rndkey0
1992         lvx             $rndkey0,$idx,$key2
1993         addi            $idx,$idx,16
1994         bdnz            Ltweak_xts_enc
1995
1996         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1997         vcipher         $tweak,$tweak,$rndkey1
1998         lvx             $rndkey1,$idx,$key2
1999         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2000         vcipherlast     $tweak,$tweak,$rndkey0
2001
2002         li              $ivp,0                          # don't chain the tweak
2003         b               Lxts_enc
2004
2005 Lxts_enc_no_key2:
2006         li              $idx,-16
2007         and             $len,$len,$idx                  # in "tweak chaining"
2008                                                         # mode only complete
2009                                                         # blocks are processed
2010 Lxts_enc:
2011         lvx             $inptail,0,$inp
2012         addi            $inp,$inp,16
2013
2014         ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
2015         lwz             $rounds,240($key1)
2016         srwi            $rounds,$rounds,1
2017         subi            $rounds,$rounds,1
2018         li              $idx,16
2019
2020         vslb            $eighty7,$seven,$seven          # 0x808080..80
2021         vor             $eighty7,$eighty7,$seven        # 0x878787..87
2022         vspltisb        $tmp,1                          # 0x010101..01
2023         vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
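                                                        # 0x87 reduces the
                                                        # GF(2^128) overflow
                                                        # (x^128=x^7+x^2+x+1);
                                                        # the 0x01s propagate
                                                        # per-byte carries in
                                                        # the doubling below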
2024
2025         ${UCMP}i        $len,96
2026         bge             _aesp8_xts_encrypt6x
2027
2028         andi.           $taillen,$len,15
2029         subic           r0,$len,32
2030         subi            $taillen,$taillen,16
2031         subfe           r0,r0,r0
2032         and             r0,r0,$taillen
2033         add             $inp,$inp,r0
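                                                # roughly: if (len < 32)
                                                #   inp += (len & 15) - 16;
                                                # i.e. step back so the next
                                                # load covers the tail block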
2034
2035         lvx             $rndkey0,0,$key1
2036         lvx             $rndkey1,$idx,$key1
2037         addi            $idx,$idx,16
2038         vperm           $inout,$inout,$inptail,$inpperm
2039         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2040         vxor            $inout,$inout,$tweak
2041         vxor            $inout,$inout,$rndkey0
2042         lvx             $rndkey0,$idx,$key1
2043         addi            $idx,$idx,16
2044         mtctr           $rounds
2045         b               Loop_xts_enc
2046
2047 .align  5
2048 Loop_xts_enc:
2049         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2050         vcipher         $inout,$inout,$rndkey1
2051         lvx             $rndkey1,$idx,$key1
2052         addi            $idx,$idx,16
2053         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2054         vcipher         $inout,$inout,$rndkey0
2055         lvx             $rndkey0,$idx,$key1
2056         addi            $idx,$idx,16
2057         bdnz            Loop_xts_enc
2058
2059         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2060         vcipher         $inout,$inout,$rndkey1
2061         lvx             $rndkey1,$idx,$key1
2062         li              $idx,16
2063         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2064         vxor            $rndkey0,$rndkey0,$tweak
2065         vcipherlast     $output,$inout,$rndkey0
2066
2067         le?vperm        $tmp,$output,$output,$leperm
2068         be?nop
2069         le?stvx_u       $tmp,0,$out
2070         be?stvx_u       $output,0,$out
2071         addi            $out,$out,16
2072
2073         subic.          $len,$len,16
2074         beq             Lxts_enc_done
2075
2076         vmr             $inout,$inptail
2077         lvx             $inptail,0,$inp
2078         addi            $inp,$inp,16
2079         lvx             $rndkey0,0,$key1
2080         lvx             $rndkey1,$idx,$key1
2081         addi            $idx,$idx,16
2082
2083         subic           r0,$len,32
2084         subfe           r0,r0,r0
2085         and             r0,r0,$taillen
2086         add             $inp,$inp,r0
2087
2088         vsrab           $tmp,$tweak,$seven              # next tweak value
2089         vaddubm         $tweak,$tweak,$tweak
2090         vsldoi          $tmp,$tmp,$tmp,15
2091         vand            $tmp,$tmp,$eighty7
2092         vxor            $tweak,$tweak,$tmp
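        # the five instructions above are a branchless tweak *= x in
        # GF(2^128); a byte-wise C sketch of the same update:
        #       carry = 0;
        #       for (i = 0; i < 16; i++) {      /* t[0] = low byte */
        #               next  = t[i] >> 7;
        #               t[i]  = (unsigned char)((t[i] << 1) | carry);
        #               carry = next;
        #       }
        #       if (carry) t[0] ^= 0x87;        /* x^128 = x^7+x^2+x+1 */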
2093
2094         vperm           $inout,$inout,$inptail,$inpperm
2095         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2096         vxor            $inout,$inout,$tweak
2097         vxor            $output,$output,$rndkey0        # just in case $len<16
2098         vxor            $inout,$inout,$rndkey0
2099         lvx             $rndkey0,$idx,$key1
2100         addi            $idx,$idx,16
2101
2102         mtctr           $rounds
2103         ${UCMP}i        $len,16
2104         bge             Loop_xts_enc
2105
2106         vxor            $output,$output,$tweak
2107         lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
2108         vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
2109         vspltisb        $tmp,-1
2110         vperm           $inptail,$inptail,$tmp,$inpperm
2111         vsel            $inout,$inout,$output,$inptail
2112
2113         subi            r11,$out,17
2114         subi            $out,$out,16
2115         mtctr           $len
2116         li              $len,16
2117 Loop_xts_enc_steal:
2118         lbzu            r0,1(r11)
2119         stb             r0,16(r11)
2120         bdnz            Loop_xts_enc_steal
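                                                # ciphertext stealing, roughly
                                                # memcpy(out+16, out, taillen):
                                                # the head of the last full
                                                # ciphertext block becomes the
                                                # short final block; its slot
                                                # is rewritten by the pass below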
2121
2122         mtctr           $rounds
2123         b               Loop_xts_enc                    # one more time...
2124
2125 Lxts_enc_done:
2126         ${UCMP}i        $ivp,0
2127         beq             Lxts_enc_ret
2128
2129         vsrab           $tmp,$tweak,$seven              # next tweak value
2130         vaddubm         $tweak,$tweak,$tweak
2131         vsldoi          $tmp,$tmp,$tmp,15
2132         vand            $tmp,$tmp,$eighty7
2133         vxor            $tweak,$tweak,$tmp
2134
2135         le?vperm        $tweak,$tweak,$tweak,$leperm
2136         stvx_u          $tweak,0,$ivp
2137
2138 Lxts_enc_ret:
2139         mtspr           256,r12                         # restore vrsave
2140         li              r3,0
2141         blr
2142         .long           0
2143         .byte           0,12,0x04,0,0x80,6,6,0
2144         .long           0
2145 .size   .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2146
2147 .globl  .${prefix}_xts_decrypt
2148 .align  5
2149 .${prefix}_xts_decrypt:
2150         mr              $inp,r3                         # reassign
2151         li              r3,-1
2152         ${UCMP}i        $len,16
2153         bltlr-
2154
2155         lis             r0,0xfff8
2156         mfspr           r12,256                         # save vrsave
2157         li              r11,0
2158         mtspr           256,r0
2159
2160         andi.           r0,$len,15
2161         neg             r0,r0
2162         andi.           r0,r0,16
2163         sub             $len,$len,r0
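                                                # i.e. len -= (len % 16) ? 16
                                                # : 0 -- hold back one full
                                                # block for ciphertext stealing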
2164
2165         vspltisb        $seven,0x07                     # 0x070707..07
2166         le?lvsl         $leperm,r11,r11
2167         le?vspltisb     $tmp,0x0f
2168         le?vxor         $leperm,$leperm,$seven
2169
2170         li              $idx,15
2171         lvx             $tweak,0,$ivp                   # load [unaligned] iv
2172         lvsl            $inpperm,0,$ivp
2173         lvx             $inptail,$idx,$ivp
2174         le?vxor         $inpperm,$inpperm,$tmp
2175         vperm           $tweak,$tweak,$inptail,$inpperm
2176
2177         neg             r11,$inp
2178         lvsr            $inpperm,0,r11                  # prepare for unaligned load
2179         lvx             $inout,0,$inp
2180         addi            $inp,$inp,15                    # 15 is not a typo
2181         le?vxor         $inpperm,$inpperm,$tmp
2182
2183         ${UCMP}i        $key2,0                         # key2==NULL?
2184         beq             Lxts_dec_no_key2
2185
2186         ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
2187         lwz             $rounds,240($key2)
2188         srwi            $rounds,$rounds,1
2189         subi            $rounds,$rounds,1
2190         li              $idx,16
2191
2192         lvx             $rndkey0,0,$key2
2193         lvx             $rndkey1,$idx,$key2
2194         addi            $idx,$idx,16
2195         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2196         vxor            $tweak,$tweak,$rndkey0
2197         lvx             $rndkey0,$idx,$key2
2198         addi            $idx,$idx,16
2199         mtctr           $rounds
2200
2201 Ltweak_xts_dec:
2202         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2203         vcipher         $tweak,$tweak,$rndkey1
2204         lvx             $rndkey1,$idx,$key2
2205         addi            $idx,$idx,16
2206         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2207         vcipher         $tweak,$tweak,$rndkey0
2208         lvx             $rndkey0,$idx,$key2
2209         addi            $idx,$idx,16
2210         bdnz            Ltweak_xts_dec
2211
2212         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2213         vcipher         $tweak,$tweak,$rndkey1
2214         lvx             $rndkey1,$idx,$key2
2215         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2216         vcipherlast     $tweak,$tweak,$rndkey0
2217
2218         li              $ivp,0                          # don't chain the tweak
2219         b               Lxts_dec
2220
2221 Lxts_dec_no_key2:
2222         neg             $idx,$len
2223         andi.           $idx,$idx,15
2224         add             $len,$len,$idx                  # in "tweak chaining"
2225                                                         # mode only complete
2226                                                         # blocks are processed
2227 Lxts_dec:
2228         lvx             $inptail,0,$inp
2229         addi            $inp,$inp,16
2230
2231         ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
2232         lwz             $rounds,240($key1)
2233         srwi            $rounds,$rounds,1
2234         subi            $rounds,$rounds,1
2235         li              $idx,16
2236
2237         vslb            $eighty7,$seven,$seven          # 0x808080..80
2238         vor             $eighty7,$eighty7,$seven        # 0x878787..87
2239         vspltisb        $tmp,1                          # 0x010101..01
2240         vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
2241
2242         ${UCMP}i        $len,96
2243         bge             _aesp8_xts_decrypt6x
2244
2245         lvx             $rndkey0,0,$key1
2246         lvx             $rndkey1,$idx,$key1
2247         addi            $idx,$idx,16
2248         vperm           $inout,$inout,$inptail,$inpperm
2249         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2250         vxor            $inout,$inout,$tweak
2251         vxor            $inout,$inout,$rndkey0
2252         lvx             $rndkey0,$idx,$key1
2253         addi            $idx,$idx,16
2254         mtctr           $rounds
2255
2256         ${UCMP}i        $len,16
2257         blt             Ltail_xts_dec
2258         be?b            Loop_xts_dec
2259
2260 .align  5
2261 Loop_xts_dec:
2262         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2263         vncipher        $inout,$inout,$rndkey1
2264         lvx             $rndkey1,$idx,$key1
2265         addi            $idx,$idx,16
2266         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2267         vncipher        $inout,$inout,$rndkey0
2268         lvx             $rndkey0,$idx,$key1
2269         addi            $idx,$idx,16
2270         bdnz            Loop_xts_dec
2271
2272         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2273         vncipher        $inout,$inout,$rndkey1
2274         lvx             $rndkey1,$idx,$key1
2275         li              $idx,16
2276         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2277         vxor            $rndkey0,$rndkey0,$tweak
2278         vncipherlast    $output,$inout,$rndkey0
2279
2280         le?vperm        $tmp,$output,$output,$leperm
2281         be?nop
2282         le?stvx_u       $tmp,0,$out
2283         be?stvx_u       $output,0,$out
2284         addi            $out,$out,16
2285
2286         subic.          $len,$len,16
2287         beq             Lxts_dec_done
2288
2289         vmr             $inout,$inptail
2290         lvx             $inptail,0,$inp
2291         addi            $inp,$inp,16
2292         lvx             $rndkey0,0,$key1
2293         lvx             $rndkey1,$idx,$key1
2294         addi            $idx,$idx,16
2295
2296         vsrab           $tmp,$tweak,$seven              # next tweak value
2297         vaddubm         $tweak,$tweak,$tweak
2298         vsldoi          $tmp,$tmp,$tmp,15
2299         vand            $tmp,$tmp,$eighty7
2300         vxor            $tweak,$tweak,$tmp
2301
2302         vperm           $inout,$inout,$inptail,$inpperm
2303         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2304         vxor            $inout,$inout,$tweak
2305         vxor            $inout,$inout,$rndkey0
2306         lvx             $rndkey0,$idx,$key1
2307         addi            $idx,$idx,16
2308
2309         mtctr           $rounds
2310         ${UCMP}i        $len,16
2311         bge             Loop_xts_dec
2312
2313 Ltail_xts_dec:
2314         vsrab           $tmp,$tweak,$seven              # next tweak value
2315         vaddubm         $tweak1,$tweak,$tweak
2316         vsldoi          $tmp,$tmp,$tmp,15
2317         vand            $tmp,$tmp,$eighty7
2318         vxor            $tweak1,$tweak1,$tmp
2319
2320         subi            $inp,$inp,16
2321         add             $inp,$inp,$len
2322
2323         vxor            $inout,$inout,$tweak            # :-( undo current tweak
2324         vxor            $inout,$inout,$tweak1           # :-) apply next tweak
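                                                # stealing on decrypt swaps the
                                                # tweak order: the last full
                                                # block uses the *next* tweak
                                                # (T*x), the partial tail the
                                                # current one (T)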
2325
2326 Loop_xts_dec_short:
2327         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2328         vncipher        $inout,$inout,$rndkey1
2329         lvx             $rndkey1,$idx,$key1
2330         addi            $idx,$idx,16
2331         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2332         vncipher        $inout,$inout,$rndkey0
2333         lvx             $rndkey0,$idx,$key1
2334         addi            $idx,$idx,16
2335         bdnz            Loop_xts_dec_short
2336
2337         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2338         vncipher        $inout,$inout,$rndkey1
2339         lvx             $rndkey1,$idx,$key1
2340         li              $idx,16
2341         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2342         vxor            $rndkey0,$rndkey0,$tweak1
2343         vncipherlast    $output,$inout,$rndkey0
2344
2345         le?vperm        $tmp,$output,$output,$leperm
2346         be?nop
2347         le?stvx_u       $tmp,0,$out
2348         be?stvx_u       $output,0,$out
2349
2350         vmr             $inout,$inptail
2351         lvx             $inptail,0,$inp
2352         #addi           $inp,$inp,16
2353         lvx             $rndkey0,0,$key1
2354         lvx             $rndkey1,$idx,$key1
2355         addi            $idx,$idx,16
2356         vperm           $inout,$inout,$inptail,$inpperm
2357         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2358
2359         lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
2360         vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
2361         vspltisb        $tmp,-1
2362         vperm           $inptail,$inptail,$tmp,$inpperm
2363         vsel            $inout,$inout,$output,$inptail
2364
2365         vxor            $rndkey0,$rndkey0,$tweak
2366         vxor            $inout,$inout,$rndkey0
2367         lvx             $rndkey0,$idx,$key1
2368         addi            $idx,$idx,16
2369
2370         subi            r11,$out,1
2371         mtctr           $len
2372         li              $len,16
2373 Loop_xts_dec_steal:
2374         lbzu            r0,1(r11)
2375         stb             r0,16(r11)
2376         bdnz            Loop_xts_dec_steal
2377
2378         mtctr           $rounds
2379         b               Loop_xts_dec                    # one more time...
2380
2381 Lxts_dec_done:
2382         ${UCMP}i        $ivp,0
2383         beq             Lxts_dec_ret
2384
2385         vsrab           $tmp,$tweak,$seven              # next tweak value
2386         vaddubm         $tweak,$tweak,$tweak
2387         vsldoi          $tmp,$tmp,$tmp,15
2388         vand            $tmp,$tmp,$eighty7
2389         vxor            $tweak,$tweak,$tmp
2390
2391         le?vperm        $tweak,$tweak,$tweak,$leperm
2392         stvx_u          $tweak,0,$ivp
2393
2394 Lxts_dec_ret:
2395         mtspr           256,r12                         # restore vrsave
2396         li              r3,0
2397         blr
2398         .long           0
2399         .byte           0,12,0x04,0,0x80,6,6,0
2400         .long           0
2401 .size   .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2402 ___
2403 #########################################################################
2404 {{      # Optimized XTS procedures                                      #
2405 my $key_=$key2;
2406 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2407     $x00=0 if ($flavour =~ /osx/);
2408 my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
2409 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2410 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2411 my $rndkey0="v23";      # v24-v25 rotating buffer for first few round keys
2412                         # v26-v31 last 6 round keys
2413 my ($keyperm)=($out0);  # aliased with "caller" registers, assignment is redundant
2414 my $taillen=$x70;
2415
2416 $code.=<<___;
2417 .align  5
2418 _aesp8_xts_encrypt6x:
2419         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2420         mflr            r11
2421         li              r7,`$FRAME+8*16+15`
2422         li              r3,`$FRAME+8*16+31`
2423         $PUSH           r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2424         stvx            v20,r7,$sp              # ABI says so
2425         addi            r7,r7,32
2426         stvx            v21,r3,$sp
2427         addi            r3,r3,32
2428         stvx            v22,r7,$sp
2429         addi            r7,r7,32
2430         stvx            v23,r3,$sp
2431         addi            r3,r3,32
2432         stvx            v24,r7,$sp
2433         addi            r7,r7,32
2434         stvx            v25,r3,$sp
2435         addi            r3,r3,32
2436         stvx            v26,r7,$sp
2437         addi            r7,r7,32
2438         stvx            v27,r3,$sp
2439         addi            r3,r3,32
2440         stvx            v28,r7,$sp
2441         addi            r7,r7,32
2442         stvx            v29,r3,$sp
2443         addi            r3,r3,32
2444         stvx            v30,r7,$sp
2445         stvx            v31,r3,$sp
2446         li              r0,-1
2447         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
2448         li              $x10,0x10
2449         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2450         li              $x20,0x20
2451         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2452         li              $x30,0x30
2453         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2454         li              $x40,0x40
2455         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2456         li              $x50,0x50
2457         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2458         li              $x60,0x60
2459         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2460         li              $x70,0x70
2461         mtspr           256,r0
2462
2463         subi            $rounds,$rounds,3       # -4 in total
2464
2465         lvx             $rndkey0,$x00,$key1     # load key schedule
2466         lvx             v30,$x10,$key1
2467         addi            $key1,$key1,0x20
2468         lvx             v31,$x00,$key1
2469         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
2470         addi            $key_,$sp,$FRAME+15
2471         mtctr           $rounds
2472
2473 Load_xts_enc_key:
2474         ?vperm          v24,v30,v31,$keyperm
2475         lvx             v30,$x10,$key1
2476         addi            $key1,$key1,0x20
2477         stvx            v24,$x00,$key_          # off-load round[1]
2478         ?vperm          v25,v31,v30,$keyperm
2479         lvx             v31,$x00,$key1
2480         stvx            v25,$x10,$key_          # off-load round[2]
2481         addi            $key_,$key_,0x20
2482         bdnz            Load_xts_enc_key
2483
2484         lvx             v26,$x10,$key1
2485         ?vperm          v24,v30,v31,$keyperm
2486         lvx             v27,$x20,$key1
2487         stvx            v24,$x00,$key_          # off-load round[3]
2488         ?vperm          v25,v31,v26,$keyperm
2489         lvx             v28,$x30,$key1
2490         stvx            v25,$x10,$key_          # off-load round[4]
2491         addi            $key_,$sp,$FRAME+15     # rewind $key_
2492         ?vperm          v26,v26,v27,$keyperm
2493         lvx             v29,$x40,$key1
2494         ?vperm          v27,v27,v28,$keyperm
2495         lvx             v30,$x50,$key1
2496         ?vperm          v28,v28,v29,$keyperm
2497         lvx             v31,$x60,$key1
2498         ?vperm          v29,v29,v30,$keyperm
2499         lvx             $twk5,$x70,$key1        # borrow $twk5
2500         ?vperm          v30,v30,v31,$keyperm
2501         lvx             v24,$x00,$key_          # pre-load round[1]
2502         ?vperm          v31,v31,$twk5,$keyperm
2503         lvx             v25,$x10,$key_          # pre-load round[2]
2504
2505          vperm          $in0,$inout,$inptail,$inpperm
2506          subi           $inp,$inp,31            # undo "caller"
2507         vxor            $twk0,$tweak,$rndkey0
2508         vsrab           $tmp,$tweak,$seven      # next tweak value
2509         vaddubm         $tweak,$tweak,$tweak
2510         vsldoi          $tmp,$tmp,$tmp,15
2511         vand            $tmp,$tmp,$eighty7
2512          vxor           $out0,$in0,$twk0
2513         vxor            $tweak,$tweak,$tmp
2514
2515          lvx_u          $in1,$x10,$inp
2516         vxor            $twk1,$tweak,$rndkey0
2517         vsrab           $tmp,$tweak,$seven      # next tweak value
2518         vaddubm         $tweak,$tweak,$tweak
2519         vsldoi          $tmp,$tmp,$tmp,15
2520          le?vperm       $in1,$in1,$in1,$leperm
2521         vand            $tmp,$tmp,$eighty7
2522          vxor           $out1,$in1,$twk1
2523         vxor            $tweak,$tweak,$tmp
2524
2525          lvx_u          $in2,$x20,$inp
2526          andi.          $taillen,$len,15
2527         vxor            $twk2,$tweak,$rndkey0
2528         vsrab           $tmp,$tweak,$seven      # next tweak value
2529         vaddubm         $tweak,$tweak,$tweak
2530         vsldoi          $tmp,$tmp,$tmp,15
2531          le?vperm       $in2,$in2,$in2,$leperm
2532         vand            $tmp,$tmp,$eighty7
2533          vxor           $out2,$in2,$twk2
2534         vxor            $tweak,$tweak,$tmp
2535
2536          lvx_u          $in3,$x30,$inp
2537          sub            $len,$len,$taillen
2538         vxor            $twk3,$tweak,$rndkey0
2539         vsrab           $tmp,$tweak,$seven      # next tweak value
2540         vaddubm         $tweak,$tweak,$tweak
2541         vsldoi          $tmp,$tmp,$tmp,15
2542          le?vperm       $in3,$in3,$in3,$leperm
2543         vand            $tmp,$tmp,$eighty7
2544          vxor           $out3,$in3,$twk3
2545         vxor            $tweak,$tweak,$tmp
2546
2547          lvx_u          $in4,$x40,$inp
2548          subi           $len,$len,0x60
2549         vxor            $twk4,$tweak,$rndkey0
2550         vsrab           $tmp,$tweak,$seven      # next tweak value
2551         vaddubm         $tweak,$tweak,$tweak
2552         vsldoi          $tmp,$tmp,$tmp,15
2553          le?vperm       $in4,$in4,$in4,$leperm
2554         vand            $tmp,$tmp,$eighty7
2555          vxor           $out4,$in4,$twk4
2556         vxor            $tweak,$tweak,$tmp
2557
2558          lvx_u          $in5,$x50,$inp
2559          addi           $inp,$inp,0x60
2560         vxor            $twk5,$tweak,$rndkey0
2561         vsrab           $tmp,$tweak,$seven      # next tweak value
2562         vaddubm         $tweak,$tweak,$tweak
2563         vsldoi          $tmp,$tmp,$tmp,15
2564          le?vperm       $in5,$in5,$in5,$leperm
2565         vand            $tmp,$tmp,$eighty7
2566          vxor           $out5,$in5,$twk5
2567         vxor            $tweak,$tweak,$tmp
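                                                # twk0..twk5 now hold T*x^0..
                                                # T*x^5 folded with round key
                                                # [0]; out0..out5 are the xored
                                                # plaintext blocks; $tweak has
                                                # advanced to T*x^6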
2568
2569         vxor            v31,v31,$rndkey0
2570         mtctr           $rounds
2571         b               Loop_xts_enc6x
2572
2573 .align  5
2574 Loop_xts_enc6x:
2575         vcipher         $out0,$out0,v24
2576         vcipher         $out1,$out1,v24
2577         vcipher         $out2,$out2,v24
2578         vcipher         $out3,$out3,v24
2579         vcipher         $out4,$out4,v24
2580         vcipher         $out5,$out5,v24
2581         lvx             v24,$x20,$key_          # round[3]
2582         addi            $key_,$key_,0x20
2583
2584         vcipher         $out0,$out0,v25
2585         vcipher         $out1,$out1,v25
2586         vcipher         $out2,$out2,v25
2587         vcipher         $out3,$out3,v25
2588         vcipher         $out4,$out4,v25
2589         vcipher         $out5,$out5,v25
2590         lvx             v25,$x10,$key_          # round[4]
2591         bdnz            Loop_xts_enc6x
2592
2593         subic           $len,$len,96            # $len-=96
2594          vxor           $in0,$twk0,v31          # xor with last round key
2595         vcipher         $out0,$out0,v24
2596         vcipher         $out1,$out1,v24
2597          vsrab          $tmp,$tweak,$seven      # next tweak value
2598          vxor           $twk0,$tweak,$rndkey0
2599          vaddubm        $tweak,$tweak,$tweak
2600         vcipher         $out2,$out2,v24
2601         vcipher         $out3,$out3,v24
2602          vsldoi         $tmp,$tmp,$tmp,15
2603         vcipher         $out4,$out4,v24
2604         vcipher         $out5,$out5,v24
2605
2606         subfe.          r0,r0,r0                # borrow?-1:0
2607          vand           $tmp,$tmp,$eighty7
2608         vcipher         $out0,$out0,v25
2609         vcipher         $out1,$out1,v25
2610          vxor           $tweak,$tweak,$tmp
2611         vcipher         $out2,$out2,v25
2612         vcipher         $out3,$out3,v25
2613          vxor           $in1,$twk1,v31
2614          vsrab          $tmp,$tweak,$seven      # next tweak value
2615          vxor           $twk1,$tweak,$rndkey0
2616         vcipher         $out4,$out4,v25
2617         vcipher         $out5,$out5,v25
2618
2619         and             r0,r0,$len
2620          vaddubm        $tweak,$tweak,$tweak
2621          vsldoi         $tmp,$tmp,$tmp,15
2622         vcipher         $out0,$out0,v26
2623         vcipher         $out1,$out1,v26
2624          vand           $tmp,$tmp,$eighty7
2625         vcipher         $out2,$out2,v26
2626         vcipher         $out3,$out3,v26
2627          vxor           $tweak,$tweak,$tmp
2628         vcipher         $out4,$out4,v26
2629         vcipher         $out5,$out5,v26
2630
2631         add             $inp,$inp,r0            # $inp is adjusted in such
2632                                                 # a way that at exit from
2633                                                 # the loop inX-in5 hold
2634                                                 # the last input blocks
2635          vxor           $in2,$twk2,v31
2636          vsrab          $tmp,$tweak,$seven      # next tweak value
2637          vxor           $twk2,$tweak,$rndkey0
2638          vaddubm        $tweak,$tweak,$tweak
2639         vcipher         $out0,$out0,v27
2640         vcipher         $out1,$out1,v27
2641          vsldoi         $tmp,$tmp,$tmp,15
2642         vcipher         $out2,$out2,v27
2643         vcipher         $out3,$out3,v27
2644          vand           $tmp,$tmp,$eighty7
2645         vcipher         $out4,$out4,v27
2646         vcipher         $out5,$out5,v27
2647
2648         addi            $key_,$sp,$FRAME+15     # rewind $key_
2649          vxor           $tweak,$tweak,$tmp
2650         vcipher         $out0,$out0,v28
2651         vcipher         $out1,$out1,v28
2652          vxor           $in3,$twk3,v31
2653          vsrab          $tmp,$tweak,$seven      # next tweak value
2654          vxor           $twk3,$tweak,$rndkey0
2655         vcipher         $out2,$out2,v28
2656         vcipher         $out3,$out3,v28
2657          vaddubm        $tweak,$tweak,$tweak
2658          vsldoi         $tmp,$tmp,$tmp,15
2659         vcipher         $out4,$out4,v28
2660         vcipher         $out5,$out5,v28
2661         lvx             v24,$x00,$key_          # re-pre-load round[1]
2662          vand           $tmp,$tmp,$eighty7
2663
2664         vcipher         $out0,$out0,v29
2665         vcipher         $out1,$out1,v29
2666          vxor           $tweak,$tweak,$tmp
2667         vcipher         $out2,$out2,v29
2668         vcipher         $out3,$out3,v29
2669          vxor           $in4,$twk4,v31
2670          vsrab          $tmp,$tweak,$seven      # next tweak value
2671          vxor           $twk4,$tweak,$rndkey0
2672         vcipher         $out4,$out4,v29
2673         vcipher         $out5,$out5,v29
2674         lvx             v25,$x10,$key_          # re-pre-load round[2]
2675          vaddubm        $tweak,$tweak,$tweak
2676          vsldoi         $tmp,$tmp,$tmp,15
2677
2678         vcipher         $out0,$out0,v30
2679         vcipher         $out1,$out1,v30
2680          vand           $tmp,$tmp,$eighty7
2681         vcipher         $out2,$out2,v30
2682         vcipher         $out3,$out3,v30
2683          vxor           $tweak,$tweak,$tmp
2684         vcipher         $out4,$out4,v30
2685         vcipher         $out5,$out5,v30
2686          vxor           $in5,$twk5,v31
2687          vsrab          $tmp,$tweak,$seven      # next tweak value
2688          vxor           $twk5,$tweak,$rndkey0
2689
2690         vcipherlast     $out0,$out0,$in0
2691          lvx_u          $in0,$x00,$inp          # load next input block
2692          vaddubm        $tweak,$tweak,$tweak
2693          vsldoi         $tmp,$tmp,$tmp,15
2694         vcipherlast     $out1,$out1,$in1
2695          lvx_u          $in1,$x10,$inp
2696         vcipherlast     $out2,$out2,$in2
2697          le?vperm       $in0,$in0,$in0,$leperm
2698          lvx_u          $in2,$x20,$inp
2699          vand           $tmp,$tmp,$eighty7
2700         vcipherlast     $out3,$out3,$in3
2701          le?vperm       $in1,$in1,$in1,$leperm
2702          lvx_u          $in3,$x30,$inp
2703         vcipherlast     $out4,$out4,$in4
2704          le?vperm       $in2,$in2,$in2,$leperm
2705          lvx_u          $in4,$x40,$inp
2706          vxor           $tweak,$tweak,$tmp
2707         vcipherlast     $tmp,$out5,$in5         # last block might be needed
2708                                                 # in stealing mode
2709          le?vperm       $in3,$in3,$in3,$leperm
2710          lvx_u          $in5,$x50,$inp
2711          addi           $inp,$inp,0x60
2712          le?vperm       $in4,$in4,$in4,$leperm
2713          le?vperm       $in5,$in5,$in5,$leperm
2714
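#
# Store the six ciphertext blocks (byte-reversed back on little-endian)
# while the next six plaintext blocks are whitened with their tweaks
# for the following iteration.
#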
2715         le?vperm        $out0,$out0,$out0,$leperm
2716         le?vperm        $out1,$out1,$out1,$leperm
2717         stvx_u          $out0,$x00,$out         # store output
2718          vxor           $out0,$in0,$twk0
2719         le?vperm        $out2,$out2,$out2,$leperm
2720         stvx_u          $out1,$x10,$out
2721          vxor           $out1,$in1,$twk1
2722         le?vperm        $out3,$out3,$out3,$leperm
2723         stvx_u          $out2,$x20,$out
2724          vxor           $out2,$in2,$twk2
2725         le?vperm        $out4,$out4,$out4,$leperm
2726         stvx_u          $out3,$x30,$out
2727          vxor           $out3,$in3,$twk3
2728         le?vperm        $out5,$tmp,$tmp,$leperm
2729         stvx_u          $out4,$x40,$out
2730          vxor           $out4,$in4,$twk4
2731         le?stvx_u       $out5,$x50,$out
2732         be?stvx_u       $tmp,$x50,$out
2733          vxor           $out5,$in5,$twk5
2734         addi            $out,$out,0x60
2735
2736         mtctr           $rounds
2737         beq             Loop_xts_enc6x          # did $len-=96 borrow?
2738
2739         addic.          $len,$len,0x60
2740         beq             Lxts_enc6x_zero
2741         cmpwi           $len,0x20
2742         blt             Lxts_enc6x_one
2743         nop
2744         beq             Lxts_enc6x_two
2745         cmpwi           $len,0x40
2746         blt             Lxts_enc6x_three
2747         nop
2748         beq             Lxts_enc6x_four
2749
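#
# Tail dispatch: $len has been restored to the number of leftover
# whole-block bytes (0x00-0x50), selecting one of the 0-5 block
# epilogues below.
#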
2750 Lxts_enc6x_five:
2751         vxor            $out0,$in1,$twk0
2752         vxor            $out1,$in2,$twk1
2753         vxor            $out2,$in3,$twk2
2754         vxor            $out3,$in4,$twk3
2755         vxor            $out4,$in5,$twk4
2756
2757         bl              _aesp8_xts_enc5x
2758
2759         le?vperm        $out0,$out0,$out0,$leperm
2760         vmr             $twk0,$twk5             # unused tweak
2761         le?vperm        $out1,$out1,$out1,$leperm
2762         stvx_u          $out0,$x00,$out         # store output
2763         le?vperm        $out2,$out2,$out2,$leperm
2764         stvx_u          $out1,$x10,$out
2765         le?vperm        $out3,$out3,$out3,$leperm
2766         stvx_u          $out2,$x20,$out
2767         vxor            $tmp,$out4,$twk5        # last block prep for stealing
2768         le?vperm        $out4,$out4,$out4,$leperm
2769         stvx_u          $out3,$x30,$out
2770         stvx_u          $out4,$x40,$out
2771         addi            $out,$out,0x50
2772         bne             Lxts_enc6x_steal
2773         b               Lxts_enc6x_done
2774
2775 .align  4
2776 Lxts_enc6x_four:
2777         vxor            $out0,$in2,$twk0
2778         vxor            $out1,$in3,$twk1
2779         vxor            $out2,$in4,$twk2
2780         vxor            $out3,$in5,$twk3
2781         vxor            $out4,$out4,$out4
2782
2783         bl              _aesp8_xts_enc5x
2784
2785         le?vperm        $out0,$out0,$out0,$leperm
2786         vmr             $twk0,$twk4             # unused tweak
2787         le?vperm        $out1,$out1,$out1,$leperm
2788         stvx_u          $out0,$x00,$out         # store output
2789         le?vperm        $out2,$out2,$out2,$leperm
2790         stvx_u          $out1,$x10,$out
2791         vxor            $tmp,$out3,$twk4        # last block prep for stealing
2792         le?vperm        $out3,$out3,$out3,$leperm
2793         stvx_u          $out2,$x20,$out
2794         stvx_u          $out3,$x30,$out
2795         addi            $out,$out,0x40
2796         bne             Lxts_enc6x_steal
2797         b               Lxts_enc6x_done
2798
2799 .align  4
2800 Lxts_enc6x_three:
2801         vxor            $out0,$in3,$twk0
2802         vxor            $out1,$in4,$twk1
2803         vxor            $out2,$in5,$twk2
2804         vxor            $out3,$out3,$out3
2805         vxor            $out4,$out4,$out4
2806
2807         bl              _aesp8_xts_enc5x
2808
2809         le?vperm        $out0,$out0,$out0,$leperm
2810         vmr             $twk0,$twk3             # unused tweak
2811         le?vperm        $out1,$out1,$out1,$leperm
2812         stvx_u          $out0,$x00,$out         # store output
2813         vxor            $tmp,$out2,$twk3        # last block prep for stealing
2814         le?vperm        $out2,$out2,$out2,$leperm
2815         stvx_u          $out1,$x10,$out
2816         stvx_u          $out2,$x20,$out
2817         addi            $out,$out,0x30
2818         bne             Lxts_enc6x_steal
2819         b               Lxts_enc6x_done
2820
2821 .align  4
2822 Lxts_enc6x_two:
2823         vxor            $out0,$in4,$twk0
2824         vxor            $out1,$in5,$twk1
2825         vxor            $out2,$out2,$out2
2826         vxor            $out3,$out3,$out3
2827         vxor            $out4,$out4,$out4
2828
2829         bl              _aesp8_xts_enc5x
2830
2831         le?vperm        $out0,$out0,$out0,$leperm
2832         vmr             $twk0,$twk2             # unused tweak
2833         vxor            $tmp,$out1,$twk2        # last block prep for stealing
2834         le?vperm        $out1,$out1,$out1,$leperm
2835         stvx_u          $out0,$x00,$out         # store output
2836         stvx_u          $out1,$x10,$out
2837         addi            $out,$out,0x20
2838         bne             Lxts_enc6x_steal
2839         b               Lxts_enc6x_done
2840
2841 .align  4
2842 Lxts_enc6x_one:
2843         vxor            $out0,$in5,$twk0
2844         nop
2845 Loop_xts_enc1x:
2846         vcipher         $out0,$out0,v24
2847         lvx             v24,$x20,$key_          # round[3]
2848         addi            $key_,$key_,0x20
2849
2850         vcipher         $out0,$out0,v25
2851         lvx             v25,$x10,$key_          # round[4]
2852         bdnz            Loop_xts_enc1x
2853
2854         add             $inp,$inp,$taillen
2855         cmpwi           $taillen,0
2856         vcipher         $out0,$out0,v24
2857
2858         subi            $inp,$inp,16
2859         vcipher         $out0,$out0,v25
2860
2861         lvsr            $inpperm,0,$taillen
2862         vcipher         $out0,$out0,v26
2863
2864         lvx_u           $in0,0,$inp
2865         vcipher         $out0,$out0,v27
2866
2867         addi            $key_,$sp,$FRAME+15     # rewind $key_
2868         vcipher         $out0,$out0,v28
2869         lvx             v24,$x00,$key_          # re-pre-load round[1]
2870
2871         vcipher         $out0,$out0,v29
2872         lvx             v25,$x10,$key_          # re-pre-load round[2]
2873          vxor           $twk0,$twk0,v31
2874
2875         le?vperm        $in0,$in0,$in0,$leperm
2876         vcipher         $out0,$out0,v30
2877
2878         vperm           $in0,$in0,$in0,$inpperm
2879         vcipherlast     $out0,$out0,$twk0
2880
2881         vmr             $twk0,$twk1             # unused tweak
2882         vxor            $tmp,$out0,$twk1        # last block prep for stealing
2883         le?vperm        $out0,$out0,$out0,$leperm
2884         stvx_u          $out0,$x00,$out         # store output
2885         addi            $out,$out,0x10
2886         bne             Lxts_enc6x_steal
2887         b               Lxts_enc6x_done
2888
2889 .align  4
2890 Lxts_enc6x_zero:
2891         cmpwi           $taillen,0
2892         beq             Lxts_enc6x_done
2893
2894         add             $inp,$inp,$taillen
2895         subi            $inp,$inp,16
2896         lvx_u           $in0,0,$inp
2897         lvsr            $inpperm,0,$taillen     # $in5 is no more
2898         le?vperm        $in0,$in0,$in0,$leperm
2899         vperm           $in0,$in0,$in0,$inpperm
2900         vxor            $tmp,$tmp,$twk0
2901 Lxts_enc6x_steal:
2902         vxor            $in0,$in0,$twk0
2903         vxor            $out0,$out0,$out0
2904         vspltisb        $out1,-1
2905         vperm           $out0,$out0,$out1,$inpperm
2906         vsel            $out0,$in0,$tmp,$out0   # $tmp is last block, remember?
2907
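#
# Ciphertext stealing: $out0 now holds the short final plaintext
# spliced onto the tail of the previous ciphertext block, both already
# xored with $twk0.  The byte loop below copies the head of that
# ciphertext block forward as the truncated last output block, and the
# spliced block is re-encrypted via Loop_xts_enc1x to become the new
# last full block.
#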
2908         subi            r30,$out,17
2909         subi            $out,$out,16
2910         mtctr           $taillen
2911 Loop_xts_enc6x_steal:
2912         lbzu            r0,1(r30)
2913         stb             r0,16(r30)
2914         bdnz            Loop_xts_enc6x_steal
2915
2916         li              $taillen,0
2917         mtctr           $rounds
2918         b               Loop_xts_enc1x          # one more time...
2919
2920 .align  4
2921 Lxts_enc6x_done:
2922         ${UCMP}i        $ivp,0
2923         beq             Lxts_enc6x_ret
2924
2925         vxor            $tweak,$twk0,$rndkey0
2926         le?vperm        $tweak,$tweak,$tweak,$leperm
2927         stvx_u          $tweak,0,$ivp
2928
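#
# Common exit: scrub the round-key copies staged on the stack (using
# the constant left in $seven) before restoring the caller's vector
# registers.
#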
2929 Lxts_enc6x_ret:
2930         mtlr            r11
2931         li              r10,`$FRAME+15`
2932         li              r11,`$FRAME+31`
2933         stvx            $seven,r10,$sp          # wipe copies of round keys
2934         addi            r10,r10,32
2935         stvx            $seven,r11,$sp
2936         addi            r11,r11,32
2937         stvx            $seven,r10,$sp
2938         addi            r10,r10,32
2939         stvx            $seven,r11,$sp
2940         addi            r11,r11,32
2941         stvx            $seven,r10,$sp
2942         addi            r10,r10,32
2943         stvx            $seven,r11,$sp
2944         addi            r11,r11,32
2945         stvx            $seven,r10,$sp
2946         addi            r10,r10,32
2947         stvx            $seven,r11,$sp
2948         addi            r11,r11,32
2949
2950         mtspr           256,$vrsave
2951         lvx             v20,r10,$sp             # ABI says so
2952         addi            r10,r10,32
2953         lvx             v21,r11,$sp
2954         addi            r11,r11,32
2955         lvx             v22,r10,$sp
2956         addi            r10,r10,32
2957         lvx             v23,r11,$sp
2958         addi            r11,r11,32
2959         lvx             v24,r10,$sp
2960         addi            r10,r10,32
2961         lvx             v25,r11,$sp
2962         addi            r11,r11,32
2963         lvx             v26,r10,$sp
2964         addi            r10,r10,32
2965         lvx             v27,r11,$sp
2966         addi            r11,r11,32
2967         lvx             v28,r10,$sp
2968         addi            r10,r10,32
2969         lvx             v29,r11,$sp
2970         addi            r11,r11,32
2971         lvx             v30,r10,$sp
2972         lvx             v31,r11,$sp
2973         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2974         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2975         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2976         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2977         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2978         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2979         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2980         blr
2981         .long           0
2982         .byte           0,12,0x04,1,0x80,6,6,0
2983         .long           0
2984
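#
# _aesp8_xts_enc5x finishes up to five blocks.  Note that the final
# vcipherlast operands are tweaks pre-xored with the last round key
# (v31), so a single instruction both completes the cipher and applies
# the XTS output whitening.
#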
2985 .align  5
2986 _aesp8_xts_enc5x:
2987         vcipher         $out0,$out0,v24
2988         vcipher         $out1,$out1,v24
2989         vcipher         $out2,$out2,v24
2990         vcipher         $out3,$out3,v24
2991         vcipher         $out4,$out4,v24
2992         lvx             v24,$x20,$key_          # round[3]
2993         addi            $key_,$key_,0x20
2994
2995         vcipher         $out0,$out0,v25
2996         vcipher         $out1,$out1,v25
2997         vcipher         $out2,$out2,v25
2998         vcipher         $out3,$out3,v25
2999         vcipher         $out4,$out4,v25
3000         lvx             v25,$x10,$key_          # round[4]
3001         bdnz            _aesp8_xts_enc5x
3002
3003         add             $inp,$inp,$taillen
3004         cmpwi           $taillen,0
3005         vcipher         $out0,$out0,v24
3006         vcipher         $out1,$out1,v24
3007         vcipher         $out2,$out2,v24
3008         vcipher         $out3,$out3,v24
3009         vcipher         $out4,$out4,v24
3010
3011         subi            $inp,$inp,16
3012         vcipher         $out0,$out0,v25
3013         vcipher         $out1,$out1,v25
3014         vcipher         $out2,$out2,v25
3015         vcipher         $out3,$out3,v25
3016         vcipher         $out4,$out4,v25
3017          vxor           $twk0,$twk0,v31
3018
3019         vcipher         $out0,$out0,v26
3020         lvsr            $inpperm,0,$taillen     # $in5 is no more
3021         vcipher         $out1,$out1,v26
3022         vcipher         $out2,$out2,v26
3023         vcipher         $out3,$out3,v26
3024         vcipher         $out4,$out4,v26
3025          vxor           $in1,$twk1,v31
3026
3027         vcipher         $out0,$out0,v27
3028         lvx_u           $in0,0,$inp
3029         vcipher         $out1,$out1,v27
3030         vcipher         $out2,$out2,v27
3031         vcipher         $out3,$out3,v27
3032         vcipher         $out4,$out4,v27
3033          vxor           $in2,$twk2,v31
3034
3035         addi            $key_,$sp,$FRAME+15     # rewind $key_
3036         vcipher         $out0,$out0,v28
3037         vcipher         $out1,$out1,v28
3038         vcipher         $out2,$out2,v28
3039         vcipher         $out3,$out3,v28
3040         vcipher         $out4,$out4,v28
3041         lvx             v24,$x00,$key_          # re-pre-load round[1]
3042          vxor           $in3,$twk3,v31
3043
3044         vcipher         $out0,$out0,v29
3045         le?vperm        $in0,$in0,$in0,$leperm
3046         vcipher         $out1,$out1,v29
3047         vcipher         $out2,$out2,v29
3048         vcipher         $out3,$out3,v29
3049         vcipher         $out4,$out4,v29
3050         lvx             v25,$x10,$key_          # re-pre-load round[2]
3051          vxor           $in4,$twk4,v31
3052
3053         vcipher         $out0,$out0,v30
3054         vperm           $in0,$in0,$in0,$inpperm
3055         vcipher         $out1,$out1,v30
3056         vcipher         $out2,$out2,v30
3057         vcipher         $out3,$out3,v30
3058         vcipher         $out4,$out4,v30
3059
3060         vcipherlast     $out0,$out0,$twk0
3061         vcipherlast     $out1,$out1,$in1
3062         vcipherlast     $out2,$out2,$in2
3063         vcipherlast     $out3,$out3,$in3
3064         vcipherlast     $out4,$out4,$in4
3065         blr
3066         .long           0
3067         .byte           0,12,0x14,0,0,0,0,0
3068
3069 .align  5
3070 _aesp8_xts_decrypt6x:
3071         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3072         mflr            r11
3073         li              r7,`$FRAME+8*16+15`
3074         li              r3,`$FRAME+8*16+31`
3075         $PUSH           r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3076         stvx            v20,r7,$sp              # ABI says so
3077         addi            r7,r7,32
3078         stvx            v21,r3,$sp
3079         addi            r3,r3,32
3080         stvx            v22,r7,$sp
3081         addi            r7,r7,32
3082         stvx            v23,r3,$sp
3083         addi            r3,r3,32
3084         stvx            v24,r7,$sp
3085         addi            r7,r7,32
3086         stvx            v25,r3,$sp
3087         addi            r3,r3,32
3088         stvx            v26,r7,$sp
3089         addi            r7,r7,32
3090         stvx            v27,r3,$sp
3091         addi            r3,r3,32
3092         stvx            v28,r7,$sp
3093         addi            r7,r7,32
3094         stvx            v29,r3,$sp
3095         addi            r3,r3,32
3096         stvx            v30,r7,$sp
3097         stvx            v31,r3,$sp
3098         li              r0,-1
3099         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
3100         li              $x10,0x10
3101         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3102         li              $x20,0x20
3103         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3104         li              $x30,0x30
3105         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3106         li              $x40,0x40
3107         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3108         li              $x50,0x50
3109         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3110         li              $x60,0x60
3111         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3112         li              $x70,0x70
3113         mtspr           256,r0
3114
3115         subi            $rounds,$rounds,3       # -4 in total
3116
3117         lvx             $rndkey0,$x00,$key1     # load key schedule
3118         lvx             v30,$x10,$key1
3119         addi            $key1,$key1,0x20
3120         lvx             v31,$x00,$key1
3121         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
3122         addi            $key_,$sp,$FRAME+15
3123         mtctr           $rounds
3124
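#
# Stage the round keys in an aligned stack area two at a time; the
# '?'-prefixed vperm realigns keys loaded from a potentially unaligned
# schedule, and the main loop re-loads them from this copy.
#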
3125 Load_xts_dec_key:
3126         ?vperm          v24,v30,v31,$keyperm
3127         lvx             v30,$x10,$key1
3128         addi            $key1,$key1,0x20
3129         stvx            v24,$x00,$key_          # off-load round[1]
3130         ?vperm          v25,v31,v30,$keyperm
3131         lvx             v31,$x00,$key1
3132         stvx            v25,$x10,$key_          # off-load round[2]
3133         addi            $key_,$key_,0x20
3134         bdnz            Load_xts_dec_key
3135
3136         lvx             v26,$x10,$key1
3137         ?vperm          v24,v30,v31,$keyperm
3138         lvx             v27,$x20,$key1
3139         stvx            v24,$x00,$key_          # off-load round[3]
3140         ?vperm          v25,v31,v26,$keyperm
3141         lvx             v28,$x30,$key1
3142         stvx            v25,$x10,$key_          # off-load round[4]
3143         addi            $key_,$sp,$FRAME+15     # rewind $key_
3144         ?vperm          v26,v26,v27,$keyperm
3145         lvx             v29,$x40,$key1
3146         ?vperm          v27,v27,v28,$keyperm
3147         lvx             v30,$x50,$key1
3148         ?vperm          v28,v28,v29,$keyperm
3149         lvx             v31,$x60,$key1
3150         ?vperm          v29,v29,v30,$keyperm
3151         lvx             $twk5,$x70,$key1        # borrow $twk5
3152         ?vperm          v30,v30,v31,$keyperm
3153         lvx             v24,$x00,$key_          # pre-load round[1]
3154         ?vperm          v31,v31,$twk5,$keyperm
3155         lvx             v25,$x10,$key_          # pre-load round[2]
3156
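#
# Compute the first six tweaks and whiten the corresponding input
# blocks as they stream in; the doubling sequence is the same
# GF(2^128) multiply-by-x used on the encrypt side.
#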
3157          vperm          $in0,$inout,$inptail,$inpperm
3158          subi           $inp,$inp,31            # undo "caller"
3159         vxor            $twk0,$tweak,$rndkey0
3160         vsrab           $tmp,$tweak,$seven      # next tweak value
3161         vaddubm         $tweak,$tweak,$tweak
3162         vsldoi          $tmp,$tmp,$tmp,15
3163         vand            $tmp,$tmp,$eighty7
3164          vxor           $out0,$in0,$twk0
3165         vxor            $tweak,$tweak,$tmp
3166
3167          lvx_u          $in1,$x10,$inp
3168         vxor            $twk1,$tweak,$rndkey0
3169         vsrab           $tmp,$tweak,$seven      # next tweak value
3170         vaddubm         $tweak,$tweak,$tweak
3171         vsldoi          $tmp,$tmp,$tmp,15
3172          le?vperm       $in1,$in1,$in1,$leperm
3173         vand            $tmp,$tmp,$eighty7
3174          vxor           $out1,$in1,$twk1
3175         vxor            $tweak,$tweak,$tmp
3176
3177          lvx_u          $in2,$x20,$inp
3178          andi.          $taillen,$len,15
3179         vxor            $twk2,$tweak,$rndkey0
3180         vsrab           $tmp,$tweak,$seven      # next tweak value
3181         vaddubm         $tweak,$tweak,$tweak
3182         vsldoi          $tmp,$tmp,$tmp,15
3183          le?vperm       $in2,$in2,$in2,$leperm
3184         vand            $tmp,$tmp,$eighty7
3185          vxor           $out2,$in2,$twk2
3186         vxor            $tweak,$tweak,$tmp
3187
3188          lvx_u          $in3,$x30,$inp
3189          sub            $len,$len,$taillen
3190         vxor            $twk3,$tweak,$rndkey0
3191         vsrab           $tmp,$tweak,$seven      # next tweak value
3192         vaddubm         $tweak,$tweak,$tweak
3193         vsldoi          $tmp,$tmp,$tmp,15
3194          le?vperm       $in3,$in3,$in3,$leperm
3195         vand            $tmp,$tmp,$eighty7
3196          vxor           $out3,$in3,$twk3
3197         vxor            $tweak,$tweak,$tmp
3198
3199          lvx_u          $in4,$x40,$inp
3200          subi           $len,$len,0x60
3201         vxor            $twk4,$tweak,$rndkey0
3202         vsrab           $tmp,$tweak,$seven      # next tweak value
3203         vaddubm         $tweak,$tweak,$tweak
3204         vsldoi          $tmp,$tmp,$tmp,15
3205          le?vperm       $in4,$in4,$in4,$leperm
3206         vand            $tmp,$tmp,$eighty7
3207          vxor           $out4,$in4,$twk4
3208         vxor            $tweak,$tweak,$tmp
3209
3210          lvx_u          $in5,$x50,$inp
3211          addi           $inp,$inp,0x60
3212         vxor            $twk5,$tweak,$rndkey0
3213         vsrab           $tmp,$tweak,$seven      # next tweak value
3214         vaddubm         $tweak,$tweak,$tweak
3215         vsldoi          $tmp,$tmp,$tmp,15
3216          le?vperm       $in5,$in5,$in5,$leperm
3217         vand            $tmp,$tmp,$eighty7
3218          vxor           $out5,$in5,$twk5
3219         vxor            $tweak,$tweak,$tmp
3220
3221         vxor            v31,v31,$rndkey0
3222         mtctr           $rounds
3223         b               Loop_xts_dec6x
3224
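#
# The decryption loop mirrors Loop_xts_enc6x with vncipher in place of
# vcipher; the tweak schedule itself is still generated in the forward
# direction, since XTS uses one tweak stream for both encryption and
# decryption.
#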
3225 .align  5
3226 Loop_xts_dec6x:
3227         vncipher        $out0,$out0,v24
3228         vncipher        $out1,$out1,v24
3229         vncipher        $out2,$out2,v24
3230         vncipher        $out3,$out3,v24
3231         vncipher        $out4,$out4,v24
3232         vncipher        $out5,$out5,v24
3233         lvx             v24,$x20,$key_          # round[3]
3234         addi            $key_,$key_,0x20
3235
3236         vncipher        $out0,$out0,v25
3237         vncipher        $out1,$out1,v25
3238         vncipher        $out2,$out2,v25
3239         vncipher        $out3,$out3,v25
3240         vncipher        $out4,$out4,v25
3241         vncipher        $out5,$out5,v25
3242         lvx             v25,$x10,$key_          # round[4]
3243         bdnz            Loop_xts_dec6x
3244
3245         subic           $len,$len,96            # $len-=96
3246          vxor           $in0,$twk0,v31          # xor with last round key
3247         vncipher        $out0,$out0,v24
3248         vncipher        $out1,$out1,v24
3249          vsrab          $tmp,$tweak,$seven      # next tweak value
3250          vxor           $twk0,$tweak,$rndkey0
3251          vaddubm        $tweak,$tweak,$tweak
3252         vncipher        $out2,$out2,v24
3253         vncipher        $out3,$out3,v24
3254          vsldoi         $tmp,$tmp,$tmp,15
3255         vncipher        $out4,$out4,v24
3256         vncipher        $out5,$out5,v24
3257
3258         subfe.          r0,r0,r0                # borrow?-1:0
3259          vand           $tmp,$tmp,$eighty7
3260         vncipher        $out0,$out0,v25
3261         vncipher        $out1,$out1,v25
3262          vxor           $tweak,$tweak,$tmp
3263         vncipher        $out2,$out2,v25
3264         vncipher        $out3,$out3,v25
3265          vxor           $in1,$twk1,v31
3266          vsrab          $tmp,$tweak,$seven      # next tweak value
3267          vxor           $twk1,$tweak,$rndkey0
3268         vncipher        $out4,$out4,v25
3269         vncipher        $out5,$out5,v25
3270
3271         and             r0,r0,$len
3272          vaddubm        $tweak,$tweak,$tweak
3273          vsldoi         $tmp,$tmp,$tmp,15
3274         vncipher        $out0,$out0,v26
3275         vncipher        $out1,$out1,v26
3276          vand           $tmp,$tmp,$eighty7
3277         vncipher        $out2,$out2,v26
3278         vncipher        $out3,$out3,v26
3279          vxor           $tweak,$tweak,$tmp
3280         vncipher        $out4,$out4,v26
3281         vncipher        $out5,$out5,v26
3282
3283         add             $inp,$inp,r0            # $inp is adjusted so that
3284                                                 # at exit from the loop the
3285                                                 # last input blocks occupy
3286                                                 # the tail of in0-in5
3287          vxor           $in2,$twk2,v31
3288          vsrab          $tmp,$tweak,$seven      # next tweak value
3289          vxor           $twk2,$tweak,$rndkey0
3290          vaddubm        $tweak,$tweak,$tweak
3291         vncipher        $out0,$out0,v27
3292         vncipher        $out1,$out1,v27
3293          vsldoi         $tmp,$tmp,$tmp,15
3294         vncipher        $out2,$out2,v27
3295         vncipher        $out3,$out3,v27
3296          vand           $tmp,$tmp,$eighty7
3297         vncipher        $out4,$out4,v27
3298         vncipher        $out5,$out5,v27
3299
3300         addi            $key_,$sp,$FRAME+15     # rewind $key_
3301          vxor           $tweak,$tweak,$tmp
3302         vncipher        $out0,$out0,v28
3303         vncipher        $out1,$out1,v28
3304          vxor           $in3,$twk3,v31
3305          vsrab          $tmp,$tweak,$seven      # next tweak value
3306          vxor           $twk3,$tweak,$rndkey0
3307         vncipher        $out2,$out2,v28
3308         vncipher        $out3,$out3,v28
3309          vaddubm        $tweak,$tweak,$tweak
3310          vsldoi         $tmp,$tmp,$tmp,15
3311         vncipher        $out4,$out4,v28
3312         vncipher        $out5,$out5,v28
3313         lvx             v24,$x00,$key_          # re-pre-load round[1]
3314          vand           $tmp,$tmp,$eighty7
3315
3316         vncipher        $out0,$out0,v29
3317         vncipher        $out1,$out1,v29
3318          vxor           $tweak,$tweak,$tmp
3319         vncipher        $out2,$out2,v29
3320         vncipher        $out3,$out3,v29
3321          vxor           $in4,$twk4,v31
3322          vsrab          $tmp,$tweak,$seven      # next tweak value
3323          vxor           $twk4,$tweak,$rndkey0
3324         vncipher        $out4,$out4,v29
3325         vncipher        $out5,$out5,v29
3326         lvx             v25,$x10,$key_          # re-pre-load round[2]
3327          vaddubm        $tweak,$tweak,$tweak
3328          vsldoi         $tmp,$tmp,$tmp,15
3329
3330         vncipher        $out0,$out0,v30
3331         vncipher        $out1,$out1,v30
3332          vand           $tmp,$tmp,$eighty7
3333         vncipher        $out2,$out2,v30
3334         vncipher        $out3,$out3,v30
3335          vxor           $tweak,$tweak,$tmp
3336         vncipher        $out4,$out4,v30
3337         vncipher        $out5,$out5,v30
3338          vxor           $in5,$twk5,v31
3339          vsrab          $tmp,$tweak,$seven      # next tweak value
3340          vxor           $twk5,$tweak,$rndkey0
3341
3342         vncipherlast    $out0,$out0,$in0
3343          lvx_u          $in0,$x00,$inp          # load next input block
3344          vaddubm        $tweak,$tweak,$tweak
3345          vsldoi         $tmp,$tmp,$tmp,15
3346         vncipherlast    $out1,$out1,$in1
3347          lvx_u          $in1,$x10,$inp
3348         vncipherlast    $out2,$out2,$in2
3349          le?vperm       $in0,$in0,$in0,$leperm
3350          lvx_u          $in2,$x20,$inp
3351          vand           $tmp,$tmp,$eighty7
3352         vncipherlast    $out3,$out3,$in3
3353          le?vperm       $in1,$in1,$in1,$leperm
3354          lvx_u          $in3,$x30,$inp
3355         vncipherlast    $out4,$out4,$in4
3356          le?vperm       $in2,$in2,$in2,$leperm
3357          lvx_u          $in4,$x40,$inp
3358          vxor           $tweak,$tweak,$tmp
3359         vncipherlast    $out5,$out5,$in5
3360          le?vperm       $in3,$in3,$in3,$leperm
3361          lvx_u          $in5,$x50,$inp
3362          addi           $inp,$inp,0x60
3363          le?vperm       $in4,$in4,$in4,$leperm
3364          le?vperm       $in5,$in5,$in5,$leperm
3365
3366         le?vperm        $out0,$out0,$out0,$leperm
3367         le?vperm        $out1,$out1,$out1,$leperm
3368         stvx_u          $out0,$x00,$out         # store output
3369          vxor           $out0,$in0,$twk0
3370         le?vperm        $out2,$out2,$out2,$leperm
3371         stvx_u          $out1,$x10,$out
3372          vxor           $out1,$in1,$twk1
3373         le?vperm        $out3,$out3,$out3,$leperm
3374         stvx_u          $out2,$x20,$out
3375          vxor           $out2,$in2,$twk2
3376         le?vperm        $out4,$out4,$out4,$leperm
3377         stvx_u          $out3,$x30,$out
3378          vxor           $out3,$in3,$twk3
3379         le?vperm        $out5,$out5,$out5,$leperm
3380         stvx_u          $out4,$x40,$out
3381          vxor           $out4,$in4,$twk4
3382         stvx_u          $out5,$x50,$out
3383          vxor           $out5,$in5,$twk5
3384         addi            $out,$out,0x60
3385
3386         mtctr           $rounds
3387         beq             Loop_xts_dec6x          # did $len-=96 borrow?
3388
3389         addic.          $len,$len,0x60
3390         beq             Lxts_dec6x_zero
3391         cmpwi           $len,0x20
3392         blt             Lxts_dec6x_one
3393         nop
3394         beq             Lxts_dec6x_two
3395         cmpwi           $len,0x40
3396         blt             Lxts_dec6x_three
3397         nop
3398         beq             Lxts_dec6x_four
3399
3400 Lxts_dec6x_five:
3401         vxor            $out0,$in1,$twk0
3402         vxor            $out1,$in2,$twk1
3403         vxor            $out2,$in3,$twk2
3404         vxor            $out3,$in4,$twk3
3405         vxor            $out4,$in5,$twk4
3406
3407         bl              _aesp8_xts_dec5x
3408
3409         le?vperm        $out0,$out0,$out0,$leperm
3410         vmr             $twk0,$twk5             # unused tweak
3411         vxor            $twk1,$tweak,$rndkey0
3412         le?vperm        $out1,$out1,$out1,$leperm
3413         stvx_u          $out0,$x00,$out         # store output
3414         vxor            $out0,$in0,$twk1
3415         le?vperm        $out2,$out2,$out2,$leperm
3416         stvx_u          $out1,$x10,$out
3417         le?vperm        $out3,$out3,$out3,$leperm
3418         stvx_u          $out2,$x20,$out
3419         le?vperm        $out4,$out4,$out4,$leperm
3420         stvx_u          $out3,$x30,$out
3421         stvx_u          $out4,$x40,$out
3422         addi            $out,$out,0x50
3423         bne             Lxts_dec6x_steal
3424         b               Lxts_dec6x_done
3425
3426 .align  4
3427 Lxts_dec6x_four:
3428         vxor            $out0,$in2,$twk0
3429         vxor            $out1,$in3,$twk1
3430         vxor            $out2,$in4,$twk2
3431         vxor            $out3,$in5,$twk3
3432         vxor            $out4,$out4,$out4
3433
3434         bl              _aesp8_xts_dec5x
3435
3436         le?vperm        $out0,$out0,$out0,$leperm
3437         vmr             $twk0,$twk4             # unused tweak
3438         vmr             $twk1,$twk5
3439         le?vperm        $out1,$out1,$out1,$leperm
3440         stvx_u          $out0,$x00,$out         # store output
3441         vxor            $out0,$in0,$twk5
3442         le?vperm        $out2,$out2,$out2,$leperm
3443         stvx_u          $out1,$x10,$out
3444         le?vperm        $out3,$out3,$out3,$leperm
3445         stvx_u          $out2,$x20,$out
3446         stvx_u          $out3,$x30,$out
3447         addi            $out,$out,0x40
3448         bne             Lxts_dec6x_steal
3449         b               Lxts_dec6x_done
3450
3451 .align  4
3452 Lxts_dec6x_three:
3453         vxor            $out0,$in3,$twk0
3454         vxor            $out1,$in4,$twk1
3455         vxor            $out2,$in5,$twk2
3456         vxor            $out3,$out3,$out3
3457         vxor            $out4,$out4,$out4
3458
3459         bl              _aesp8_xts_dec5x
3460
3461         le?vperm        $out0,$out0,$out0,$leperm
3462         vmr             $twk0,$twk3             # unused tweak
3463         vmr             $twk1,$twk4
3464         le?vperm        $out1,$out1,$out1,$leperm
3465         stvx_u          $out0,$x00,$out         # store output
3466         vxor            $out0,$in0,$twk4
3467         le?vperm        $out2,$out2,$out2,$leperm
3468         stvx_u          $out1,$x10,$out
3469         stvx_u          $out2,$x20,$out
3470         addi            $out,$out,0x30
3471         bne             Lxts_dec6x_steal
3472         b               Lxts_dec6x_done
3473
3474 .align  4
3475 Lxts_dec6x_two:
3476         vxor            $out0,$in4,$twk0
3477         vxor            $out1,$in5,$twk1
3478         vxor            $out2,$out2,$out2
3479         vxor            $out3,$out3,$out3
3480         vxor            $out4,$out4,$out4
3481
3482         bl              _aesp8_xts_dec5x
3483
3484         le?vperm        $out0,$out0,$out0,$leperm
3485         vmr             $twk0,$twk2             # unused tweak
3486         vmr             $twk1,$twk3
3487         le?vperm        $out1,$out1,$out1,$leperm
3488         stvx_u          $out0,$x00,$out         # store output
3489         vxor            $out0,$in0,$twk3
3490         stvx_u          $out1,$x10,$out
3491         addi            $out,$out,0x20
3492         bne             Lxts_dec6x_steal
3493         b               Lxts_dec6x_done
3494
3495 .align  4
3496 Lxts_dec6x_one:
3497         vxor            $out0,$in5,$twk0
3498         nop
3499 Loop_xts_dec1x:
3500         vncipher        $out0,$out0,v24
3501         lvx             v24,$x20,$key_          # round[3]
3502         addi            $key_,$key_,0x20
3503
3504         vncipher        $out0,$out0,v25
3505         lvx             v25,$x10,$key_          # round[4]
3506         bdnz            Loop_xts_dec1x
3507
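#
# ($taillen-1) & 16 is non-zero only when $taillen is 0, so $inp is
# stepped back one block exactly in the no-tail case before the final
# load.
#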
3508         subi            r0,$taillen,1
3509         vncipher        $out0,$out0,v24
3510
3511         andi.           r0,r0,16
3512         cmpwi           $taillen,0
3513         vncipher        $out0,$out0,v25
3514
3515         sub             $inp,$inp,r0
3516         vncipher        $out0,$out0,v26
3517
3518         lvx_u           $in0,0,$inp
3519         vncipher        $out0,$out0,v27
3520
3521         addi            $key_,$sp,$FRAME+15     # rewind $key_
3522         vncipher        $out0,$out0,v28
3523         lvx             v24,$x00,$key_          # re-pre-load round[1]
3524
3525         vncipher        $out0,$out0,v29
3526         lvx             v25,$x10,$key_          # re-pre-load round[2]
3527          vxor           $twk0,$twk0,v31
3528
3529         le?vperm        $in0,$in0,$in0,$leperm
3530         vncipher        $out0,$out0,v30
3531
3532         mtctr           $rounds
3533         vncipherlast    $out0,$out0,$twk0
3534
3535         vmr             $twk0,$twk1             # unused tweak
3536         vmr             $twk1,$twk2
3537         le?vperm        $out0,$out0,$out0,$leperm
3538         stvx_u          $out0,$x00,$out         # store output
3539         addi            $out,$out,0x10
3540         vxor            $out0,$in0,$twk2
3541         bne             Lxts_dec6x_steal
3542         b               Lxts_dec6x_done
3543
3544 .align  4
3545 Lxts_dec6x_zero:
3546         cmpwi           $taillen,0
3547         beq             Lxts_dec6x_done
3548
3549         lvx_u           $in0,0,$inp
3550         le?vperm        $in0,$in0,$in0,$leperm
3551         vxor            $out0,$in0,$twk1
3552 Lxts_dec6x_steal:
3553         vncipher        $out0,$out0,v24
3554         lvx             v24,$x20,$key_          # round[3]
3555         addi            $key_,$key_,0x20
3556
3557         vncipher        $out0,$out0,v25
3558         lvx             v25,$x10,$key_          # round[4]
3559         bdnz            Lxts_dec6x_steal
3560
3561         add             $inp,$inp,$taillen
3562         vncipher        $out0,$out0,v24
3563
3564         cmpwi           $taillen,0
3565         vncipher        $out0,$out0,v25
3566
3567         lvx_u           $in0,0,$inp
3568         vncipher        $out0,$out0,v26
3569
3570         lvsr            $inpperm,0,$taillen     # $in5 is no more
3571         vncipher        $out0,$out0,v27
3572
3573         addi            $key_,$sp,$FRAME+15     # rewind $key_
3574         vncipher        $out0,$out0,v28
3575         lvx             v24,$x00,$key_          # re-pre-load round[1]
3576
3577         vncipher        $out0,$out0,v29
3578         lvx             v25,$x10,$key_          # re-pre-load round[2]
3579          vxor           $twk1,$twk1,v31
3580
3581         le?vperm        $in0,$in0,$in0,$leperm
3582         vncipher        $out0,$out0,v30
3583
3584         vperm           $in0,$in0,$in0,$inpperm
3585         vncipherlast    $tmp,$out0,$twk1
3586
3587         le?vperm        $out0,$tmp,$tmp,$leperm
3588         le?stvx_u       $out0,0,$out
3589         be?stvx_u       $tmp,0,$out
3590
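#
# Splice for the stolen block: build a byte mask from $taillen, take
# the partial ciphertext bytes from $in0 and fill the remainder from
# the just-decrypted block in $tmp, then whiten with the earlier tweak
# ($twk0) before one last pass through Loop_xts_dec1x.
#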
3591         vxor            $out0,$out0,$out0
3592         vspltisb        $out1,-1
3593         vperm           $out0,$out0,$out1,$inpperm
3594         vsel            $out0,$in0,$tmp,$out0
3595         vxor            $out0,$out0,$twk0
3596
3597         subi            r30,$out,1
3598         mtctr           $taillen
3599 Loop_xts_dec6x_steal:
3600         lbzu            r0,1(r30)
3601         stb             r0,16(r30)
3602         bdnz            Loop_xts_dec6x_steal
3603
3604         li              $taillen,0
3605         mtctr           $rounds
3606         b               Loop_xts_dec1x          # one more time...
3607
3608 .align  4
3609 Lxts_dec6x_done:
3610         ${UCMP}i        $ivp,0
3611         beq             Lxts_dec6x_ret
3612
3613         vxor            $tweak,$twk0,$rndkey0
3614         le?vperm        $tweak,$tweak,$tweak,$leperm
3615         stvx_u          $tweak,0,$ivp
3616
3617 Lxts_dec6x_ret:
3618         mtlr            r11
3619         li              r10,`$FRAME+15`
3620         li              r11,`$FRAME+31`
3621         stvx            $seven,r10,$sp          # wipe copies of round keys
3622         addi            r10,r10,32
3623         stvx            $seven,r11,$sp
3624         addi            r11,r11,32
3625         stvx            $seven,r10,$sp
3626         addi            r10,r10,32
3627         stvx            $seven,r11,$sp
3628         addi            r11,r11,32
3629         stvx            $seven,r10,$sp
3630         addi            r10,r10,32
3631         stvx            $seven,r11,$sp
3632         addi            r11,r11,32
3633         stvx            $seven,r10,$sp
3634         addi            r10,r10,32
3635         stvx            $seven,r11,$sp
3636         addi            r11,r11,32
3637
3638         mtspr           256,$vrsave
3639         lvx             v20,r10,$sp             # ABI says so
3640         addi            r10,r10,32
3641         lvx             v21,r11,$sp
3642         addi            r11,r11,32
3643         lvx             v22,r10,$sp
3644         addi            r10,r10,32
3645         lvx             v23,r11,$sp
3646         addi            r11,r11,32
3647         lvx             v24,r10,$sp
3648         addi            r10,r10,32
3649         lvx             v25,r11,$sp
3650         addi            r11,r11,32
3651         lvx             v26,r10,$sp
3652         addi            r10,r10,32
3653         lvx             v27,r11,$sp
3654         addi            r11,r11,32
3655         lvx             v28,r10,$sp
3656         addi            r10,r10,32
3657         lvx             v29,r11,$sp
3658         addi            r11,r11,32
3659         lvx             v30,r10,$sp
3660         lvx             v31,r11,$sp
3661         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3662         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3663         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3664         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3665         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3666         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3667         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3668         blr
3669         .long           0
3670         .byte           0,12,0x04,1,0x80,6,6,0
3671         .long           0
3672
3673 .align  5
3674 _aesp8_xts_dec5x:
3675         vncipher        $out0,$out0,v24
3676         vncipher        $out1,$out1,v24
3677         vncipher        $out2,$out2,v24
3678         vncipher        $out3,$out3,v24
3679         vncipher        $out4,$out4,v24
3680         lvx             v24,$x20,$key_          # round[3]
3681         addi            $key_,$key_,0x20
3682
3683         vncipher        $out0,$out0,v25
3684         vncipher        $out1,$out1,v25
3685         vncipher        $out2,$out2,v25
3686         vncipher        $out3,$out3,v25
3687         vncipher        $out4,$out4,v25
3688         lvx             v25,$x10,$key_          # round[4]
3689         bdnz            _aesp8_xts_dec5x
3690
3691         subi            r0,$taillen,1
3692         vncipher        $out0,$out0,v24
3693         vncipher        $out1,$out1,v24
3694         vncipher        $out2,$out2,v24
3695         vncipher        $out3,$out3,v24
3696         vncipher        $out4,$out4,v24
3697
3698         andi.           r0,r0,16
3699         cmpwi           $taillen,0
3700         vncipher        $out0,$out0,v25
3701         vncipher        $out1,$out1,v25
3702         vncipher        $out2,$out2,v25
3703         vncipher        $out3,$out3,v25
3704         vncipher        $out4,$out4,v25
3705          vxor           $twk0,$twk0,v31
3706
3707         sub             $inp,$inp,r0
3708         vncipher        $out0,$out0,v26
3709         vncipher        $out1,$out1,v26
3710         vncipher        $out2,$out2,v26
3711         vncipher        $out3,$out3,v26
3712         vncipher        $out4,$out4,v26
3713          vxor           $in1,$twk1,v31
3714
3715         vncipher        $out0,$out0,v27
3716         lvx_u           $in0,0,$inp
3717         vncipher        $out1,$out1,v27
3718         vncipher        $out2,$out2,v27
3719         vncipher        $out3,$out3,v27
3720         vncipher        $out4,$out4,v27
3721          vxor           $in2,$twk2,v31
3722
3723         addi            $key_,$sp,$FRAME+15     # rewind $key_
3724         vncipher        $out0,$out0,v28
3725         vncipher        $out1,$out1,v28
3726         vncipher        $out2,$out2,v28
3727         vncipher        $out3,$out3,v28
3728         vncipher        $out4,$out4,v28
3729         lvx             v24,$x00,$key_          # re-pre-load round[1]
3730          vxor           $in3,$twk3,v31
3731
3732         vncipher        $out0,$out0,v29
3733         le?vperm        $in0,$in0,$in0,$leperm
3734         vncipher        $out1,$out1,v29
3735         vncipher        $out2,$out2,v29
3736         vncipher        $out3,$out3,v29
3737         vncipher        $out4,$out4,v29
3738         lvx             v25,$x10,$key_          # re-pre-load round[2]
3739          vxor           $in4,$twk4,v31
3740
3741         vncipher        $out0,$out0,v30
3742         vncipher        $out1,$out1,v30
3743         vncipher        $out2,$out2,v30
3744         vncipher        $out3,$out3,v30
3745         vncipher        $out4,$out4,v30
3746
3747         vncipherlast    $out0,$out0,$twk0
3748         vncipherlast    $out1,$out1,$in1
3749         vncipherlast    $out2,$out2,$in2
3750         vncipherlast    $out3,$out3,$in3
3751         vncipherlast    $out4,$out4,$in4
3752         mtctr           $rounds
3753         blr
3754         .long           0
3755         .byte           0,12,0x14,0,0,0,0,0
3756 ___
3757 }}      }}}
3758
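#
# Post-process the generated text: evaluate the backtick arithmetic,
# emit the constants table byte-by-byte in the target byte order, and
# resolve the '?'-prefixed endian-specific constructs (on little-endian,
# for example, "?lvsr" is rewritten to lvsl and the two middle operands
# of a "?vperm" are swapped).
#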
3759 my $consts=1;
3760 foreach(split("\n",$code)) {
3761         s/\`([^\`]*)\`/eval($1)/geo;
3762
3763         # constants table endian-specific conversion
3764         if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3765             my $conv=$3;
3766             my @bytes=();
3767
3768             # convert to endian-agnostic format
3769             if ($1 eq "long") {
3770               foreach (split(/,\s*/,$2)) {
3771                 my $l = /^0/?oct:int;
3772                 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3773               }
3774             } else {
3775                 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3776             }
3777
3778             # little-endian conversion
3779             if ($flavour =~ /le$/o) {
3780                 SWITCH: for($conv)  {
3781                     /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
3782                     /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
3783                 }
3784             }
3785
3786             # emit
3787             print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3788             next;
3789         }
3790         $consts=0 if (m/Lconsts:/o);    # end of table
3791
3792         # instructions prefixed with '?' are endian-specific and need
3793         # to be adjusted accordingly...
3794         if ($flavour =~ /le$/o) {       # little-endian
3795             s/le\?//o           or
3796             s/be\?/#be#/o       or
3797             s/\?lvsr/lvsl/o     or
3798             s/\?lvsl/lvsr/o     or
3799             s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3800             s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3801             s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3802         } else {                        # big-endian
3803             s/le\?/#le#/o       or
3804             s/be\?//o           or
3805             s/\?([a-z]+)/$1/o;
3806         }
3807
3808         print $_,"\n";
3809 }
3810
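# Typical invocation (an assumption based on the argument handling at
# the top of the file; the flavour string comes from Configure's
# perlasm_scheme, e.g. "linux64le" for little-endian ppc64):
#
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s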
3811 close STDOUT or die "error closing STDOUT: $!";