#! /usr/bin/env perl
# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in a pure AltiVec/VMX way [when data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower
# than the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added XTS subroutine; 9x improvement on little- and 12x on big-endian
# systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0
# POWER9[le]	4.02/0.86	0.84	1.05
# POWER9[be]	3.99/0.78	0.79	0.97
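#
# For reference, the entry points generated below are consumed from C
# with prototypes along these lines (a sketch based on how OpenSSL's
# EVP glue declares them; this file itself only emits the assembly):
#
#	int  aes_p8_set_encrypt_key(const unsigned char *userKey,
#				    const int bits, AES_KEY *key);
#	int  aes_p8_set_decrypt_key(const unsigned char *userKey,
#				    const int bits, AES_KEY *key);
#	void aes_p8_encrypt(const unsigned char *in, unsigned char *out,
#			    const AES_KEY *key);
#	void aes_p8_decrypt(const unsigned char *in, unsigned char *out,
#			    const AES_KEY *key);
#	void aes_p8_cbc_encrypt(const unsigned char *in, unsigned char *out,
#				size_t length, const AES_KEY *key,
#				unsigned char *ivec, const int enc);
#	void aes_p8_ctr32_encrypt_blocks(const unsigned char *in,
#				unsigned char *out, size_t len,
#				const void *key, const unsigned char ivec[16]);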

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
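# (Low-precedence "or" is used so the die actually fires if the pipe
# can't be opened; with "||" it would bind to the argument string and
# never trigger.) The script is normally driven by the build system,
# e.g.
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s
# where the flavour argument selects ABI/endianness and the second
# argument names the output file handed to ppc-xlate.pl above.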

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
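
# The "?rev" tag above asks ppc-xlate.pl to byte-reverse the constants
# on little-endian targets and "?asis" leaves them alone; likewise the
# "le?"/"be?" prefixes emit instructions only for the matching
# endianness, and a bare "?" marks endian-sensitive instructions the
# translator rewrites on little-endian (an assumption about
# ppc-xlate.pl's conventions as used throughout this file). Lconsts
# recovers the address of rcon PC-relatively: "bcl 20,31,\$+4" branches
# to the very next instruction while setting LR (a form that spares the
# link-stack predictor), so LR holds the address right after the bcl
# and -0x48 steps back over the 0x40-byte table plus the two preceding
# instructions.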
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 # vvvvv distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not a typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256
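
# Each Loop128 iteration derives one round key in-register: vperm with
# $mask both rotates the last 32-bit word and splats it across all four
# lanes, so vcipherlast against $rcon reduces to SubBytes plus the
# round-constant XOR (with identical columns ShiftRows is a no-op), and
# the vsldoi/vxor ladder propagates the usual w[i] ^= w[i-4] chain
# through the quadword. Stores are streamed through $outhead/$outmask
# with vsel so every stvx is 16-byte aligned even for unaligned $out.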

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not a typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	 vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	 vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not a typo
	 addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in1,$in1,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not a typo
	 addi		$out,$out,16
	bdz		Ldone
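
# AES-256 derives two quadwords per trip; the word feeding the second
# half takes SubWord without RotWord or a round constant, hence the
# plain vspltw followed by vsbox (SubBytes only) below.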

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds
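
# vncipher consumes the regular encryption schedule in reverse order,
# so no InvMixColumns transformation of the keys is required; the loop
# below simply swaps the 16-byte round keys end-for-end in place, using
# word-sized integer loads and stores to stay alignment- and
# endian-neutral.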

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
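
# Round keys are loaded pairwise and merged with "?vperm ...,v5" so an
# unaligned key schedule works; the counter is set to rounds/2-1 and
# each Loop_${dir}c iteration applies two rounds with v1/v2 as a
# rotating round-key pair, leaving the final v${n}cipher plus
# v${n}cipherlast peeled off after the loop.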

$code.=<<___;
.globl	.${prefix}_${dir}crypt
.align	5
.${prefix}_${dir}crypt:
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not a typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not a typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
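# CBC encryption is inherently serial (each block depends on the
# previous ciphertext), so it proceeds one block at a time; decryption
# has no such dependency and diverts to an 8x interleaved path once at
# least 128 bytes remain, which is why the table up top shows ~4 c/b
# for encrypt against well under 1 c/b for decrypt.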
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	${UCMP}i	$len,16
	bltlr-

	cmpwi		$enc,0			# test direction
	lis		r0,0xffe0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not a typo
	le?vxor		$inpperm,$inpperm,$tmp

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1
	beq		Lcbc_dec

Lcbc_enc:
	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	 vperm		$inout,$inout,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	vxor		$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm		$tmp,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_enc

	b		Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge		_aesp8_cbc_decrypt8x
	vmr		$tmp,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	 vperm		$tmp,$tmp,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$tmp,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16

Loop_cbc_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor		$inout,$inout,$ivec
	vmr		$ivec,$tmp
	vperm		$tmp,$inout,$inout,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_dec

Lcbc_done:
	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	neg		$enc,$ivp		# write [unaligned] iv
	li		$idx,15			# 15 is not a typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total
	subi		$len,$len,128		# bias

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_cbc_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_cbc_dec_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	#lvx		$inptail,0,$inp		# "caller" already did this
	#addi		$inp,$inp,15		# 15 is not a typo
	subi		$inp,$inp,15		# undo "caller"

	 le?li		$idx,8
	lvx_u		$in0,$x00,$inp		# load first 8 "words"
	 le?lvsl	$inpperm,0,$idx
	 le?vspltisb	$tmp,0x0f
	lvx_u		$in1,$x10,$inp
	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u		$in2,$x20,$inp
	 le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in3,$x30,$inp
	 le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in4,$x40,$inp
	 le?vperm	$in2,$in2,$in2,$inpperm
	vxor		$out0,$in0,$rndkey0
	lvx_u		$in5,$x50,$inp
	 le?vperm	$in3,$in3,$in3,$inpperm
	vxor		$out1,$in1,$rndkey0
	lvx_u		$in6,$x60,$inp
	 le?vperm	$in4,$in4,$in4,$inpperm
	vxor		$out2,$in2,$rndkey0
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80
	 le?vperm	$in5,$in5,$in5,$inpperm
	vxor		$out3,$in3,$rndkey0
	 le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out4,$in4,$rndkey0
	 le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out5,$in5,$rndkey0
	vxor		$out6,$in6,$rndkey0
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	b		Loop_cbc_dec8x
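
# Eight blocks stay in flight below, with v24/v25 rotating through the
# off-loaded round keys. The subic/subfe./and/add sequence after the
# inner loop is a branchless adjustment: subfe. turns the borrow from
# $len-=128 into an all-ones/zero mask, and ANDing it with the negative
# residual rewinds $inp just enough that the final partial batch
# reloads exactly the last blocks.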
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x

	subic		$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and		r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add		$inp,$inp,r0		# $inp is adjusted in such a
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	 vxor		$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	 vxor		$in0,$in0,v31
	vncipher	$out2,$out2,v30
	 vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	 vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	 vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	 vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	 vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	 vxor		$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	 lvx_u		$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	 lvx_u		$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	 le?vperm	$in0,$in0,$in0,$inpperm
	 lvx_u		$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	 le?vperm	$in1,$in1,$in1,$inpperm
	 lvx_u		$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	 le?vperm	$in2,$in2,$in2,$inpperm
	 lvx_u		$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	 le?vperm	$in3,$in3,$in3,$inpperm
	 lvx_u		$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	 le?vperm	$in4,$in4,$in4,$inpperm
	 lvx_u		$in6,$x60,$inp
	vmr		$ivec,$in7
	 le?vperm	$in5,$in5,$in5,$inpperm
	 lvx_u		$in7,$x70,$inp
	 addi		$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	 le?vperm	$in6,$in6,$in6,$inpperm
	 vxor		$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	 le?vperm	$in7,$in7,$in7,$inpperm
	 vxor		$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	 vxor		$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	 vxor		$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	 vxor		$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	 vxor		$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	 vxor		$out6,$in6,$rndkey0
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	 vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	beq		Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.		$len,$len,128
	beq		Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	 vxor		$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	 vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	 vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	 vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	 vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	 vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	 vxor		$in6,$in6,v31

	cmplwi		$len,32			# switch($len)
	blt		Lcbc_dec8x_one
	nop
	beq		Lcbc_dec8x_two
	cmplwi		$len,64
	blt		Lcbc_dec8x_three
	nop
	beq		Lcbc_dec8x_four
	cmplwi		$len,96
	blt		Lcbc_dec8x_five
	nop
	beq		Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x50,$out
	stvx_u		$out7,$x60,$out
	addi		$out,$out,0x70
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x40,$out
	stvx_u		$out7,$x50,$out
	addi		$out,$out,0x60
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x30,$out
	stvx_u		$out7,$x40,$out
	addi		$out,$out,0x50
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x20,$out
	stvx_u		$out7,$x30,$out
	addi		$out,$out,0x40
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x10,$out
	stvx_u		$out7,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x00,$out
	stvx_u		$out7,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr		$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out7,0,$out
	addi		$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u		$ivec,0,$ivp		# write [unaligned] iv

	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;
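# Per the _ctr32_ name only the low 32 bits of the counter are bumped:
# $one is built as the 128-bit constant 1 (vspltisb+vsldoi leave a
# single 0x01 in the last byte) and added with vadduwm, a word-wise
# modular add that never carries across 32-bit lanes.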

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
.align	5
.${prefix}_ctr32_encrypt_blocks:
	${UCMP}i	$len,1
	bltlr-

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	 vspltisb	$one,1
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm
	 vsldoi		$one,$rndkey0,$one,1

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not a typo
	le?vxor		$inpperm,$inpperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1

	${UCMP}i	$len,8
	bge		_aesp8_ctr32_encrypt8x

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	lvx		$rndkey0,0,$key
	mtctr		$rounds
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	b		Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_ctr32_enc

	vadduwm		$ivec,$ivec,$one
	 vmr		$dat,$inptail
	 lvx		$inptail,0,$inp
	 addi		$inp,$inp,16
	 subic.		$len,$len,1		# blocks--

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	 vperm		$dat,$dat,$inptail,$inpperm
	 li		$idx,16
	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
	 lvx		$rndkey0,0,$key
	vxor		$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	 lvx		$rndkey1,$idx,$key
	 addi		$idx,$idx,16
	vperm		$inout,$inout,$inout,$outperm
	vsel		$dat,$outhead,$inout,$outmask
	 mtctr		$rounds
	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr		$outhead,$inout
	 vxor		$inout,$ivec,$rndkey0
	 lvx		$rndkey0,$idx,$key
	 addi		$idx,$idx,16
	stvx		$dat,0,$out
	addi		$out,$out,16
	bne		Loop_ctr32_enc

	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
1375 #########################################################################
1376 {{      # Optimized CTR procedure                                       #
1377 my $key_="r11";
1378 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1379     $x00=0 if ($flavour =~ /osx/);
1380 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1381 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1382 my $rndkey0="v23";      # v24-v25 rotating buffer for first round keys
1383                         # v26-v31 last 6 round keys
1384 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1385 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1386
1387 $code.=<<___;
1388 .align  5
1389 _aesp8_ctr32_encrypt8x:
1390         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1391         li              r10,`$FRAME+8*16+15`
1392         li              r11,`$FRAME+8*16+31`
1393         stvx            v20,r10,$sp             # ABI says so
1394         addi            r10,r10,32
1395         stvx            v21,r11,$sp
1396         addi            r11,r11,32
1397         stvx            v22,r10,$sp
1398         addi            r10,r10,32
1399         stvx            v23,r11,$sp
1400         addi            r11,r11,32
1401         stvx            v24,r10,$sp
1402         addi            r10,r10,32
1403         stvx            v25,r11,$sp
1404         addi            r11,r11,32
1405         stvx            v26,r10,$sp
1406         addi            r10,r10,32
1407         stvx            v27,r11,$sp
1408         addi            r11,r11,32
1409         stvx            v28,r10,$sp
1410         addi            r10,r10,32
1411         stvx            v29,r11,$sp
1412         addi            r11,r11,32
1413         stvx            v30,r10,$sp
1414         stvx            v31,r11,$sp
1415         li              r0,-1
1416         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
1417         li              $x10,0x10
1418         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1419         li              $x20,0x20
1420         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1421         li              $x30,0x30
1422         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1423         li              $x40,0x40
1424         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1425         li              $x50,0x50
1426         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1427         li              $x60,0x60
1428         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1429         li              $x70,0x70
1430         mtspr           256,r0
1431
1432         subi            $rounds,$rounds,3       # -4 in total
1433
1434         lvx             $rndkey0,$x00,$key      # load key schedule
1435         lvx             v30,$x10,$key
1436         addi            $key,$key,0x20
1437         lvx             v31,$x00,$key
1438         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
1439         addi            $key_,$sp,$FRAME+15
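                                                # round keys are staged on the
                                                # stack from here and streamed
                                                # back through the v24-v25
                                                # rotating pair; v26-v31 keep
                                                # the last six round keys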
1440         mtctr           $rounds
1441
1442 Load_ctr32_enc_key:
1443         ?vperm          v24,v30,v31,$keyperm
1444         lvx             v30,$x10,$key
1445         addi            $key,$key,0x20
1446         stvx            v24,$x00,$key_          # off-load round[1]
1447         ?vperm          v25,v31,v30,$keyperm
1448         lvx             v31,$x00,$key
1449         stvx            v25,$x10,$key_          # off-load round[2]
1450         addi            $key_,$key_,0x20
1451         bdnz            Load_ctr32_enc_key
1452
1453         lvx             v26,$x10,$key
1454         ?vperm          v24,v30,v31,$keyperm
1455         lvx             v27,$x20,$key
1456         stvx            v24,$x00,$key_          # off-load round[3]
1457         ?vperm          v25,v31,v26,$keyperm
1458         lvx             v28,$x30,$key
1459         stvx            v25,$x10,$key_          # off-load round[4]
1460         addi            $key_,$sp,$FRAME+15     # rewind $key_
1461         ?vperm          v26,v26,v27,$keyperm
1462         lvx             v29,$x40,$key
1463         ?vperm          v27,v27,v28,$keyperm
1464         lvx             v30,$x50,$key
1465         ?vperm          v28,v28,v29,$keyperm
1466         lvx             v31,$x60,$key
1467         ?vperm          v29,v29,v30,$keyperm
1468         lvx             $out0,$x70,$key         # borrow $out0
1469         ?vperm          v30,v30,v31,$keyperm
1470         lvx             v24,$x00,$key_          # pre-load round[1]
1471         ?vperm          v31,v31,$out0,$keyperm
1472         lvx             v25,$x10,$key_          # pre-load round[2]
1473
1474         vadduwm         $two,$one,$one
1475         subi            $inp,$inp,15            # undo "caller"
1476         $SHL            $len,$len,4
1477
1478         vadduwm         $out1,$ivec,$one        # counter values ...
1479         vadduwm         $out2,$ivec,$two
1480         vxor            $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1481          le?li          $idx,8
1482         vadduwm         $out3,$out1,$two
1483         vxor            $out1,$out1,$rndkey0
1484          le?lvsl        $inpperm,0,$idx
1485         vadduwm         $out4,$out2,$two
1486         vxor            $out2,$out2,$rndkey0
1487          le?vspltisb    $tmp,0x0f
1488         vadduwm         $out5,$out3,$two
1489         vxor            $out3,$out3,$rndkey0
1490          le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
1491         vadduwm         $out6,$out4,$two
1492         vxor            $out4,$out4,$rndkey0
1493         vadduwm         $out7,$out5,$two
1494         vxor            $out5,$out5,$rndkey0
1495         vadduwm         $ivec,$out6,$two        # next counter value
1496         vxor            $out6,$out6,$rndkey0
1497         vxor            $out7,$out7,$rndkey0
1498
1499         mtctr           $rounds
1500         b               Loop_ctr32_enc8x
1501 .align  5
1502 Loop_ctr32_enc8x:
1503         vcipher         $out0,$out0,v24
1504         vcipher         $out1,$out1,v24
1505         vcipher         $out2,$out2,v24
1506         vcipher         $out3,$out3,v24
1507         vcipher         $out4,$out4,v24
1508         vcipher         $out5,$out5,v24
1509         vcipher         $out6,$out6,v24
1510         vcipher         $out7,$out7,v24
1511 Loop_ctr32_enc8x_middle:
1512         lvx             v24,$x20,$key_          # round[3]
1513         addi            $key_,$key_,0x20
1514
1515         vcipher         $out0,$out0,v25
1516         vcipher         $out1,$out1,v25
1517         vcipher         $out2,$out2,v25
1518         vcipher         $out3,$out3,v25
1519         vcipher         $out4,$out4,v25
1520         vcipher         $out5,$out5,v25
1521         vcipher         $out6,$out6,v25
1522         vcipher         $out7,$out7,v25
1523         lvx             v25,$x10,$key_          # round[4]
1524         bdnz            Loop_ctr32_enc8x
1525
1526         subic           r11,$len,256            # $len-256, borrow $key_
1527         vcipher         $out0,$out0,v24
1528         vcipher         $out1,$out1,v24
1529         vcipher         $out2,$out2,v24
1530         vcipher         $out3,$out3,v24
1531         vcipher         $out4,$out4,v24
1532         vcipher         $out5,$out5,v24
1533         vcipher         $out6,$out6,v24
1534         vcipher         $out7,$out7,v24
1535
1536         subfe           r0,r0,r0                # borrow?-1:0
1537         vcipher         $out0,$out0,v25
1538         vcipher         $out1,$out1,v25
1539         vcipher         $out2,$out2,v25
1540         vcipher         $out3,$out3,v25
1541         vcipher         $out4,$out4,v25
1542         vcipher         $out5,$out5,v25
1543         vcipher         $out6,$out6,v25
1544         vcipher         $out7,$out7,v25
1545
1546         and             r0,r0,r11
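                                                # r0 = ($len<256) ? $len-256 : 0,
                                                # a branchless clamp consumed by
                                                # "add $inp,$inp,r0" below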
1547         addi            $key_,$sp,$FRAME+15     # rewind $key_
1548         vcipher         $out0,$out0,v26
1549         vcipher         $out1,$out1,v26
1550         vcipher         $out2,$out2,v26
1551         vcipher         $out3,$out3,v26
1552         vcipher         $out4,$out4,v26
1553         vcipher         $out5,$out5,v26
1554         vcipher         $out6,$out6,v26
1555         vcipher         $out7,$out7,v26
1556         lvx             v24,$x00,$key_          # re-pre-load round[1]
1557
1558         subic           $len,$len,129           # $len-=129
1559         vcipher         $out0,$out0,v27
1560         addi            $len,$len,1             # $len-=128 really
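                                                # subtract 129, then add 1: net
                                                # $len-=128, while CA still holds
                                                # the borrow from $len-129 for
                                                # the subfe./bne test below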
1561         vcipher         $out1,$out1,v27
1562         vcipher         $out2,$out2,v27
1563         vcipher         $out3,$out3,v27
1564         vcipher         $out4,$out4,v27
1565         vcipher         $out5,$out5,v27
1566         vcipher         $out6,$out6,v27
1567         vcipher         $out7,$out7,v27
1568         lvx             v25,$x10,$key_          # re-pre-load round[2]
1569
1570         vcipher         $out0,$out0,v28
1571          lvx_u          $in0,$x00,$inp          # load input
1572         vcipher         $out1,$out1,v28
1573          lvx_u          $in1,$x10,$inp
1574         vcipher         $out2,$out2,v28
1575          lvx_u          $in2,$x20,$inp
1576         vcipher         $out3,$out3,v28
1577          lvx_u          $in3,$x30,$inp
1578         vcipher         $out4,$out4,v28
1579          lvx_u          $in4,$x40,$inp
1580         vcipher         $out5,$out5,v28
1581          lvx_u          $in5,$x50,$inp
1582         vcipher         $out6,$out6,v28
1583          lvx_u          $in6,$x60,$inp
1584         vcipher         $out7,$out7,v28
1585          lvx_u          $in7,$x70,$inp
1586          addi           $inp,$inp,0x80
1587
1588         vcipher         $out0,$out0,v29
1589          le?vperm       $in0,$in0,$in0,$inpperm
1590         vcipher         $out1,$out1,v29
1591          le?vperm       $in1,$in1,$in1,$inpperm
1592         vcipher         $out2,$out2,v29
1593          le?vperm       $in2,$in2,$in2,$inpperm
1594         vcipher         $out3,$out3,v29
1595          le?vperm       $in3,$in3,$in3,$inpperm
1596         vcipher         $out4,$out4,v29
1597          le?vperm       $in4,$in4,$in4,$inpperm
1598         vcipher         $out5,$out5,v29
1599          le?vperm       $in5,$in5,$in5,$inpperm
1600         vcipher         $out6,$out6,v29
1601          le?vperm       $in6,$in6,$in6,$inpperm
1602         vcipher         $out7,$out7,v29
1603          le?vperm       $in7,$in7,$in7,$inpperm
1604
1605         add             $inp,$inp,r0            # $inp is adjusted in such
1606                                                 # a way that at exit from the
1607                                                 # loop inX-in7 are loaded
1608                                                 # with the last "words"
1609         subfe.          r0,r0,r0                # borrow?-1:0
1610         vcipher         $out0,$out0,v30
1611          vxor           $in0,$in0,v31           # xor with last round key
1612         vcipher         $out1,$out1,v30
1613          vxor           $in1,$in1,v31
1614         vcipher         $out2,$out2,v30
1615          vxor           $in2,$in2,v31
1616         vcipher         $out3,$out3,v30
1617          vxor           $in3,$in3,v31
1618         vcipher         $out4,$out4,v30
1619          vxor           $in4,$in4,v31
1620         vcipher         $out5,$out5,v30
1621          vxor           $in5,$in5,v31
1622         vcipher         $out6,$out6,v30
1623          vxor           $in6,$in6,v31
1624         vcipher         $out7,$out7,v30
1625          vxor           $in7,$in7,v31
1626
1627         bne             Lctr32_enc8x_break      # did $len-129 borrow?
1628
1629         vcipherlast     $in0,$out0,$in0
1630         vcipherlast     $in1,$out1,$in1
1631          vadduwm        $out1,$ivec,$one        # counter values ...
1632         vcipherlast     $in2,$out2,$in2
1633          vadduwm        $out2,$ivec,$two
1634          vxor           $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1635         vcipherlast     $in3,$out3,$in3
1636          vadduwm        $out3,$out1,$two
1637          vxor           $out1,$out1,$rndkey0
1638         vcipherlast     $in4,$out4,$in4
1639          vadduwm        $out4,$out2,$two
1640          vxor           $out2,$out2,$rndkey0
1641         vcipherlast     $in5,$out5,$in5
1642          vadduwm        $out5,$out3,$two
1643          vxor           $out3,$out3,$rndkey0
1644         vcipherlast     $in6,$out6,$in6
1645          vadduwm        $out6,$out4,$two
1646          vxor           $out4,$out4,$rndkey0
1647         vcipherlast     $in7,$out7,$in7
1648          vadduwm        $out7,$out5,$two
1649          vxor           $out5,$out5,$rndkey0
1650         le?vperm        $in0,$in0,$in0,$inpperm
1651          vadduwm        $ivec,$out6,$two        # next counter value
1652          vxor           $out6,$out6,$rndkey0
1653         le?vperm        $in1,$in1,$in1,$inpperm
1654          vxor           $out7,$out7,$rndkey0
1655         mtctr           $rounds
1656
1657          vcipher        $out0,$out0,v24
1658         stvx_u          $in0,$x00,$out
1659         le?vperm        $in2,$in2,$in2,$inpperm
1660          vcipher        $out1,$out1,v24
1661         stvx_u          $in1,$x10,$out
1662         le?vperm        $in3,$in3,$in3,$inpperm
1663          vcipher        $out2,$out2,v24
1664         stvx_u          $in2,$x20,$out
1665         le?vperm        $in4,$in4,$in4,$inpperm
1666          vcipher        $out3,$out3,v24
1667         stvx_u          $in3,$x30,$out
1668         le?vperm        $in5,$in5,$in5,$inpperm
1669          vcipher        $out4,$out4,v24
1670         stvx_u          $in4,$x40,$out
1671         le?vperm        $in6,$in6,$in6,$inpperm
1672          vcipher        $out5,$out5,v24
1673         stvx_u          $in5,$x50,$out
1674         le?vperm        $in7,$in7,$in7,$inpperm
1675          vcipher        $out6,$out6,v24
1676         stvx_u          $in6,$x60,$out
1677          vcipher        $out7,$out7,v24
1678         stvx_u          $in7,$x70,$out
1679         addi            $out,$out,0x80
1680
1681         b               Loop_ctr32_enc8x_middle
1682
1683 .align  5
1684 Lctr32_enc8x_break:
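                                                # here $len = 16*(blocks left)-128,
                                                # one of -0x70,-0x60,...,-0x10,0;
                                                # dispatch to the 1..8 block tails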
1685         cmpwi           $len,-0x60
1686         blt             Lctr32_enc8x_one
1687         nop
1688         beq             Lctr32_enc8x_two
1689         cmpwi           $len,-0x40
1690         blt             Lctr32_enc8x_three
1691         nop
1692         beq             Lctr32_enc8x_four
1693         cmpwi           $len,-0x20
1694         blt             Lctr32_enc8x_five
1695         nop
1696         beq             Lctr32_enc8x_six
1697         cmpwi           $len,0x00
1698         blt             Lctr32_enc8x_seven
1699
1700 Lctr32_enc8x_eight:
1701         vcipherlast     $out0,$out0,$in0
1702         vcipherlast     $out1,$out1,$in1
1703         vcipherlast     $out2,$out2,$in2
1704         vcipherlast     $out3,$out3,$in3
1705         vcipherlast     $out4,$out4,$in4
1706         vcipherlast     $out5,$out5,$in5
1707         vcipherlast     $out6,$out6,$in6
1708         vcipherlast     $out7,$out7,$in7
1709
1710         le?vperm        $out0,$out0,$out0,$inpperm
1711         le?vperm        $out1,$out1,$out1,$inpperm
1712         stvx_u          $out0,$x00,$out
1713         le?vperm        $out2,$out2,$out2,$inpperm
1714         stvx_u          $out1,$x10,$out
1715         le?vperm        $out3,$out3,$out3,$inpperm
1716         stvx_u          $out2,$x20,$out
1717         le?vperm        $out4,$out4,$out4,$inpperm
1718         stvx_u          $out3,$x30,$out
1719         le?vperm        $out5,$out5,$out5,$inpperm
1720         stvx_u          $out4,$x40,$out
1721         le?vperm        $out6,$out6,$out6,$inpperm
1722         stvx_u          $out5,$x50,$out
1723         le?vperm        $out7,$out7,$out7,$inpperm
1724         stvx_u          $out6,$x60,$out
1725         stvx_u          $out7,$x70,$out
1726         addi            $out,$out,0x80
1727         b               Lctr32_enc8x_done
1728
1729 .align  5
1730 Lctr32_enc8x_seven:
1731         vcipherlast     $out0,$out0,$in1
1732         vcipherlast     $out1,$out1,$in2
1733         vcipherlast     $out2,$out2,$in3
1734         vcipherlast     $out3,$out3,$in4
1735         vcipherlast     $out4,$out4,$in5
1736         vcipherlast     $out5,$out5,$in6
1737         vcipherlast     $out6,$out6,$in7
1738
1739         le?vperm        $out0,$out0,$out0,$inpperm
1740         le?vperm        $out1,$out1,$out1,$inpperm
1741         stvx_u          $out0,$x00,$out
1742         le?vperm        $out2,$out2,$out2,$inpperm
1743         stvx_u          $out1,$x10,$out
1744         le?vperm        $out3,$out3,$out3,$inpperm
1745         stvx_u          $out2,$x20,$out
1746         le?vperm        $out4,$out4,$out4,$inpperm
1747         stvx_u          $out3,$x30,$out
1748         le?vperm        $out5,$out5,$out5,$inpperm
1749         stvx_u          $out4,$x40,$out
1750         le?vperm        $out6,$out6,$out6,$inpperm
1751         stvx_u          $out5,$x50,$out
1752         stvx_u          $out6,$x60,$out
1753         addi            $out,$out,0x70
1754         b               Lctr32_enc8x_done
1755
1756 .align  5
1757 Lctr32_enc8x_six:
1758         vcipherlast     $out0,$out0,$in2
1759         vcipherlast     $out1,$out1,$in3
1760         vcipherlast     $out2,$out2,$in4
1761         vcipherlast     $out3,$out3,$in5
1762         vcipherlast     $out4,$out4,$in6
1763         vcipherlast     $out5,$out5,$in7
1764
1765         le?vperm        $out0,$out0,$out0,$inpperm
1766         le?vperm        $out1,$out1,$out1,$inpperm
1767         stvx_u          $out0,$x00,$out
1768         le?vperm        $out2,$out2,$out2,$inpperm
1769         stvx_u          $out1,$x10,$out
1770         le?vperm        $out3,$out3,$out3,$inpperm
1771         stvx_u          $out2,$x20,$out
1772         le?vperm        $out4,$out4,$out4,$inpperm
1773         stvx_u          $out3,$x30,$out
1774         le?vperm        $out5,$out5,$out5,$inpperm
1775         stvx_u          $out4,$x40,$out
1776         stvx_u          $out5,$x50,$out
1777         addi            $out,$out,0x60
1778         b               Lctr32_enc8x_done
1779
1780 .align  5
1781 Lctr32_enc8x_five:
1782         vcipherlast     $out0,$out0,$in3
1783         vcipherlast     $out1,$out1,$in4
1784         vcipherlast     $out2,$out2,$in5
1785         vcipherlast     $out3,$out3,$in6
1786         vcipherlast     $out4,$out4,$in7
1787
1788         le?vperm        $out0,$out0,$out0,$inpperm
1789         le?vperm        $out1,$out1,$out1,$inpperm
1790         stvx_u          $out0,$x00,$out
1791         le?vperm        $out2,$out2,$out2,$inpperm
1792         stvx_u          $out1,$x10,$out
1793         le?vperm        $out3,$out3,$out3,$inpperm
1794         stvx_u          $out2,$x20,$out
1795         le?vperm        $out4,$out4,$out4,$inpperm
1796         stvx_u          $out3,$x30,$out
1797         stvx_u          $out4,$x40,$out
1798         addi            $out,$out,0x50
1799         b               Lctr32_enc8x_done
1800
1801 .align  5
1802 Lctr32_enc8x_four:
1803         vcipherlast     $out0,$out0,$in4
1804         vcipherlast     $out1,$out1,$in5
1805         vcipherlast     $out2,$out2,$in6
1806         vcipherlast     $out3,$out3,$in7
1807
1808         le?vperm        $out0,$out0,$out0,$inpperm
1809         le?vperm        $out1,$out1,$out1,$inpperm
1810         stvx_u          $out0,$x00,$out
1811         le?vperm        $out2,$out2,$out2,$inpperm
1812         stvx_u          $out1,$x10,$out
1813         le?vperm        $out3,$out3,$out3,$inpperm
1814         stvx_u          $out2,$x20,$out
1815         stvx_u          $out3,$x30,$out
1816         addi            $out,$out,0x40
1817         b               Lctr32_enc8x_done
1818
1819 .align  5
1820 Lctr32_enc8x_three:
1821         vcipherlast     $out0,$out0,$in5
1822         vcipherlast     $out1,$out1,$in6
1823         vcipherlast     $out2,$out2,$in7
1824
1825         le?vperm        $out0,$out0,$out0,$inpperm
1826         le?vperm        $out1,$out1,$out1,$inpperm
1827         stvx_u          $out0,$x00,$out
1828         le?vperm        $out2,$out2,$out2,$inpperm
1829         stvx_u          $out1,$x10,$out
1830         stvx_u          $out2,$x20,$out
1831         addi            $out,$out,0x30
1832         b               Lctr32_enc8x_done
1833
1834 .align  5
1835 Lctr32_enc8x_two:
1836         vcipherlast     $out0,$out0,$in6
1837         vcipherlast     $out1,$out1,$in7
1838
1839         le?vperm        $out0,$out0,$out0,$inpperm
1840         le?vperm        $out1,$out1,$out1,$inpperm
1841         stvx_u          $out0,$x00,$out
1842         stvx_u          $out1,$x10,$out
1843         addi            $out,$out,0x20
1844         b               Lctr32_enc8x_done
1845
1846 .align  5
1847 Lctr32_enc8x_one:
1848         vcipherlast     $out0,$out0,$in7
1849
1850         le?vperm        $out0,$out0,$out0,$inpperm
1851         stvx_u          $out0,0,$out
1852         addi            $out,$out,0x10
1853
1854 Lctr32_enc8x_done:
1855         li              r10,`$FRAME+15`
1856         li              r11,`$FRAME+31`
1857         stvx            $inpperm,r10,$sp        # wipe copies of round keys
1858         addi            r10,r10,32
1859         stvx            $inpperm,r11,$sp
1860         addi            r11,r11,32
1861         stvx            $inpperm,r10,$sp
1862         addi            r10,r10,32
1863         stvx            $inpperm,r11,$sp
1864         addi            r11,r11,32
1865         stvx            $inpperm,r10,$sp
1866         addi            r10,r10,32
1867         stvx            $inpperm,r11,$sp
1868         addi            r11,r11,32
1869         stvx            $inpperm,r10,$sp
1870         addi            r10,r10,32
1871         stvx            $inpperm,r11,$sp
1872         addi            r11,r11,32
1873
1874         mtspr           256,$vrsave
1875         lvx             v20,r10,$sp             # ABI says so
1876         addi            r10,r10,32
1877         lvx             v21,r11,$sp
1878         addi            r11,r11,32
1879         lvx             v22,r10,$sp
1880         addi            r10,r10,32
1881         lvx             v23,r11,$sp
1882         addi            r11,r11,32
1883         lvx             v24,r10,$sp
1884         addi            r10,r10,32
1885         lvx             v25,r11,$sp
1886         addi            r11,r11,32
1887         lvx             v26,r10,$sp
1888         addi            r10,r10,32
1889         lvx             v27,r11,$sp
1890         addi            r11,r11,32
1891         lvx             v28,r10,$sp
1892         addi            r10,r10,32
1893         lvx             v29,r11,$sp
1894         addi            r11,r11,32
1895         lvx             v30,r10,$sp
1896         lvx             v31,r11,$sp
1897         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1898         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1899         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1900         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1901         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1902         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1903         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1904         blr
1905         .long           0
1906         .byte           0,12,0x04,0,0x80,6,6,0
1907         .long           0
1908 .size   .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1909 ___
1910 }}      }}}
1911
1912 #########################################################################
1913 {{{     # XTS procedures                                                #
1914 # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,   #
1915 #                             const AES_KEY *key1, const AES_KEY *key2, #
1916 #                             [const] unsigned char iv[16]);            #
1917 # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which   #
1918 # the input tweak value is assumed to be encrypted already, and the     #
1919 # last tweak value, suitable for a consecutive call on the same chunk   #
1920 # of data, is written back to the original buffer. In addition, in      #
1921 # "tweak chaining" mode only complete input blocks are processed.       #
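#                                                                        #
# A minimal caller sketch (hypothetical C, for illustration only; the   #
# identifiers are placeholders, not part of this module):                #
#                                                                        #
#       unsigned char iv[16];                   /* input tweak */        #
#       aes_p8_xts_encrypt(in, out, len, &key1, &key2, iv);              #
#       aes_p8_xts_encrypt(in, out, len, &key1, NULL,  iv); /* chain */  #
#                                                                        #
# In the chaining form iv must already hold an encrypted tweak, e.g.    #
# as written back by a preceding call.                                   #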
1922
1923 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =     map("r$_",(3..10));
1924 my ($rndkey0,$rndkey1,$inout) =                         map("v$_",(0..2));
1925 my ($output,$inptail,$inpperm,$leperm,$keyperm) =       map("v$_",(3..7));
1926 my ($tweak,$seven,$eighty7,$tmp,$tweak1) =              map("v$_",(8..12));
1927 my $taillen = $key2;
1928
1929    ($inp,$idx) = ($idx,$inp);                           # reassign
1930
1931 $code.=<<___;
1932 .globl  .${prefix}_xts_encrypt
1933 .align  5
1934 .${prefix}_xts_encrypt:
1935         mr              $inp,r3                         # reassign
1936         li              r3,-1
1937         ${UCMP}i        $len,16
1938         bltlr-
1939
1940         lis             r0,0xfff0
1941         mfspr           r12,256                         # save vrsave
1942         li              r11,0
1943         mtspr           256,r0
1944
1945         vspltisb        $seven,0x07                     # 0x070707..07
1946         le?lvsl         $leperm,r11,r11
1947         le?vspltisb     $tmp,0x0f
1948         le?vxor         $leperm,$leperm,$seven
1949
1950         li              $idx,15
1951         lvx             $tweak,0,$ivp                   # load [unaligned] iv
1952         lvsl            $inpperm,0,$ivp
1953         lvx             $inptail,$idx,$ivp
1954         le?vxor         $inpperm,$inpperm,$tmp
1955         vperm           $tweak,$tweak,$inptail,$inpperm
1956
1957         neg             r11,$inp
1958         lvsr            $inpperm,0,r11                  # prepare for unaligned load
1959         lvx             $inout,0,$inp
1960         addi            $inp,$inp,15                    # 15 is not a typo
1961         le?vxor         $inpperm,$inpperm,$tmp
1962
1963         ${UCMP}i        $key2,0                         # key2==NULL?
1964         beq             Lxts_enc_no_key2
1965
1966         ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
1967         lwz             $rounds,240($key2)
1968         srwi            $rounds,$rounds,1
1969         subi            $rounds,$rounds,1
1970         li              $idx,16
1971
1972         lvx             $rndkey0,0,$key2
1973         lvx             $rndkey1,$idx,$key2
1974         addi            $idx,$idx,16
1975         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1976         vxor            $tweak,$tweak,$rndkey0
1977         lvx             $rndkey0,$idx,$key2
1978         addi            $idx,$idx,16
1979         mtctr           $rounds
1980
1981 Ltweak_xts_enc:
1982         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1983         vcipher         $tweak,$tweak,$rndkey1
1984         lvx             $rndkey1,$idx,$key2
1985         addi            $idx,$idx,16
1986         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1987         vcipher         $tweak,$tweak,$rndkey0
1988         lvx             $rndkey0,$idx,$key2
1989         addi            $idx,$idx,16
1990         bdnz            Ltweak_xts_enc
1991
1992         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1993         vcipher         $tweak,$tweak,$rndkey1
1994         lvx             $rndkey1,$idx,$key2
1995         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1996         vcipherlast     $tweak,$tweak,$rndkey0
1997
1998         li              $ivp,0                          # don't chain the tweak
1999         b               Lxts_enc
2000
2001 Lxts_enc_no_key2:
2002         li              $idx,-16
2003         and             $len,$len,$idx                  # in "tweak chaining"
2004                                                         # mode only complete
2005                                                         # blocks are processed
2006 Lxts_enc:
2007         lvx             $inptail,0,$inp
2008         addi            $inp,$inp,16
2009
2010         ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
2011         lwz             $rounds,240($key1)
2012         srwi            $rounds,$rounds,1
2013         subi            $rounds,$rounds,1
2014         li              $idx,16
2015
2016         vslb            $eighty7,$seven,$seven          # 0x808080..80
2017         vor             $eighty7,$eighty7,$seven        # 0x878787..87
2018         vspltisb        $tmp,1                          # 0x010101..01
2019         vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
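                                                        # tweak doubling in GF(2^128):
                                                        # vaddubm doubles each byte,
                                                        # vsrab yields per-byte carry
                                                        # masks, vsldoi moves them up
                                                        # one byte, and vand with
                                                        # 0x870101..01 folds the top
                                                        # byte's carry back as 0x87,
                                                        # i.e. reduction modulo
                                                        # x^128+x^7+x^2+x+1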
2020
2021         ${UCMP}i        $len,96
2022         bge             _aesp8_xts_encrypt6x
2023
2024         andi.           $taillen,$len,15
2025         subic           r0,$len,32
2026         subi            $taillen,$taillen,16
2027         subfe           r0,r0,r0
2028         and             r0,r0,$taillen
2029         add             $inp,$inp,r0
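                                                        # branchless: when only 16..31
                                                        # bytes remain, r0 = tail-16
                                                        # pulls $inp back so the last
                                                        # 16-byte load ends exactly at
                                                        # the end of the input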
2030
2031         lvx             $rndkey0,0,$key1
2032         lvx             $rndkey1,$idx,$key1
2033         addi            $idx,$idx,16
2034         vperm           $inout,$inout,$inptail,$inpperm
2035         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2036         vxor            $inout,$inout,$tweak
2037         vxor            $inout,$inout,$rndkey0
2038         lvx             $rndkey0,$idx,$key1
2039         addi            $idx,$idx,16
2040         mtctr           $rounds
2041         b               Loop_xts_enc
2042
2043 .align  5
2044 Loop_xts_enc:
2045         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2046         vcipher         $inout,$inout,$rndkey1
2047         lvx             $rndkey1,$idx,$key1
2048         addi            $idx,$idx,16
2049         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2050         vcipher         $inout,$inout,$rndkey0
2051         lvx             $rndkey0,$idx,$key1
2052         addi            $idx,$idx,16
2053         bdnz            Loop_xts_enc
2054
2055         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2056         vcipher         $inout,$inout,$rndkey1
2057         lvx             $rndkey1,$idx,$key1
2058         li              $idx,16
2059         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2060         vxor            $rndkey0,$rndkey0,$tweak
2061         vcipherlast     $output,$inout,$rndkey0
2062
2063         le?vperm        $tmp,$output,$output,$leperm
2064         be?nop
2065         le?stvx_u       $tmp,0,$out
2066         be?stvx_u       $output,0,$out
2067         addi            $out,$out,16
2068
2069         subic.          $len,$len,16
2070         beq             Lxts_enc_done
2071
2072         vmr             $inout,$inptail
2073         lvx             $inptail,0,$inp
2074         addi            $inp,$inp,16
2075         lvx             $rndkey0,0,$key1
2076         lvx             $rndkey1,$idx,$key1
2077         addi            $idx,$idx,16
2078
2079         subic           r0,$len,32
2080         subfe           r0,r0,r0
2081         and             r0,r0,$taillen
2082         add             $inp,$inp,r0
2083
2084         vsrab           $tmp,$tweak,$seven              # next tweak value
2085         vaddubm         $tweak,$tweak,$tweak
2086         vsldoi          $tmp,$tmp,$tmp,15
2087         vand            $tmp,$tmp,$eighty7
2088         vxor            $tweak,$tweak,$tmp
2089
2090         vperm           $inout,$inout,$inptail,$inpperm
2091         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2092         vxor            $inout,$inout,$tweak
2093         vxor            $output,$output,$rndkey0        # just in case $len<16
2094         vxor            $inout,$inout,$rndkey0
2095         lvx             $rndkey0,$idx,$key1
2096         addi            $idx,$idx,16
2097
2098         mtctr           $rounds
2099         ${UCMP}i        $len,16
2100         bge             Loop_xts_enc
2101
2102         vxor            $output,$output,$tweak
2103         lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
2104         vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
2105         vspltisb        $tmp,-1
2106         vperm           $inptail,$inptail,$tmp,$inpperm
2107         vsel            $inout,$inout,$output,$inptail
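                                                        # mask = $len zero bytes, then
                                                        # 0xff: vsel keeps $len bytes
                                                        # of the new block and borrows
                                                        # the rest from the previous
                                                        # ciphertext block (stealing),
                                                        # still in loop-entry form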
2108
2109         subi            r11,$out,17
2110         subi            $out,$out,16
2111         mtctr           $len
2112         li              $len,16
2113 Loop_xts_enc_steal:
2114         lbzu            r0,1(r11)
2115         stb             r0,16(r11)
2116         bdnz            Loop_xts_enc_steal
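                                                        # ciphertext stealing: the head
                                                        # of the last full ciphertext
                                                        # block is copied up to the tail
                                                        # position; the spliced block is
                                                        # then encrypted once more over
                                                        # the previous 16 bytes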
2117
2118         mtctr           $rounds
2119         b               Loop_xts_enc                    # one more time...
2120
2121 Lxts_enc_done:
2122         ${UCMP}i        $ivp,0
2123         beq             Lxts_enc_ret
2124
2125         vsrab           $tmp,$tweak,$seven              # next tweak value
2126         vaddubm         $tweak,$tweak,$tweak
2127         vsldoi          $tmp,$tmp,$tmp,15
2128         vand            $tmp,$tmp,$eighty7
2129         vxor            $tweak,$tweak,$tmp
2130
2131         le?vperm        $tweak,$tweak,$tweak,$leperm
2132         stvx_u          $tweak,0,$ivp
2133
2134 Lxts_enc_ret:
2135         mtspr           256,r12                         # restore vrsave
2136         li              r3,0
2137         blr
2138         .long           0
2139         .byte           0,12,0x04,0,0x80,6,6,0
2140         .long           0
2141 .size   .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2142
2143 .globl  .${prefix}_xts_decrypt
2144 .align  5
2145 .${prefix}_xts_decrypt:
2146         mr              $inp,r3                         # reassign
2147         li              r3,-1
2148         ${UCMP}i        $len,16
2149         bltlr-
2150
2151         lis             r0,0xfff8
2152         mfspr           r12,256                         # save vrsave
2153         li              r11,0
2154         mtspr           256,r0
2155
2156         andi.           r0,$len,15
2157         neg             r0,r0
2158         andi.           r0,r0,16
2159         sub             $len,$len,r0
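                                                        # if a tail exists, hold one
                                                        # complete block back: with
                                                        # stealing it must be decrypted
                                                        # with the *next* tweak, which
                                                        # happens in the tail path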
2160
2161         vspltisb        $seven,0x07                     # 0x070707..07
2162         le?lvsl         $leperm,r11,r11
2163         le?vspltisb     $tmp,0x0f
2164         le?vxor         $leperm,$leperm,$seven
2165
2166         li              $idx,15
2167         lvx             $tweak,0,$ivp                   # load [unaligned] iv
2168         lvsl            $inpperm,0,$ivp
2169         lvx             $inptail,$idx,$ivp
2170         le?vxor         $inpperm,$inpperm,$tmp
2171         vperm           $tweak,$tweak,$inptail,$inpperm
2172
2173         neg             r11,$inp
2174         lvsr            $inpperm,0,r11                  # prepare for unaligned load
2175         lvx             $inout,0,$inp
2176         addi            $inp,$inp,15                    # 15 is not a typo
2177         le?vxor         $inpperm,$inpperm,$tmp
2178
2179         ${UCMP}i        $key2,0                         # key2==NULL?
2180         beq             Lxts_dec_no_key2
2181
2182         ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
2183         lwz             $rounds,240($key2)
2184         srwi            $rounds,$rounds,1
2185         subi            $rounds,$rounds,1
2186         li              $idx,16
2187
2188         lvx             $rndkey0,0,$key2
2189         lvx             $rndkey1,$idx,$key2
2190         addi            $idx,$idx,16
2191         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2192         vxor            $tweak,$tweak,$rndkey0
2193         lvx             $rndkey0,$idx,$key2
2194         addi            $idx,$idx,16
2195         mtctr           $rounds
2196
2197 Ltweak_xts_dec:
2198         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2199         vcipher         $tweak,$tweak,$rndkey1
2200         lvx             $rndkey1,$idx,$key2
2201         addi            $idx,$idx,16
2202         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2203         vcipher         $tweak,$tweak,$rndkey0
2204         lvx             $rndkey0,$idx,$key2
2205         addi            $idx,$idx,16
2206         bdnz            Ltweak_xts_dec
2207
2208         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2209         vcipher         $tweak,$tweak,$rndkey1
2210         lvx             $rndkey1,$idx,$key2
2211         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2212         vcipherlast     $tweak,$tweak,$rndkey0
2213
2214         li              $ivp,0                          # don't chain the tweak
2215         b               Lxts_dec
2216
2217 Lxts_dec_no_key2:
2218         neg             $idx,$len
2219         andi.           $idx,$idx,15
2220         add             $len,$len,$idx                  # in "tweak chaining"
2221                                                         # mode only complete
2222                                                         # blocks are processed
2223 Lxts_dec:
2224         lvx             $inptail,0,$inp
2225         addi            $inp,$inp,16
2226
2227         ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
2228         lwz             $rounds,240($key1)
2229         srwi            $rounds,$rounds,1
2230         subi            $rounds,$rounds,1
2231         li              $idx,16
2232
2233         vslb            $eighty7,$seven,$seven          # 0x808080..80
2234         vor             $eighty7,$eighty7,$seven        # 0x878787..87
2235         vspltisb        $tmp,1                          # 0x010101..01
2236         vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
2237
2238         ${UCMP}i        $len,96
2239         bge             _aesp8_xts_decrypt6x
2240
2241         lvx             $rndkey0,0,$key1
2242         lvx             $rndkey1,$idx,$key1
2243         addi            $idx,$idx,16
2244         vperm           $inout,$inout,$inptail,$inpperm
2245         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2246         vxor            $inout,$inout,$tweak
2247         vxor            $inout,$inout,$rndkey0
2248         lvx             $rndkey0,$idx,$key1
2249         addi            $idx,$idx,16
2250         mtctr           $rounds
2251
2252         ${UCMP}i        $len,16
2253         blt             Ltail_xts_dec
2254         be?b            Loop_xts_dec
2255
2256 .align  5
2257 Loop_xts_dec:
2258         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2259         vncipher        $inout,$inout,$rndkey1
2260         lvx             $rndkey1,$idx,$key1
2261         addi            $idx,$idx,16
2262         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2263         vncipher        $inout,$inout,$rndkey0
2264         lvx             $rndkey0,$idx,$key1
2265         addi            $idx,$idx,16
2266         bdnz            Loop_xts_dec
2267
2268         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2269         vncipher        $inout,$inout,$rndkey1
2270         lvx             $rndkey1,$idx,$key1
2271         li              $idx,16
2272         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2273         vxor            $rndkey0,$rndkey0,$tweak
2274         vncipherlast    $output,$inout,$rndkey0
2275
2276         le?vperm        $tmp,$output,$output,$leperm
2277         be?nop
2278         le?stvx_u       $tmp,0,$out
2279         be?stvx_u       $output,0,$out
2280         addi            $out,$out,16
2281
2282         subic.          $len,$len,16
2283         beq             Lxts_dec_done
2284
2285         vmr             $inout,$inptail
2286         lvx             $inptail,0,$inp
2287         addi            $inp,$inp,16
2288         lvx             $rndkey0,0,$key1
2289         lvx             $rndkey1,$idx,$key1
2290         addi            $idx,$idx,16
2291
2292         vsrab           $tmp,$tweak,$seven              # next tweak value
2293         vaddubm         $tweak,$tweak,$tweak
2294         vsldoi          $tmp,$tmp,$tmp,15
2295         vand            $tmp,$tmp,$eighty7
2296         vxor            $tweak,$tweak,$tmp
2297
2298         vperm           $inout,$inout,$inptail,$inpperm
2299         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2300         vxor            $inout,$inout,$tweak
2301         vxor            $inout,$inout,$rndkey0
2302         lvx             $rndkey0,$idx,$key1
2303         addi            $idx,$idx,16
2304
2305         mtctr           $rounds
2306         ${UCMP}i        $len,16
2307         bge             Loop_xts_dec
2308
2309 Ltail_xts_dec:
2310         vsrab           $tmp,$tweak,$seven              # next tweak value
2311         vaddubm         $tweak1,$tweak,$tweak
2312         vsldoi          $tmp,$tmp,$tmp,15
2313         vand            $tmp,$tmp,$eighty7
2314         vxor            $tweak1,$tweak1,$tmp
2315
2316         subi            $inp,$inp,16
2317         add             $inp,$inp,$len
2318
2319         vxor            $inout,$inout,$tweak            # :-(
2320         vxor            $inout,$inout,$tweak1           # :-)
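                                                        # the last complete block is
                                                        # decrypted with the next
                                                        # tweak ($tweak1); this xor
                                                        # pair swaps out the $tweak
                                                        # applied at loop entry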
2321
2322 Loop_xts_dec_short:
2323         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2324         vncipher        $inout,$inout,$rndkey1
2325         lvx             $rndkey1,$idx,$key1
2326         addi            $idx,$idx,16
2327         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2328         vncipher        $inout,$inout,$rndkey0
2329         lvx             $rndkey0,$idx,$key1
2330         addi            $idx,$idx,16
2331         bdnz            Loop_xts_dec_short
2332
2333         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2334         vncipher        $inout,$inout,$rndkey1
2335         lvx             $rndkey1,$idx,$key1
2336         li              $idx,16
2337         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2338         vxor            $rndkey0,$rndkey0,$tweak1
2339         vncipherlast    $output,$inout,$rndkey0
2340
2341         le?vperm        $tmp,$output,$output,$leperm
2342         be?nop
2343         le?stvx_u       $tmp,0,$out
2344         be?stvx_u       $output,0,$out
2345
2346         vmr             $inout,$inptail
2347         lvx             $inptail,0,$inp
2348         #addi           $inp,$inp,16
2349         lvx             $rndkey0,0,$key1
2350         lvx             $rndkey1,$idx,$key1
2351         addi            $idx,$idx,16
2352         vperm           $inout,$inout,$inptail,$inpperm
2353         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2354
2355         lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
2356         vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
2357         vspltisb        $tmp,-1
2358         vperm           $inptail,$inptail,$tmp,$inpperm
2359         vsel            $inout,$inout,$output,$inptail
2360
2361         vxor            $rndkey0,$rndkey0,$tweak
2362         vxor            $inout,$inout,$rndkey0
2363         lvx             $rndkey0,$idx,$key1
2364         addi            $idx,$idx,16
2365
2366         subi            r11,$out,1
2367         mtctr           $len
2368         li              $len,16
2369 Loop_xts_dec_steal:
2370         lbzu            r0,1(r11)
2371         stb             r0,16(r11)
2372         bdnz            Loop_xts_dec_steal
2373
2374         mtctr           $rounds
2375         b               Loop_xts_dec                    # one more time...
2376
2377 Lxts_dec_done:
2378         ${UCMP}i        $ivp,0
2379         beq             Lxts_dec_ret
2380
2381         vsrab           $tmp,$tweak,$seven              # next tweak value
2382         vaddubm         $tweak,$tweak,$tweak
2383         vsldoi          $tmp,$tmp,$tmp,15
2384         vand            $tmp,$tmp,$eighty7
2385         vxor            $tweak,$tweak,$tmp
2386
2387         le?vperm        $tweak,$tweak,$tweak,$leperm
2388         stvx_u          $tweak,0,$ivp
2389
2390 Lxts_dec_ret:
2391         mtspr           256,r12                         # restore vrsave
2392         li              r3,0
2393         blr
2394         .long           0
2395         .byte           0,12,0x04,0,0x80,6,6,0
2396         .long           0
2397 .size   .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2398 ___
2399 #########################################################################
2400 {{      # Optimized XTS procedures                                      #
2401 my $key_=$key2;
2402 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2403     $x00=0 if ($flavour =~ /osx/);
2404 my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
2405 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2406 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2407 my $rndkey0="v23";      # v24-v25 rotating buffer for first round keys
2408                         # v26-v31 last 6 round keys
2409 my ($keyperm)=($out0);  # aliases with "caller", redundant assignment
2410 my $taillen=$x70;
2411
2412 $code.=<<___;
2413 .align  5
2414 _aesp8_xts_encrypt6x:
2415         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2416         mflr            r11
2417         li              r7,`$FRAME+8*16+15`
2418         li              r3,`$FRAME+8*16+31`
2419         $PUSH           r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2420         stvx            v20,r7,$sp              # ABI says so
2421         addi            r7,r7,32
2422         stvx            v21,r3,$sp
2423         addi            r3,r3,32
2424         stvx            v22,r7,$sp
2425         addi            r7,r7,32
2426         stvx            v23,r3,$sp
2427         addi            r3,r3,32
2428         stvx            v24,r7,$sp
2429         addi            r7,r7,32
2430         stvx            v25,r3,$sp
2431         addi            r3,r3,32
2432         stvx            v26,r7,$sp
2433         addi            r7,r7,32
2434         stvx            v27,r3,$sp
2435         addi            r3,r3,32
2436         stvx            v28,r7,$sp
2437         addi            r7,r7,32
2438         stvx            v29,r3,$sp
2439         addi            r3,r3,32
2440         stvx            v30,r7,$sp
2441         stvx            v31,r3,$sp
2442         li              r0,-1
2443         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
2444         li              $x10,0x10
2445         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2446         li              $x20,0x20
2447         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2448         li              $x30,0x30
2449         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2450         li              $x40,0x40
2451         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2452         li              $x50,0x50
2453         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2454         li              $x60,0x60
2455         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2456         li              $x70,0x70
2457         mtspr           256,r0
2458
2459         subi            $rounds,$rounds,3       # -4 in total
2460
2461         lvx             $rndkey0,$x00,$key1     # load key schedule
2462         lvx             v30,$x10,$key1
2463         addi            $key1,$key1,0x20
2464         lvx             v31,$x00,$key1
2465         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
2466         addi            $key_,$sp,$FRAME+15
2467         mtctr           $rounds
2468
2469 Load_xts_enc_key:
2470         ?vperm          v24,v30,v31,$keyperm
2471         lvx             v30,$x10,$key1
2472         addi            $key1,$key1,0x20
2473         stvx            v24,$x00,$key_          # off-load round[1]
2474         ?vperm          v25,v31,v30,$keyperm
2475         lvx             v31,$x00,$key1
2476         stvx            v25,$x10,$key_          # off-load round[2]
2477         addi            $key_,$key_,0x20
2478         bdnz            Load_xts_enc_key
2479
2480         lvx             v26,$x10,$key1
2481         ?vperm          v24,v30,v31,$keyperm
2482         lvx             v27,$x20,$key1
2483         stvx            v24,$x00,$key_          # off-load round[3]
2484         ?vperm          v25,v31,v26,$keyperm
2485         lvx             v28,$x30,$key1
2486         stvx            v25,$x10,$key_          # off-load round[4]
2487         addi            $key_,$sp,$FRAME+15     # rewind $key_
2488         ?vperm          v26,v26,v27,$keyperm
2489         lvx             v29,$x40,$key1
2490         ?vperm          v27,v27,v28,$keyperm
2491         lvx             v30,$x50,$key1
2492         ?vperm          v28,v28,v29,$keyperm
2493         lvx             v31,$x60,$key1
2494         ?vperm          v29,v29,v30,$keyperm
2495         lvx             $twk5,$x70,$key1        # borrow $twk5
2496         ?vperm          v30,v30,v31,$keyperm
2497         lvx             v24,$x00,$key_          # pre-load round[1]
2498         ?vperm          v31,v31,$twk5,$keyperm
2499         lvx             v25,$x10,$key_          # pre-load round[2]
2500
2501          vperm          $in0,$inout,$inptail,$inpperm
2502          subi           $inp,$inp,31            # undo "caller"
2503         vxor            $twk0,$tweak,$rndkey0
2504         vsrab           $tmp,$tweak,$seven      # next tweak value
2505         vaddubm         $tweak,$tweak,$tweak
2506         vsldoi          $tmp,$tmp,$tmp,15
2507         vand            $tmp,$tmp,$eighty7
2508          vxor           $out0,$in0,$twk0
2509         vxor            $tweak,$tweak,$tmp
2510
2511          lvx_u          $in1,$x10,$inp
2512         vxor            $twk1,$tweak,$rndkey0
2513         vsrab           $tmp,$tweak,$seven      # next tweak value
2514         vaddubm         $tweak,$tweak,$tweak
2515         vsldoi          $tmp,$tmp,$tmp,15
2516          le?vperm       $in1,$in1,$in1,$leperm
2517         vand            $tmp,$tmp,$eighty7
2518          vxor           $out1,$in1,$twk1
2519         vxor            $tweak,$tweak,$tmp
2520
2521          lvx_u          $in2,$x20,$inp
2522          andi.          $taillen,$len,15
2523         vxor            $twk2,$tweak,$rndkey0
2524         vsrab           $tmp,$tweak,$seven      # next tweak value
2525         vaddubm         $tweak,$tweak,$tweak
2526         vsldoi          $tmp,$tmp,$tmp,15
2527          le?vperm       $in2,$in2,$in2,$leperm
2528         vand            $tmp,$tmp,$eighty7
2529          vxor           $out2,$in2,$twk2
2530         vxor            $tweak,$tweak,$tmp
2531
2532          lvx_u          $in3,$x30,$inp
2533          sub            $len,$len,$taillen
2534         vxor            $twk3,$tweak,$rndkey0
2535         vsrab           $tmp,$tweak,$seven      # next tweak value
2536         vaddubm         $tweak,$tweak,$tweak
2537         vsldoi          $tmp,$tmp,$tmp,15
2538          le?vperm       $in3,$in3,$in3,$leperm
2539         vand            $tmp,$tmp,$eighty7
2540          vxor           $out3,$in3,$twk3
2541         vxor            $tweak,$tweak,$tmp
2542
2543          lvx_u          $in4,$x40,$inp
2544          subi           $len,$len,0x60
2545         vxor            $twk4,$tweak,$rndkey0
2546         vsrab           $tmp,$tweak,$seven      # next tweak value
2547         vaddubm         $tweak,$tweak,$tweak
2548         vsldoi          $tmp,$tmp,$tmp,15
2549          le?vperm       $in4,$in4,$in4,$leperm
2550         vand            $tmp,$tmp,$eighty7
2551          vxor           $out4,$in4,$twk4
2552         vxor            $tweak,$tweak,$tmp
2553
2554          lvx_u          $in5,$x50,$inp
2555          addi           $inp,$inp,0x60
2556         vxor            $twk5,$tweak,$rndkey0
2557         vsrab           $tmp,$tweak,$seven      # next tweak value
2558         vaddubm         $tweak,$tweak,$tweak
2559         vsldoi          $tmp,$tmp,$tmp,15
2560          le?vperm       $in5,$in5,$in5,$leperm
2561         vand            $tmp,$tmp,$eighty7
2562          vxor           $out5,$in5,$twk5
2563         vxor            $tweak,$tweak,$tmp
2564
2565         vxor            v31,v31,$rndkey0
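                                                        # $twk0-$twk5 already carry
                                                        # $rndkey0, so folding it into
                                                        # v31 makes twkN^v31 below
                                                        # come out as tweak^(last
                                                        # round key) for vcipherlast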
2566         mtctr           $rounds
2567         b               Loop_xts_enc6x
2568
2569 .align  5
2570 Loop_xts_enc6x:
2571         vcipher         $out0,$out0,v24
2572         vcipher         $out1,$out1,v24
2573         vcipher         $out2,$out2,v24
2574         vcipher         $out3,$out3,v24
2575         vcipher         $out4,$out4,v24
2576         vcipher         $out5,$out5,v24
2577         lvx             v24,$x20,$key_          # round[3]
2578         addi            $key_,$key_,0x20
2579
2580         vcipher         $out0,$out0,v25
2581         vcipher         $out1,$out1,v25
2582         vcipher         $out2,$out2,v25
2583         vcipher         $out3,$out3,v25
2584         vcipher         $out4,$out4,v25
2585         vcipher         $out5,$out5,v25
2586         lvx             v25,$x10,$key_          # round[4]
2587         bdnz            Loop_xts_enc6x
2588
2589         subic           $len,$len,96            # $len-=96
2590          vxor           $in0,$twk0,v31          # xor with last round key
2591         vcipher         $out0,$out0,v24
2592         vcipher         $out1,$out1,v24
2593          vsrab          $tmp,$tweak,$seven      # next tweak value
2594          vxor           $twk0,$tweak,$rndkey0
2595          vaddubm        $tweak,$tweak,$tweak
2596         vcipher         $out2,$out2,v24
2597         vcipher         $out3,$out3,v24
2598          vsldoi         $tmp,$tmp,$tmp,15
2599         vcipher         $out4,$out4,v24
2600         vcipher         $out5,$out5,v24
2601
2602         subfe.          r0,r0,r0                # borrow?-1:0
2603          vand           $tmp,$tmp,$eighty7
2604         vcipher         $out0,$out0,v25
2605         vcipher         $out1,$out1,v25
2606          vxor           $tweak,$tweak,$tmp
2607         vcipher         $out2,$out2,v25
2608         vcipher         $out3,$out3,v25
2609          vxor           $in1,$twk1,v31
2610          vsrab          $tmp,$tweak,$seven      # next tweak value
2611          vxor           $twk1,$tweak,$rndkey0
2612         vcipher         $out4,$out4,v25
2613         vcipher         $out5,$out5,v25
2614
2615         and             r0,r0,$len
2616          vaddubm        $tweak,$tweak,$tweak
2617          vsldoi         $tmp,$tmp,$tmp,15
2618         vcipher         $out0,$out0,v26
2619         vcipher         $out1,$out1,v26
2620          vand           $tmp,$tmp,$eighty7
2621         vcipher         $out2,$out2,v26
2622         vcipher         $out3,$out3,v26
2623          vxor           $tweak,$tweak,$tmp
2624         vcipher         $out4,$out4,v26
2625         vcipher         $out5,$out5,v26
2626
2627         add             $inp,$inp,r0            # $inp is adjusted in such
2628                                                 # a way that at exit from the
2629                                                 # loop inX-in5 are loaded
2630                                                 # with the last "words"
         vxor           $in2,$twk2,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk2,$tweak,$rndkey0
         vaddubm        $tweak,$tweak,$tweak
        vcipher         $out0,$out0,v27
        vcipher         $out1,$out1,v27
         vsldoi         $tmp,$tmp,$tmp,15
        vcipher         $out2,$out2,v27
        vcipher         $out3,$out3,v27
         vand           $tmp,$tmp,$eighty7
        vcipher         $out4,$out4,v27
        vcipher         $out5,$out5,v27

        addi            $key_,$sp,$FRAME+15     # rewind $key_
         vxor           $tweak,$tweak,$tmp
        vcipher         $out0,$out0,v28
        vcipher         $out1,$out1,v28
         vxor           $in3,$twk3,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk3,$tweak,$rndkey0
        vcipher         $out2,$out2,v28
        vcipher         $out3,$out3,v28
         vaddubm        $tweak,$tweak,$tweak
         vsldoi         $tmp,$tmp,$tmp,15
        vcipher         $out4,$out4,v28
        vcipher         $out5,$out5,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]
         vand           $tmp,$tmp,$eighty7

        vcipher         $out0,$out0,v29
        vcipher         $out1,$out1,v29
         vxor           $tweak,$tweak,$tmp
        vcipher         $out2,$out2,v29
        vcipher         $out3,$out3,v29
         vxor           $in4,$twk4,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk4,$tweak,$rndkey0
        vcipher         $out4,$out4,v29
        vcipher         $out5,$out5,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vaddubm        $tweak,$tweak,$tweak
         vsldoi         $tmp,$tmp,$tmp,15

        vcipher         $out0,$out0,v30
        vcipher         $out1,$out1,v30
         vand           $tmp,$tmp,$eighty7
        vcipher         $out2,$out2,v30
        vcipher         $out3,$out3,v30
         vxor           $tweak,$tweak,$tmp
        vcipher         $out4,$out4,v30
        vcipher         $out5,$out5,v30
         vxor           $in5,$twk5,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk5,$tweak,$rndkey0

        vcipherlast     $out0,$out0,$in0
         lvx_u          $in0,$x00,$inp          # load next input block
         vaddubm        $tweak,$tweak,$tweak
         vsldoi         $tmp,$tmp,$tmp,15
        vcipherlast     $out1,$out1,$in1
         lvx_u          $in1,$x10,$inp
        vcipherlast     $out2,$out2,$in2
         le?vperm       $in0,$in0,$in0,$leperm
         lvx_u          $in2,$x20,$inp
         vand           $tmp,$tmp,$eighty7
        vcipherlast     $out3,$out3,$in3
         le?vperm       $in1,$in1,$in1,$leperm
         lvx_u          $in3,$x30,$inp
        vcipherlast     $out4,$out4,$in4
         le?vperm       $in2,$in2,$in2,$leperm
         lvx_u          $in4,$x40,$inp
         vxor           $tweak,$tweak,$tmp
        vcipherlast     $tmp,$out5,$in5         # last block might be needed
                                                # in stealing mode
         le?vperm       $in3,$in3,$in3,$leperm
         lvx_u          $in5,$x50,$inp
         addi           $inp,$inp,0x60
         le?vperm       $in4,$in4,$in4,$leperm
         le?vperm       $in5,$in5,$in5,$leperm

        le?vperm        $out0,$out0,$out0,$leperm
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
         vxor           $out0,$in0,$twk0
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
         vxor           $out1,$in1,$twk1
        le?vperm        $out3,$out3,$out3,$leperm
        stvx_u          $out2,$x20,$out
         vxor           $out2,$in2,$twk2
        le?vperm        $out4,$out4,$out4,$leperm
        stvx_u          $out3,$x30,$out
         vxor           $out3,$in3,$twk3
        le?vperm        $out5,$tmp,$tmp,$leperm
        stvx_u          $out4,$x40,$out
         vxor           $out4,$in4,$twk4
        le?stvx_u       $out5,$x50,$out
        be?stvx_u       $tmp,$x50,$out
         vxor           $out5,$in5,$twk5
        addi            $out,$out,0x60

        mtctr           $rounds
        beq             Loop_xts_enc6x          # did $len-=96 borrow?

        addic.          $len,$len,0x60
        beq             Lxts_enc6x_zero
        cmpwi           $len,0x20
        blt             Lxts_enc6x_one
        nop
        beq             Lxts_enc6x_two
        cmpwi           $len,0x40
        blt             Lxts_enc6x_three
        nop
        beq             Lxts_enc6x_four
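        # 0x10..0x50 bytes remain: the compare/branch ladder above
        # dispatches on the leftover block count; in1-in5 already hold
        # the last blocks of the input, hence the shifted register
        # indices in the handlers below.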

Lxts_enc6x_five:
        vxor            $out0,$in1,$twk0
        vxor            $out1,$in2,$twk1
        vxor            $out2,$in3,$twk2
        vxor            $out3,$in4,$twk3
        vxor            $out4,$in5,$twk4

        bl              _aesp8_xts_enc5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk5             # unused tweak
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
        le?vperm        $out3,$out3,$out3,$leperm
        stvx_u          $out2,$x20,$out
        vxor            $tmp,$out4,$twk5        # last block prep for stealing
        le?vperm        $out4,$out4,$out4,$leperm
        stvx_u          $out3,$x30,$out
        stvx_u          $out4,$x40,$out
        addi            $out,$out,0x50
        bne             Lxts_enc6x_steal
        b               Lxts_enc6x_done

.align  4
Lxts_enc6x_four:
        vxor            $out0,$in2,$twk0
        vxor            $out1,$in3,$twk1
        vxor            $out2,$in4,$twk2
        vxor            $out3,$in5,$twk3
        vxor            $out4,$out4,$out4

        bl              _aesp8_xts_enc5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk4             # unused tweak
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
        vxor            $tmp,$out3,$twk4        # last block prep for stealing
        le?vperm        $out3,$out3,$out3,$leperm
        stvx_u          $out2,$x20,$out
        stvx_u          $out3,$x30,$out
        addi            $out,$out,0x40
        bne             Lxts_enc6x_steal
        b               Lxts_enc6x_done

.align  4
Lxts_enc6x_three:
        vxor            $out0,$in3,$twk0
        vxor            $out1,$in4,$twk1
        vxor            $out2,$in5,$twk2
        vxor            $out3,$out3,$out3
        vxor            $out4,$out4,$out4

        bl              _aesp8_xts_enc5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk3             # unused tweak
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        vxor            $tmp,$out2,$twk3        # last block prep for stealing
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
        stvx_u          $out2,$x20,$out
        addi            $out,$out,0x30
        bne             Lxts_enc6x_steal
        b               Lxts_enc6x_done

.align  4
Lxts_enc6x_two:
        vxor            $out0,$in4,$twk0
        vxor            $out1,$in5,$twk1
        vxor            $out2,$out2,$out2
        vxor            $out3,$out3,$out3
        vxor            $out4,$out4,$out4

        bl              _aesp8_xts_enc5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk2             # unused tweak
        vxor            $tmp,$out1,$twk2        # last block prep for stealing
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        stvx_u          $out1,$x10,$out
        addi            $out,$out,0x20
        bne             Lxts_enc6x_steal
        b               Lxts_enc6x_done

.align  4
Lxts_enc6x_one:
        vxor            $out0,$in5,$twk0
        nop
Loop_xts_enc1x:
        vcipher         $out0,$out0,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vcipher         $out0,$out0,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Loop_xts_enc1x

        add             $inp,$inp,$taillen
        cmpwi           $taillen,0
        vcipher         $out0,$out0,v24

        subi            $inp,$inp,16
        vcipher         $out0,$out0,v25

        lvsr            $inpperm,0,$taillen
        vcipher         $out0,$out0,v26

        lvx_u           $in0,0,$inp
        vcipher         $out0,$out0,v27

        addi            $key_,$sp,$FRAME+15     # rewind $key_
        vcipher         $out0,$out0,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]

        vcipher         $out0,$out0,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vxor           $twk0,$twk0,v31

        le?vperm        $in0,$in0,$in0,$leperm
        vcipher         $out0,$out0,v30

        vperm           $in0,$in0,$in0,$inpperm
        vcipherlast     $out0,$out0,$twk0

        vmr             $twk0,$twk1             # unused tweak
        vxor            $tmp,$out0,$twk1        # last block prep for stealing
        le?vperm        $out0,$out0,$out0,$leperm
        stvx_u          $out0,$x00,$out         # store output
        addi            $out,$out,0x10
        bne             Lxts_enc6x_steal
        b               Lxts_enc6x_done

.align  4
Lxts_enc6x_zero:
        cmpwi           $taillen,0
        beq             Lxts_enc6x_done

        add             $inp,$inp,$taillen
        subi            $inp,$inp,16
        lvx_u           $in0,0,$inp
        lvsr            $inpperm,0,$taillen     # $in5 is no more
        le?vperm        $in0,$in0,$in0,$leperm
        vperm           $in0,$in0,$in0,$inpperm
        vxor            $tmp,$tmp,$twk0
Lxts_enc6x_steal:
        vxor            $in0,$in0,$twk0
        vxor            $out0,$out0,$out0
        vspltisb        $out1,-1
        vperm           $out0,$out0,$out1,$inpperm
        vsel            $out0,$in0,$tmp,$out0   # $tmp is last block, remember?
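        # Ciphertext stealing: lvsr on $taillen produced a permutation
        # that makes vperm build a 00..00ff..ff byte mask, so vsel above
        # splices the $taillen tail-input bytes together with the
        # trailing bytes of $tmp, the last full ciphertext block.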

        subi            r30,$out,17
        subi            $out,$out,16
        mtctr           $taillen
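        # copy the head of the block just stored to become the short
        # final block, then loop back to encrypt the spliced block in
        # its place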
Loop_xts_enc6x_steal:
        lbzu            r0,1(r30)
        stb             r0,16(r30)
        bdnz            Loop_xts_enc6x_steal

        li              $taillen,0
        mtctr           $rounds
        b               Loop_xts_enc1x          # one more time...

.align  4
Lxts_enc6x_done:
        ${UCMP}i        $ivp,0
        beq             Lxts_enc6x_ret

        vxor            $tweak,$twk0,$rndkey0
        le?vperm        $tweak,$tweak,$tweak,$leperm
        stvx_u          $tweak,0,$ivp

Lxts_enc6x_ret:
        mtlr            r11
        li              r10,`$FRAME+15`
        li              r11,`$FRAME+31`
        stvx            $seven,r10,$sp          # wipe copies of round keys
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32
        stvx            $seven,r10,$sp
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32
        stvx            $seven,r10,$sp
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32
        stvx            $seven,r10,$sp
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32

        mtspr           256,$vrsave
        lvx             v20,r10,$sp             # ABI says so
        addi            r10,r10,32
        lvx             v21,r11,$sp
        addi            r11,r11,32
        lvx             v22,r10,$sp
        addi            r10,r10,32
        lvx             v23,r11,$sp
        addi            r11,r11,32
        lvx             v24,r10,$sp
        addi            r10,r10,32
        lvx             v25,r11,$sp
        addi            r11,r11,32
        lvx             v26,r10,$sp
        addi            r10,r10,32
        lvx             v27,r11,$sp
        addi            r11,r11,32
        lvx             v28,r10,$sp
        addi            r10,r10,32
        lvx             v29,r11,$sp
        addi            r11,r11,32
        lvx             v30,r10,$sp
        lvx             v31,r11,$sp
        $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
        $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
        $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
        $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
        $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
        $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
        addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
        blr
        .long           0
        .byte           0,12,0x04,1,0x80,6,6,0
        .long           0
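        # (the .long/.byte trailers here and below are PPC ABI traceback
        # tags)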

.align  5
_aesp8_xts_enc5x:
        vcipher         $out0,$out0,v24
        vcipher         $out1,$out1,v24
        vcipher         $out2,$out2,v24
        vcipher         $out3,$out3,v24
        vcipher         $out4,$out4,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vcipher         $out0,$out0,v25
        vcipher         $out1,$out1,v25
        vcipher         $out2,$out2,v25
        vcipher         $out3,$out3,v25
        vcipher         $out4,$out4,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            _aesp8_xts_enc5x

        add             $inp,$inp,$taillen
        cmpwi           $taillen,0
        vcipher         $out0,$out0,v24
        vcipher         $out1,$out1,v24
        vcipher         $out2,$out2,v24
        vcipher         $out3,$out3,v24
        vcipher         $out4,$out4,v24

        subi            $inp,$inp,16
        vcipher         $out0,$out0,v25
        vcipher         $out1,$out1,v25
        vcipher         $out2,$out2,v25
        vcipher         $out3,$out3,v25
        vcipher         $out4,$out4,v25
         vxor           $twk0,$twk0,v31

        vcipher         $out0,$out0,v26
        lvsr            $inpperm,0,$taillen     # $in5 is no more
        vcipher         $out1,$out1,v26
        vcipher         $out2,$out2,v26
        vcipher         $out3,$out3,v26
        vcipher         $out4,$out4,v26
         vxor           $in1,$twk1,v31

        vcipher         $out0,$out0,v27
        lvx_u           $in0,0,$inp
        vcipher         $out1,$out1,v27
        vcipher         $out2,$out2,v27
        vcipher         $out3,$out3,v27
        vcipher         $out4,$out4,v27
         vxor           $in2,$twk2,v31

        addi            $key_,$sp,$FRAME+15     # rewind $key_
        vcipher         $out0,$out0,v28
        vcipher         $out1,$out1,v28
        vcipher         $out2,$out2,v28
        vcipher         $out3,$out3,v28
        vcipher         $out4,$out4,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]
         vxor           $in3,$twk3,v31

        vcipher         $out0,$out0,v29
        le?vperm        $in0,$in0,$in0,$leperm
        vcipher         $out1,$out1,v29
        vcipher         $out2,$out2,v29
        vcipher         $out3,$out3,v29
        vcipher         $out4,$out4,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vxor           $in4,$twk4,v31

        vcipher         $out0,$out0,v30
        vperm           $in0,$in0,$in0,$inpperm
        vcipher         $out1,$out1,v30
        vcipher         $out2,$out2,v30
        vcipher         $out3,$out3,v30
        vcipher         $out4,$out4,v30

        vcipherlast     $out0,$out0,$twk0
        vcipherlast     $out1,$out1,$in1
        vcipherlast     $out2,$out2,$in2
        vcipherlast     $out3,$out3,$in3
        vcipherlast     $out4,$out4,$in4
        blr
        .long           0
        .byte           0,12,0x14,0,0,0,0,0

.align  5
_aesp8_xts_decrypt6x:
        $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
        mflr            r11
        li              r7,`$FRAME+8*16+15`
        li              r3,`$FRAME+8*16+31`
        $PUSH           r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
        stvx            v20,r7,$sp              # ABI says so
        addi            r7,r7,32
        stvx            v21,r3,$sp
        addi            r3,r3,32
        stvx            v22,r7,$sp
        addi            r7,r7,32
        stvx            v23,r3,$sp
        addi            r3,r3,32
        stvx            v24,r7,$sp
        addi            r7,r7,32
        stvx            v25,r3,$sp
        addi            r3,r3,32
        stvx            v26,r7,$sp
        addi            r7,r7,32
        stvx            v27,r3,$sp
        addi            r3,r3,32
        stvx            v28,r7,$sp
        addi            r7,r7,32
        stvx            v29,r3,$sp
        addi            r3,r3,32
        stvx            v30,r7,$sp
        stvx            v31,r3,$sp
        li              r0,-1
        stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
        li              $x10,0x10
        $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
        li              $x20,0x20
        $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
        li              $x30,0x30
        $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
        li              $x40,0x40
        $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
        li              $x50,0x50
        $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
        li              $x60,0x60
        $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
        li              $x70,0x70
        mtspr           256,r0

        subi            $rounds,$rounds,3       # -4 in total

        lvx             $rndkey0,$x00,$key1     # load key schedule
        lvx             v30,$x10,$key1
        addi            $key1,$key1,0x20
        lvx             v31,$x00,$key1
        ?vperm          $rndkey0,$rndkey0,v30,$keyperm
        addi            $key_,$sp,$FRAME+15
        mtctr           $rounds

Load_xts_dec_key:
        ?vperm          v24,v30,v31,$keyperm
        lvx             v30,$x10,$key1
        addi            $key1,$key1,0x20
        stvx            v24,$x00,$key_          # off-load round[1]
        ?vperm          v25,v31,v30,$keyperm
        lvx             v31,$x00,$key1
        stvx            v25,$x10,$key_          # off-load round[2]
        addi            $key_,$key_,0x20
        bdnz            Load_xts_dec_key
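        # As on the encrypt side, round keys are byte-order-adjusted with
        # ?vperm and staged to the stack frame, so the inner loop can
        # stream two keys per iteration from an aligned scratch area.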

        lvx             v26,$x10,$key1
        ?vperm          v24,v30,v31,$keyperm
        lvx             v27,$x20,$key1
        stvx            v24,$x00,$key_          # off-load round[3]
        ?vperm          v25,v31,v26,$keyperm
        lvx             v28,$x30,$key1
        stvx            v25,$x10,$key_          # off-load round[4]
        addi            $key_,$sp,$FRAME+15     # rewind $key_
        ?vperm          v26,v26,v27,$keyperm
        lvx             v29,$x40,$key1
        ?vperm          v27,v27,v28,$keyperm
        lvx             v30,$x50,$key1
        ?vperm          v28,v28,v29,$keyperm
        lvx             v31,$x60,$key1
        ?vperm          v29,v29,v30,$keyperm
        lvx             $twk5,$x70,$key1        # borrow $twk5
        ?vperm          v30,v30,v31,$keyperm
        lvx             v24,$x00,$key_          # pre-load round[1]
        ?vperm          v31,v31,$twk5,$keyperm
        lvx             v25,$x10,$key_          # pre-load round[2]

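        # first input block and tweak chain for the decrypt path; the
        # vsrab/vaddubm/vsldoi/vand/vxor steps below are the same
        # multiply-by-x tweak update described on the encrypt side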
         vperm          $in0,$inout,$inptail,$inpperm
         subi           $inp,$inp,31            # undo "caller"
        vxor            $twk0,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
        vsldoi          $tmp,$tmp,$tmp,15
        vand            $tmp,$tmp,$eighty7
         vxor           $out0,$in0,$twk0
        vxor            $tweak,$tweak,$tmp

         lvx_u          $in1,$x10,$inp
        vxor            $twk1,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
        vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in1,$in1,$in1,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out1,$in1,$twk1
        vxor            $tweak,$tweak,$tmp

         lvx_u          $in2,$x20,$inp
         andi.          $taillen,$len,15
        vxor            $twk2,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
        vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in2,$in2,$in2,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out2,$in2,$twk2
        vxor            $tweak,$tweak,$tmp

         lvx_u          $in3,$x30,$inp
         sub            $len,$len,$taillen
        vxor            $twk3,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
        vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in3,$in3,$in3,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out3,$in3,$twk3
        vxor            $tweak,$tweak,$tmp

         lvx_u          $in4,$x40,$inp
         subi           $len,$len,0x60
        vxor            $twk4,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
        vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in4,$in4,$in4,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out4,$in4,$twk4
        vxor            $tweak,$tweak,$tmp

         lvx_u          $in5,$x50,$inp
         addi           $inp,$inp,0x60
        vxor            $twk5,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
        vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in5,$in5,$in5,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out5,$in5,$twk5
        vxor            $tweak,$tweak,$tmp

        vxor            v31,v31,$rndkey0
        mtctr           $rounds
        b               Loop_xts_dec6x

.align  5
Loop_xts_dec6x:
        vncipher        $out0,$out0,v24
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Loop_xts_dec6x

        subic           $len,$len,96            # $len-=96
         vxor           $in0,$twk0,v31          # xor with last round key
        vncipher        $out0,$out0,v24
        vncipher        $out1,$out1,v24
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk0,$tweak,$rndkey0
         vaddubm        $tweak,$tweak,$tweak
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
         vsldoi         $tmp,$tmp,$tmp,15
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24

        subfe.          r0,r0,r0                # borrow?-1:0
         vand           $tmp,$tmp,$eighty7
        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
         vxor           $tweak,$tweak,$tmp
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
         vxor           $in1,$twk1,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk1,$tweak,$rndkey0
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25

        and             r0,r0,$len
         vaddubm        $tweak,$tweak,$tweak
         vsldoi         $tmp,$tmp,$tmp,15
        vncipher        $out0,$out0,v26
        vncipher        $out1,$out1,v26
         vand           $tmp,$tmp,$eighty7
        vncipher        $out2,$out2,v26
        vncipher        $out3,$out3,v26
         vxor           $tweak,$tweak,$tmp
        vncipher        $out4,$out4,v26
        vncipher        $out5,$out5,v26

        add             $inp,$inp,r0            # $inp is adjusted in such
                                                # way that at exit from the
                                                # loop inX-in5 are loaded
                                                # with last "words"
         vxor           $in2,$twk2,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk2,$tweak,$rndkey0
         vaddubm        $tweak,$tweak,$tweak
        vncipher        $out0,$out0,v27
        vncipher        $out1,$out1,v27
         vsldoi         $tmp,$tmp,$tmp,15
        vncipher        $out2,$out2,v27
        vncipher        $out3,$out3,v27
         vand           $tmp,$tmp,$eighty7
        vncipher        $out4,$out4,v27
        vncipher        $out5,$out5,v27

        addi            $key_,$sp,$FRAME+15     # rewind $key_
         vxor           $tweak,$tweak,$tmp
        vncipher        $out0,$out0,v28
        vncipher        $out1,$out1,v28
         vxor           $in3,$twk3,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk3,$tweak,$rndkey0
        vncipher        $out2,$out2,v28
        vncipher        $out3,$out3,v28
         vaddubm        $tweak,$tweak,$tweak
         vsldoi         $tmp,$tmp,$tmp,15
        vncipher        $out4,$out4,v28
        vncipher        $out5,$out5,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]
         vand           $tmp,$tmp,$eighty7

        vncipher        $out0,$out0,v29
        vncipher        $out1,$out1,v29
         vxor           $tweak,$tweak,$tmp
        vncipher        $out2,$out2,v29
        vncipher        $out3,$out3,v29
         vxor           $in4,$twk4,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk4,$tweak,$rndkey0
        vncipher        $out4,$out4,v29
        vncipher        $out5,$out5,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vaddubm        $tweak,$tweak,$tweak
         vsldoi         $tmp,$tmp,$tmp,15

        vncipher        $out0,$out0,v30
        vncipher        $out1,$out1,v30
         vand           $tmp,$tmp,$eighty7
        vncipher        $out2,$out2,v30
        vncipher        $out3,$out3,v30
         vxor           $tweak,$tweak,$tmp
        vncipher        $out4,$out4,v30
        vncipher        $out5,$out5,v30
         vxor           $in5,$twk5,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk5,$tweak,$rndkey0

        vncipherlast    $out0,$out0,$in0
         lvx_u          $in0,$x00,$inp          # load next input block
         vaddubm        $tweak,$tweak,$tweak
         vsldoi         $tmp,$tmp,$tmp,15
        vncipherlast    $out1,$out1,$in1
         lvx_u          $in1,$x10,$inp
        vncipherlast    $out2,$out2,$in2
         le?vperm       $in0,$in0,$in0,$leperm
         lvx_u          $in2,$x20,$inp
         vand           $tmp,$tmp,$eighty7
        vncipherlast    $out3,$out3,$in3
         le?vperm       $in1,$in1,$in1,$leperm
         lvx_u          $in3,$x30,$inp
        vncipherlast    $out4,$out4,$in4
         le?vperm       $in2,$in2,$in2,$leperm
         lvx_u          $in4,$x40,$inp
         vxor           $tweak,$tweak,$tmp
        vncipherlast    $out5,$out5,$in5
         le?vperm       $in3,$in3,$in3,$leperm
         lvx_u          $in5,$x50,$inp
         addi           $inp,$inp,0x60
         le?vperm       $in4,$in4,$in4,$leperm
         le?vperm       $in5,$in5,$in5,$leperm

        le?vperm        $out0,$out0,$out0,$leperm
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
         vxor           $out0,$in0,$twk0
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
         vxor           $out1,$in1,$twk1
        le?vperm        $out3,$out3,$out3,$leperm
        stvx_u          $out2,$x20,$out
         vxor           $out2,$in2,$twk2
        le?vperm        $out4,$out4,$out4,$leperm
        stvx_u          $out3,$x30,$out
         vxor           $out3,$in3,$twk3
        le?vperm        $out5,$out5,$out5,$leperm
        stvx_u          $out4,$x40,$out
         vxor           $out4,$in4,$twk4
        stvx_u          $out5,$x50,$out
         vxor           $out5,$in5,$twk5
        addi            $out,$out,0x60

        mtctr           $rounds
        beq             Loop_xts_dec6x          # did $len-=96 borrow?

        addic.          $len,$len,0x60
        beq             Lxts_dec6x_zero
        cmpwi           $len,0x20
        blt             Lxts_dec6x_one
        nop
        beq             Lxts_dec6x_two
        cmpwi           $len,0x40
        blt             Lxts_dec6x_three
        nop
        beq             Lxts_dec6x_four

Lxts_dec6x_five:
        vxor            $out0,$in1,$twk0
        vxor            $out1,$in2,$twk1
        vxor            $out2,$in3,$twk2
        vxor            $out3,$in4,$twk3
        vxor            $out4,$in5,$twk4

        bl              _aesp8_xts_dec5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk5             # unused tweak
        vxor            $twk1,$tweak,$rndkey0
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        vxor            $out0,$in0,$twk1
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
        le?vperm        $out3,$out3,$out3,$leperm
        stvx_u          $out2,$x20,$out
        le?vperm        $out4,$out4,$out4,$leperm
        stvx_u          $out3,$x30,$out
        stvx_u          $out4,$x40,$out
        addi            $out,$out,0x50
        bne             Lxts_dec6x_steal
        b               Lxts_dec6x_done

.align  4
Lxts_dec6x_four:
        vxor            $out0,$in2,$twk0
        vxor            $out1,$in3,$twk1
        vxor            $out2,$in4,$twk2
        vxor            $out3,$in5,$twk3
        vxor            $out4,$out4,$out4

        bl              _aesp8_xts_dec5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk4             # unused tweak
        vmr             $twk1,$twk5
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        vxor            $out0,$in0,$twk5
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
        le?vperm        $out3,$out3,$out3,$leperm
        stvx_u          $out2,$x20,$out
        stvx_u          $out3,$x30,$out
        addi            $out,$out,0x40
        bne             Lxts_dec6x_steal
        b               Lxts_dec6x_done

.align  4
Lxts_dec6x_three:
        vxor            $out0,$in3,$twk0
        vxor            $out1,$in4,$twk1
        vxor            $out2,$in5,$twk2
        vxor            $out3,$out3,$out3
        vxor            $out4,$out4,$out4

        bl              _aesp8_xts_dec5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk3             # unused tweak
        vmr             $twk1,$twk4
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        vxor            $out0,$in0,$twk4
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
        stvx_u          $out2,$x20,$out
        addi            $out,$out,0x30
        bne             Lxts_dec6x_steal
        b               Lxts_dec6x_done

.align  4
Lxts_dec6x_two:
        vxor            $out0,$in4,$twk0
        vxor            $out1,$in5,$twk1
        vxor            $out2,$out2,$out2
        vxor            $out3,$out3,$out3
        vxor            $out4,$out4,$out4

        bl              _aesp8_xts_dec5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk2             # unused tweak
        vmr             $twk1,$twk3
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        vxor            $out0,$in0,$twk3
        stvx_u          $out1,$x10,$out
        addi            $out,$out,0x20
        bne             Lxts_dec6x_steal
        b               Lxts_dec6x_done

.align  4
Lxts_dec6x_one:
        vxor            $out0,$in5,$twk0
        nop
Loop_xts_dec1x:
        vncipher        $out0,$out0,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vncipher        $out0,$out0,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Loop_xts_dec1x

        subi            r0,$taillen,1
        vncipher        $out0,$out0,v24

        andi.           r0,r0,16
        cmpwi           $taillen,0
        vncipher        $out0,$out0,v25

        sub             $inp,$inp,r0
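        # r0 is 16 exactly when $taillen is zero: with no tail the
        # (otherwise unused) load below is pulled back inside the input
        # buffer; with a tail $in0 picks up the bytes needed for stealing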
        vncipher        $out0,$out0,v26

        lvx_u           $in0,0,$inp
        vncipher        $out0,$out0,v27

        addi            $key_,$sp,$FRAME+15     # rewind $key_
        vncipher        $out0,$out0,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]

        vncipher        $out0,$out0,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vxor           $twk0,$twk0,v31

        le?vperm        $in0,$in0,$in0,$leperm
        vncipher        $out0,$out0,v30

        mtctr           $rounds
        vncipherlast    $out0,$out0,$twk0

        vmr             $twk0,$twk1             # unused tweak
        vmr             $twk1,$twk2
        le?vperm        $out0,$out0,$out0,$leperm
        stvx_u          $out0,$x00,$out         # store output
        addi            $out,$out,0x10
        vxor            $out0,$in0,$twk2
        bne             Lxts_dec6x_steal
        b               Lxts_dec6x_done

.align  4
Lxts_dec6x_zero:
        cmpwi           $taillen,0
        beq             Lxts_dec6x_done

        lvx_u           $in0,0,$inp
        le?vperm        $in0,$in0,$in0,$leperm
        vxor            $out0,$in0,$twk1
Lxts_dec6x_steal:
        vncipher        $out0,$out0,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vncipher        $out0,$out0,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Lxts_dec6x_steal

        add             $inp,$inp,$taillen
        vncipher        $out0,$out0,v24

        cmpwi           $taillen,0
        vncipher        $out0,$out0,v25

        lvx_u           $in0,0,$inp
        vncipher        $out0,$out0,v26

        lvsr            $inpperm,0,$taillen     # $in5 is no more
        vncipher        $out0,$out0,v27

        addi            $key_,$sp,$FRAME+15     # rewind $key_
        vncipher        $out0,$out0,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]

        vncipher        $out0,$out0,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vxor           $twk1,$twk1,v31

        le?vperm        $in0,$in0,$in0,$leperm
        vncipher        $out0,$out0,v30

        vperm           $in0,$in0,$in0,$inpperm
        vncipherlast    $tmp,$out0,$twk1

        le?vperm        $out0,$tmp,$tmp,$leperm
        le?stvx_u       $out0,0,$out
        be?stvx_u       $tmp,0,$out

        vxor            $out0,$out0,$out0
        vspltisb        $out1,-1
        vperm           $out0,$out0,$out1,$inpperm
        vsel            $out0,$in0,$tmp,$out0
        vxor            $out0,$out0,$twk0
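        # splice the tail-input bytes with the remainder of the block just
        # produced (same vperm/vsel mask trick as on the encrypt side) and
        # pre-xor with $twk0 for one more pass through Loop_xts_dec1x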

        subi            r30,$out,1
        mtctr           $taillen
Loop_xts_dec6x_steal:
        lbzu            r0,1(r30)
        stb             r0,16(r30)
        bdnz            Loop_xts_dec6x_steal

        li              $taillen,0
        mtctr           $rounds
        b               Loop_xts_dec1x          # one more time...

.align  4
Lxts_dec6x_done:
        ${UCMP}i        $ivp,0
        beq             Lxts_dec6x_ret

        vxor            $tweak,$twk0,$rndkey0
        le?vperm        $tweak,$tweak,$tweak,$leperm
        stvx_u          $tweak,0,$ivp

Lxts_dec6x_ret:
        mtlr            r11
        li              r10,`$FRAME+15`
        li              r11,`$FRAME+31`
        stvx            $seven,r10,$sp          # wipe copies of round keys
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32
        stvx            $seven,r10,$sp
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32
        stvx            $seven,r10,$sp
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32
        stvx            $seven,r10,$sp
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32

        mtspr           256,$vrsave
        lvx             v20,r10,$sp             # ABI says so
        addi            r10,r10,32
        lvx             v21,r11,$sp
        addi            r11,r11,32
        lvx             v22,r10,$sp
        addi            r10,r10,32
        lvx             v23,r11,$sp
        addi            r11,r11,32
        lvx             v24,r10,$sp
        addi            r10,r10,32
        lvx             v25,r11,$sp
        addi            r11,r11,32
        lvx             v26,r10,$sp
        addi            r10,r10,32
        lvx             v27,r11,$sp
        addi            r11,r11,32
        lvx             v28,r10,$sp
        addi            r10,r10,32
        lvx             v29,r11,$sp
        addi            r11,r11,32
        lvx             v30,r10,$sp
        lvx             v31,r11,$sp
        $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
        $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
        $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
        $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
        $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
        $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
        addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
        blr
        .long           0
        .byte           0,12,0x04,1,0x80,6,6,0
        .long           0

.align  5
_aesp8_xts_dec5x:
        vncipher        $out0,$out0,v24
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            _aesp8_xts_dec5x

        subi            r0,$taillen,1
        vncipher        $out0,$out0,v24
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24

        andi.           r0,r0,16
        cmpwi           $taillen,0
        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
         vxor           $twk0,$twk0,v31

        sub             $inp,$inp,r0
        vncipher        $out0,$out0,v26
        vncipher        $out1,$out1,v26
        vncipher        $out2,$out2,v26
        vncipher        $out3,$out3,v26
        vncipher        $out4,$out4,v26
         vxor           $in1,$twk1,v31

        vncipher        $out0,$out0,v27
        lvx_u           $in0,0,$inp
        vncipher        $out1,$out1,v27
        vncipher        $out2,$out2,v27
        vncipher        $out3,$out3,v27
        vncipher        $out4,$out4,v27
         vxor           $in2,$twk2,v31

        addi            $key_,$sp,$FRAME+15     # rewind $key_
        vncipher        $out0,$out0,v28
        vncipher        $out1,$out1,v28
        vncipher        $out2,$out2,v28
        vncipher        $out3,$out3,v28
        vncipher        $out4,$out4,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]
         vxor           $in3,$twk3,v31

        vncipher        $out0,$out0,v29
        le?vperm        $in0,$in0,$in0,$leperm
        vncipher        $out1,$out1,v29
        vncipher        $out2,$out2,v29
        vncipher        $out3,$out3,v29
        vncipher        $out4,$out4,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vxor           $in4,$twk4,v31

        vncipher        $out0,$out0,v30
        vncipher        $out1,$out1,v30
        vncipher        $out2,$out2,v30
        vncipher        $out3,$out3,v30
        vncipher        $out4,$out4,v30

        vncipherlast    $out0,$out0,$twk0
        vncipherlast    $out1,$out1,$in1
        vncipherlast    $out2,$out2,$in2
        vncipherlast    $out3,$out3,$in3
        vncipherlast    $out4,$out4,$in4
        mtctr           $rounds
        blr
        .long           0
        .byte           0,12,0x14,0,0,0,0,0
___
}}      }}}

my $consts=1;
foreach(split("\n",$code)) {
        s/\`([^\`]*)\`/eval($1)/geo;

        # constants table endian-specific conversion
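        # ("?rev"-tagged entries are emitted byte-reversed on little-endian
        # builds, while "?inv" XORs each byte with 0xf, flipping vperm
        # lane indices)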
        if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
            my $conv=$3;
            my @bytes=();

            # convert to endian-agnostic format
            if ($1 eq "long") {
              foreach (split(/,\s*/,$2)) {
                my $l = /^0/?oct:int;
                push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
              }
            } else {
                @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
            }

            # little-endian conversion
            if ($flavour =~ /le$/o) {
                SWITCH: for($conv)  {
                    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
                    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
                }
            }

            # emit
            print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
            next;
        }
        $consts=0 if (m/Lconsts:/o);    # end of table

        # instructions prefixed with '?' are endian-specific and need
        # to be adjusted accordingly...
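        # e.g. "?vperm vD,vA,vB,vC" swaps its two source operands on
        # little-endian, and "?vsldoi ...,N" becomes a shift by 16-N
        # bytes, expressing the same operation for the reversed lane order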
        if ($flavour =~ /le$/o) {       # little-endian
            s/le\?//o           or
            s/be\?/#be#/o       or
            s/\?lvsr/lvsl/o     or
            s/\?lvsl/lvsr/o     or
            s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
            s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
            s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
        } else {                        # big-endian
            s/le\?/#le#/o       or
            s/be\?//o           or
            s/\?([a-z]+)/$1/o;
        }

        print $_,"\n";
}

close STDOUT or die "error closing STDOUT: $!";