#! /usr/bin/env perl
# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag
# being set. It should also be noted that the ISA specification doesn't
# prohibit alignment exceptions for these instructions on page
# boundaries. Initially alignment was handled in a pure AltiVec/VMX way
# [with data aligned programmatically, which in turn guarantees
# exception-free execution], but that turned out to hamper performance
# when vcipher instructions are interleaved. It's reckoned that the
# occasional misalignment penalties at page boundaries are on average
# lower than the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added XTS subroutine; 9x improvement on little-endian and 12x on
# big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0
# POWER9[le]	3.05/0.65	0.65	0.80

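# Invocation sketch (an assumption inferred from the argument handling
# below, not documented here): the first argument selects the perlasm
# flavour, the second names the output file handed to ppc-xlate.pl, e.g.
#
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s
#
# The flavour string must contain "32" or "64" and ends in "le" for
# little-endian targets.
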
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr			# distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

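# Calling-convention sketch (the C prototype is an assumption drawn
# from how OpenSSL usually binds these symbols, not stated in this
# file):
#
#	int ${prefix}_set_encrypt_key(const unsigned char *userKey,
#	                              const int bits, AES_KEY *key);
#
# As the checks below show, the routine returns -1 for NULL pointers,
# -2 for a key size other than 128/192/256 bits, and 0 on success.
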
.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not a typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

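# Loop128 computes one AES-128 key-schedule step per iteration. In
# scalar FIPS-197 terms each step is (a restatement for orientation,
# not part of the original comments):
#
#	w[4i+4] = w[4i]   ^ SubWord(RotWord(w[4i+3])) ^ Rcon[i]
#	w[4i+5] = w[4i+1] ^ w[4i+4]
#	w[4i+6] = w[4i+2] ^ w[4i+5]
#	w[4i+7] = w[4i+3] ^ w[4i+6]
#
# The "rotate-n-splat" vperm broadcasts RotWord of the last word to all
# four lanes, vcipherlast supplies SubWord and folds in $rcon, and the
# vsldoi/vxor chain accumulates the running XOR across the lanes.
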
.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not a typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	 vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	 vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not a typo
	 addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in1,$in1,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not a typo
	 addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

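# Decrypt key setup reuses the encrypt schedule and then swaps the
# round keys end-for-end in place (the Ldeckey loop below), so the
# decrypt routines can walk the schedule front to back just like the
# encrypt ones. Its prototype mirrors ${prefix}_set_encrypt_key (an
# assumption, as above) and it propagates the same return codes.
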
.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
.align	5
.${prefix}_${dir}crypt:
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not a typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not a typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
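# The generator above emits two single-block entry points whose C-level
# signatures (an assumption based on OpenSSL's usual AES bindings) are:
#
#	void ${prefix}_encrypt(const unsigned char *in,
#	                       unsigned char *out, const AES_KEY *key);
#	void ${prefix}_decrypt(const unsigned char *in,
#	                       unsigned char *out, const AES_KEY *key);
#
# Both process exactly one 16-byte block; the decrypt variant simply
# substitutes vncipher/vncipherlast for vcipher/vcipherlast.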
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
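# Expected C-level signature (an assumption, mirroring OpenSSL's
# AES_cbc_encrypt convention):
#
#	void ${prefix}_cbc_encrypt(const unsigned char *in,
#	                           unsigned char *out, size_t length,
#	                           const AES_KEY *key,
#	                           unsigned char *ivec, const int enc);
#
# Lengths below 16 bytes are a no-op, $enc selects the direction, and
# decryption of 128 bytes or more diverts to the 8x code path below.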
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	${UCMP}i	$len,16
	bltlr-

	cmpwi		$enc,0			# test direction
	lis		r0,0xffe0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not a typo
	le?vxor		$inpperm,$inpperm,$tmp

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1
	beq		Lcbc_dec

Lcbc_enc:
	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	 vperm		$inout,$inout,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	vxor		$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm		$tmp,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_enc

	b		Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge		_aesp8_cbc_decrypt8x
	vmr		$tmp,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	 vperm		$tmp,$tmp,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$tmp,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16

Loop_cbc_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor		$inout,$inout,$ivec
	vmr		$ivec,$tmp
	vperm		$tmp,$inout,$inout,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_dec

Lcbc_done:
	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	neg		$enc,$ivp		# write [unaligned] iv
	li		$idx,15			# 15 is not a typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment

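# Rationale sketch: eight blocks are decrypted in parallel here because
# CBC decryption, unlike encryption, has no inter-block dependency, so
# eight independent vncipher chains can hide instruction latency (cf.
# the 0.72 vs 3.96 cpb figures in the header table). Round keys are
# pre-permuted once into a stack buffer and the v24/v25 pair rotates
# through it.
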
$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total
	subi		$len,$len,128		# bias

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_cbc_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_cbc_dec_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	#lvx		$inptail,0,$inp		# "caller" already did this
	#addi		$inp,$inp,15		# 15 is not a typo
	subi		$inp,$inp,15		# undo "caller"

	 le?li		$idx,8
	lvx_u		$in0,$x00,$inp		# load first 8 "words"
	 le?lvsl	$inpperm,0,$idx
	 le?vspltisb	$tmp,0x0f
	lvx_u		$in1,$x10,$inp
	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u		$in2,$x20,$inp
	 le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in3,$x30,$inp
	 le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in4,$x40,$inp
	 le?vperm	$in2,$in2,$in2,$inpperm
	vxor		$out0,$in0,$rndkey0
	lvx_u		$in5,$x50,$inp
	 le?vperm	$in3,$in3,$in3,$inpperm
	vxor		$out1,$in1,$rndkey0
	lvx_u		$in6,$x60,$inp
	 le?vperm	$in4,$in4,$in4,$inpperm
	vxor		$out2,$in2,$rndkey0
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80
	 le?vperm	$in5,$in5,$in5,$inpperm
	vxor		$out3,$in3,$rndkey0
	 le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out4,$in4,$rndkey0
	 le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out5,$in5,$rndkey0
	vxor		$out6,$in6,$rndkey0
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	b		Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x

	subic		$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and		r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add		$inp,$inp,r0		# $inp is adjusted in such a
						# way that at exit from the
						# loop in0-in7 are loaded
						# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	 vxor		$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	 vxor		$in0,$in0,v31
	vncipher	$out2,$out2,v30
	 vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	 vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	 vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	 vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	 vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	 vxor		$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	 lvx_u		$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	 lvx_u		$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	 le?vperm	$in0,$in0,$in0,$inpperm
	 lvx_u		$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	 le?vperm	$in1,$in1,$in1,$inpperm
	 lvx_u		$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	 le?vperm	$in2,$in2,$in2,$inpperm
	 lvx_u		$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	 le?vperm	$in3,$in3,$in3,$inpperm
	 lvx_u		$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	 le?vperm	$in4,$in4,$in4,$inpperm
	 lvx_u		$in6,$x60,$inp
	vmr		$ivec,$in7
	 le?vperm	$in5,$in5,$in5,$inpperm
	 lvx_u		$in7,$x70,$inp
	 addi		$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	 le?vperm	$in6,$in6,$in6,$inpperm
	 vxor		$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	 le?vperm	$in7,$in7,$in7,$inpperm
	 vxor		$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	 vxor		$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	 vxor		$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	 vxor		$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	 vxor		$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	 vxor		$out6,$in6,$rndkey0
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	 vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	beq		Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.		$len,$len,128
	beq		Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	 vxor		$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	 vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	 vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	 vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	 vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	 vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	 vxor		$in6,$in6,v31

	cmplwi		$len,32			# switch($len)
	blt		Lcbc_dec8x_one
	nop
	beq		Lcbc_dec8x_two
	cmplwi		$len,64
	blt		Lcbc_dec8x_three
	nop
	beq		Lcbc_dec8x_four
	cmplwi		$len,96
	blt		Lcbc_dec8x_five
	nop
	beq		Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x50,$out
	stvx_u		$out7,$x60,$out
	addi		$out,$out,0x70
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x40,$out
	stvx_u		$out7,$x50,$out
	addi		$out,$out,0x60
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x30,$out
	stvx_u		$out7,$x40,$out
	addi		$out,$out,0x50
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x20,$out
	stvx_u		$out7,$x30,$out
	addi		$out,$out,0x40
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x10,$out
	stvx_u		$out7,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x00,$out
	stvx_u		$out7,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr		$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out7,0,$out
	addi		$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u		$ivec,0,$ivp		# write [unaligned] iv

	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

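# Expected C-level signature (an assumption, matching OpenSSL's usual
# ctr128 block-function convention):
#
#	void ${prefix}_ctr32_encrypt_blocks(const unsigned char *in,
#	                                    unsigned char *out,
#	                                    size_t blocks,
#	                                    const AES_KEY *key,
#	                                    const unsigned char ivec[16]);
#
# $len counts 16-byte blocks, not bytes, and only the low 32 bits of
# the counter are incremented (vadduwm on the last word of $ivec),
# hence the "ctr32" in the name. Eight or more blocks divert to the
# 8x path.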
1266 $code.=<<___;
1267 .globl  .${prefix}_ctr32_encrypt_blocks
1268 .align  5
1269 .${prefix}_ctr32_encrypt_blocks:
1270         ${UCMP}i        $len,1
1271         bltlr-
1272
1273         lis             r0,0xfff0
1274         mfspr           $vrsave,256
1275         mtspr           256,r0
1276
1277         li              $idx,15
1278         vxor            $rndkey0,$rndkey0,$rndkey0
1279         le?vspltisb     $tmp,0x0f
1280
1281         lvx             $ivec,0,$ivp            # load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	 vspltisb	$one,1
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm
	 vsldoi		$one,$rndkey0,$one,1

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1

	${UCMP}i	$len,8
	bge		_aesp8_ctr32_encrypt8x

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	lvx		$rndkey0,0,$key
	mtctr		$rounds
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	b		Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_ctr32_enc

	vadduwm		$ivec,$ivec,$one
	 vmr		$dat,$inptail
	 lvx		$inptail,0,$inp
	 addi		$inp,$inp,16
	 subic.		$len,$len,1		# blocks--

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	 vperm		$dat,$dat,$inptail,$inpperm
	 li		$idx,16
	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
	 lvx		$rndkey0,0,$key
	vxor		$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	 lvx		$rndkey1,$idx,$key
	 addi		$idx,$idx,16
	vperm		$inout,$inout,$inout,$outperm
	vsel		$dat,$outhead,$inout,$outmask
	 mtctr		$rounds
	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr		$outhead,$inout
	 vxor		$inout,$ivec,$rndkey0
	 lvx		$rndkey0,$idx,$key
	 addi		$idx,$idx,16
	stvx		$dat,0,$out
	addi		$out,$out,16
	bne		Loop_ctr32_enc

	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
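# For reference, what the code above implements is CTR mode with a 32-bit
# big-endian counter: $one is non-zero only in the last word, so vadduwm
# increments just the low 32 bits of the counter block, and wrap-around
# into the upper 96 bits is deliberately not propagated. A C sketch of
# the same contract (aes_encrypt_block is a hypothetical one-block
# primitive, not part of this module):
#
#	void ctr32_ref(const unsigned char *in, unsigned char *out,
#	               size_t blocks, const AES_KEY *key,
#	               const unsigned char ivec[16])
#	{
#		unsigned char ctr[16], ks[16];
#		memcpy(ctr, ivec, 16);
#		while (blocks--) {
#			aes_encrypt_block(ctr, ks, key);	/* hypothetical */
#			for (int i = 0; i < 16; i++)
#				out[i] = in[i] ^ ks[i];
#			for (int i = 15; i >= 12; i--)	/* 32-bit BE increment */
#				if (++ctr[i]) break;
#			in += 16; out += 16;
#		}
#	}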
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_ctr32_enc_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_ctr32_enc_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]
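	# Rounds [1] and [2] circulate through the v24/v25 pair; middle
	# rounds are refetched from the aligned stack copy as the loop
	# runs, and the last six round keys stay resident in v26-v31.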

	vadduwm		$two,$one,$one
	subi		$inp,$inp,15		# undo "caller"
	$SHL		$len,$len,4

	vadduwm		$out1,$ivec,$one	# counter values ...
	vadduwm		$out2,$ivec,$two
	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	 le?li		$idx,8
	vadduwm		$out3,$out1,$two
	vxor		$out1,$out1,$rndkey0
	 le?lvsl	$inpperm,0,$idx
	vadduwm		$out4,$out2,$two
	vxor		$out2,$out2,$rndkey0
	 le?vspltisb	$tmp,0x0f
	vadduwm		$out5,$out3,$two
	vxor		$out3,$out3,$rndkey0
	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduwm		$out6,$out4,$two
	vxor		$out4,$out4,$rndkey0
	vadduwm		$out7,$out5,$two
	vxor		$out5,$out5,$rndkey0
	vadduwm		$ivec,$out6,$two	# next counter value
	vxor		$out6,$out6,$rndkey0
	vxor		$out7,$out7,$rndkey0
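	# $out0-$out7 now hold ctr..ctr+7 already xored with round key 0
	# (the first AddRoundKey is folded into counter generation), and
	# $ivec holds ctr+8 for the next batch.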

	mtctr		$rounds
	b		Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	vcipher		$out6,$out6,v24
	vcipher		$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	vcipher		$out6,$out6,v25
	vcipher		$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_ctr32_enc8x

	subic		r11,$len,256		# $len-256, borrow $key_
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	vcipher		$out6,$out6,v24
	vcipher		$out7,$out7,v24

	subfe		r0,r0,r0		# borrow?-1:0
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	vcipher		$out6,$out6,v25
	vcipher		$out7,$out7,v25

	and		r0,r0,r11
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v26
	vcipher		$out1,$out1,v26
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	vcipher		$out4,$out4,v26
	vcipher		$out5,$out5,v26
	vcipher		$out6,$out6,v26
	vcipher		$out7,$out7,v26
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	subic		$len,$len,129		# $len-=129
	vcipher		$out0,$out0,v27
	addi		$len,$len,1		# $len-=128 really
	vcipher		$out1,$out1,v27
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	vcipher		$out4,$out4,v27
	vcipher		$out5,$out5,v27
	vcipher		$out6,$out6,v27
	vcipher		$out7,$out7,v27
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vcipher		$out0,$out0,v28
	 lvx_u		$in0,$x00,$inp		# load input
	vcipher		$out1,$out1,v28
	 lvx_u		$in1,$x10,$inp
	vcipher		$out2,$out2,v28
	 lvx_u		$in2,$x20,$inp
	vcipher		$out3,$out3,v28
	 lvx_u		$in3,$x30,$inp
	vcipher		$out4,$out4,v28
	 lvx_u		$in4,$x40,$inp
	vcipher		$out5,$out5,v28
	 lvx_u		$in5,$x50,$inp
	vcipher		$out6,$out6,v28
	 lvx_u		$in6,$x60,$inp
	vcipher		$out7,$out7,v28
	 lvx_u		$in7,$x70,$inp
	 addi		$inp,$inp,0x80

	vcipher		$out0,$out0,v29
	 le?vperm	$in0,$in0,$in0,$inpperm
	vcipher		$out1,$out1,v29
	 le?vperm	$in1,$in1,$in1,$inpperm
	vcipher		$out2,$out2,v29
	 le?vperm	$in2,$in2,$in2,$inpperm
	vcipher		$out3,$out3,v29
	 le?vperm	$in3,$in3,$in3,$inpperm
	vcipher		$out4,$out4,v29
	 le?vperm	$in4,$in4,$in4,$inpperm
	vcipher		$out5,$out5,v29
	 le?vperm	$in5,$in5,$in5,$inpperm
	vcipher		$out6,$out6,v29
	 le?vperm	$in6,$in6,$in6,$inpperm
	vcipher		$out7,$out7,v29
	 le?vperm	$in7,$in7,$in7,$inpperm
	add		$inp,$inp,r0		# $inp is adjusted in such a
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
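						# (r0 is $len-256 if that
						# subtraction borrowed and 0
						# otherwise, so the next
						# batch of eight loads ends
						# exactly at the input's end)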
	subfe.		r0,r0,r0		# borrow?-1:0
	vcipher		$out0,$out0,v30
	 vxor		$in0,$in0,v31		# xor with last round key
	vcipher		$out1,$out1,v30
	 vxor		$in1,$in1,v31
	vcipher		$out2,$out2,v30
	 vxor		$in2,$in2,v31
	vcipher		$out3,$out3,v30
	 vxor		$in3,$in3,v31
	vcipher		$out4,$out4,v30
	 vxor		$in4,$in4,v31
	vcipher		$out5,$out5,v30
	 vxor		$in5,$in5,v31
	vcipher		$out6,$out6,v30
	 vxor		$in6,$in6,v31
	vcipher		$out7,$out7,v30
	 vxor		$in7,$in7,v31

	bne		Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	 vadduwm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	 vadduwm	$out2,$ivec,$two
	 vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	 vadduwm	$out3,$out1,$two
	 vxor		$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	 vadduwm	$out4,$out2,$two
	 vxor		$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	 vadduwm	$out5,$out3,$two
	 vxor		$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	 vadduwm	$out6,$out4,$two
	 vxor		$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	 vadduwm	$out7,$out5,$two
	 vxor		$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	 vadduwm	$ivec,$out6,$two	# next counter value
	 vxor		$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	 vxor		$out7,$out7,$rndkey0
	mtctr		$rounds

	 vcipher	$out0,$out0,v24
	stvx_u		$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	 vcipher	$out1,$out1,v24
	stvx_u		$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	 vcipher	$out2,$out2,v24
	stvx_u		$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	 vcipher	$out3,$out3,v24
	stvx_u		$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	 vcipher	$out4,$out4,v24
	stvx_u		$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	 vcipher	$out5,$out5,v24
	stvx_u		$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	 vcipher	$out6,$out6,v24
	stvx_u		$in6,$x60,$out
	 vcipher	$out7,$out7,v24
	stvx_u		$in7,$x70,$out
	addi		$out,$out,0x80

	b		Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
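	# $len here holds (bytes remaining - 128): 0 means exactly eight
	# blocks are left, -0x10 seven, and so on down to -0x70 for one.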
	cmpwi		$len,-0x60
	blt		Lctr32_enc8x_one
	nop
	beq		Lctr32_enc8x_two
	cmpwi		$len,-0x40
	blt		Lctr32_enc8x_three
	nop
	beq		Lctr32_enc8x_four
	cmpwi		$len,-0x20
	blt		Lctr32_enc8x_five
	nop
	beq		Lctr32_enc8x_six
	cmpwi		$len,0x00
	blt		Lctr32_enc8x_seven
Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	stvx_u		$out6,$x60,$out
	addi		$out,$out,0x70
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	stvx_u		$out5,$x50,$out
	addi		$out,$out,0x60
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u		$out0,0,$out
	addi		$out,$out,0x10

Lctr32_enc8x_done:
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}

#########################################################################
{{{	# XTS procedures						#
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#                             const AES_KEY *key1, const AES_KEY *key2,	#
#                             [const] unsigned char iv[16]);		#
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
# the input tweak value is assumed to be encrypted already, and the	#
# last tweak value, suitable for a consecutive call on the same chunk	#
# of data, is written back to the original buffer. In addition, in	#
# "tweak chaining" mode only complete input blocks are processed.	#

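# A caller-side sketch of the normal (non-chaining) use, with this
# module's own key-setup entry point; buffer and variable names here
# are illustrative only:
#
#	AES_KEY k1, k2;
#	unsigned char iv[16];		/* tweak, e.g. sector number */
#	aes_p8_set_encrypt_key(userkey1, 128, &k1);
#	aes_p8_set_encrypt_key(userkey2, 128, &k2);
#	aes_p8_xts_encrypt(in, out, len, &k1, &k2, iv);
#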
my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
my $taillen = $key2;

   ($inp,$idx) = ($idx,$inp);				# reassign

$code.=<<___;
.globl	.${prefix}_xts_encrypt
.align	5
.${prefix}_xts_encrypt:
	mr		$inp,r3				# reassign
	li		r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis		r0,0xfff0
	mfspr		r12,256				# save vrsave
	li		r11,0
	mtspr		256,r0

	vspltisb	$seven,0x07			# 0x070707..07
	le?lvsl		$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor		$leperm,$leperm,$seven

	li		$idx,15
	lvx		$tweak,0,$ivp			# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$tweak,$tweak,$inptail,$inpperm

	neg		r11,$inp
	lvsr		$inpperm,0,r11			# prepare for unaligned load
	lvx		$inout,0,$inp
	addi		$inp,$inp,15			# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0				# key2==NULL?
	beq		Lxts_enc_no_key2

	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
	lwz		$rounds,240($key2)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	lvx		$rndkey0,0,$key2
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	mtctr		$rounds

Ltweak_xts_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	bdnz		Ltweak_xts_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li		$ivp,0				# don't chain the tweak
	b		Lxts_enc

Lxts_enc_no_key2:
	li		$idx,-16
	and		$len,$len,$idx			# in "tweak chaining"
							# mode only complete
							# blocks are processed
Lxts_enc:
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16

	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
	lwz		$rounds,240($key1)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	vslb		$eighty7,$seven,$seven		# 0x808080..80
	vor		$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1				# 0x010101..01
	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
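	# ($eighty7 supplies the per-byte carry-in values for the
	# GF(2^128) tweak doubling below: 0x87 reduces into byte 0,
	# 0x01 carries into every other byte)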

	${UCMP}i	$len,96
	bge		_aesp8_xts_encrypt6x

	andi.		$taillen,$len,15
	subic		r0,$len,32
	subi		$taillen,$taillen,16
	subfe		r0,r0,r0
	and		r0,r0,$taillen
	add		$inp,$inp,r0

	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	mtctr		$rounds
	b		Loop_xts_enc

.align	5
Loop_xts_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak
	vcipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi		$out,$out,16

	subic.		$len,$len,16
	beq		Lxts_enc_done

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16

	subic		r0,$len,32
	subfe		r0,r0,r0
	and		r0,r0,$taillen
	add		$inp,$inp,r0

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp
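	# The five instructions above multiply $tweak by x in GF(2^128):
	# vaddubm doubles every byte, vsrab/vsldoi rotate each byte's
	# lost MSB into the next byte up (byte 15's MSB wraps around to
	# byte 0), and vand with $eighty7 turns those masks into the
	# proper carry-ins. A C sketch of the same update (bytes in
	# IEEE P1619 little-endian order; xts_double is a hypothetical
	# name, not part of this module):
	#
	#	void xts_double(unsigned char t[16])
	#	{
	#		unsigned char carry = 0;
	#		for (int i = 0; i < 16; i++) {
	#			unsigned char msb = t[i] >> 7;
	#			t[i] = (unsigned char)((t[i] << 1) | carry);
	#			carry = msb;
	#		}
	#		t[0] ^= carry ? 0x87 : 0;	/* reduction */
	#	}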

	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$output,$output,$rndkey0	# just in case $len<16
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	mtctr		$rounds
	${UCMP}i	$len,16
	bge		Loop_xts_enc

	vxor		$output,$output,$tweak
	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm		$inptail,$inptail,$tmp,$inpperm
	vsel		$inout,$inout,$output,$inptail

	subi		r11,$out,17
	subi		$out,$out,16
	mtctr		$len
	li		$len,16
Loop_xts_enc_steal:
	lbzu		r0,1(r11)
	stb		r0,16(r11)
	bdnz		Loop_xts_enc_steal

	mtctr		$rounds
	b		Loop_xts_enc			# one more time...
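	# Ciphertext stealing: the copy loop above replicates the first
	# $taillen bytes of the last full ciphertext block into the short
	# final block, $inout holds the partial plaintext merged with the
	# remainder of that ciphertext block, and the branch re-enters
	# Loop_xts_enc to encrypt it in place at $out (already rewound
	# by 16 bytes).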

Lxts_enc_done:
	${UCMP}i	$ivp,0
	beq		Lxts_enc_ret

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_enc_ret:
	mtspr		256,r12				# restore vrsave
	li		r3,0
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt

.globl	.${prefix}_xts_decrypt
.align	5
.${prefix}_xts_decrypt:
	mr		$inp,r3				# reassign
	li		r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis		r0,0xfff8
	mfspr		r12,256				# save vrsave
	li		r11,0
	mtspr		256,r0

	andi.		r0,$len,15
	neg		r0,r0
	andi.		r0,r0,16
	sub		$len,$len,r0

	vspltisb	$seven,0x07			# 0x070707..07
	le?lvsl		$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor		$leperm,$leperm,$seven

	li		$idx,15
	lvx		$tweak,0,$ivp			# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$tweak,$tweak,$inptail,$inpperm

	neg		r11,$inp
	lvsr		$inpperm,0,r11			# prepare for unaligned load
	lvx		$inout,0,$inp
	addi		$inp,$inp,15			# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0				# key2==NULL?
	beq		Lxts_dec_no_key2

	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
	lwz		$rounds,240($key2)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	lvx		$rndkey0,0,$key2
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	mtctr		$rounds

Ltweak_xts_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	bdnz		Ltweak_xts_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li		$ivp,0				# don't chain the tweak
	b		Lxts_dec

Lxts_dec_no_key2:
	neg		$idx,$len
	andi.		$idx,$idx,15
	add		$len,$len,$idx			# in "tweak chaining"
							# mode only complete
							# blocks are processed
Lxts_dec:
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16

	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
	lwz		$rounds,240($key1)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	vslb		$eighty7,$seven,$seven		# 0x808080..80
	vor		$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1				# 0x010101..01
	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge		_aesp8_xts_decrypt6x

	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	mtctr		$rounds

	${UCMP}i	$len,16
	blt		Ltail_xts_dec
	be?b		Loop_xts_dec

.align	5
Loop_xts_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi		$out,$out,16

	subic.		$len,$len,16
	beq		Lxts_dec_done

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	mtctr		$rounds
	${UCMP}i	$len,16
	bge		Loop_xts_dec

Ltail_xts_dec:
	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak1,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak1,$tweak1,$tmp

	subi		$inp,$inp,16
	add		$inp,$inp,$len

	vxor		$inout,$inout,$tweak		# :-( undo the $tweak applied above
	vxor		$inout,$inout,$tweak1		# :-) and use $tweak1 instead

Loop_xts_dec_short:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_dec_short

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak1
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	#addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm

	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm		$inptail,$inptail,$tmp,$inpperm
	vsel		$inout,$inout,$output,$inptail

	vxor		$rndkey0,$rndkey0,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	subi		r11,$out,1
	mtctr		$len
	li		$len,16
Loop_xts_dec_steal:
	lbzu		r0,1(r11)
	stb		r0,16(r11)
	bdnz		Loop_xts_dec_steal

	mtctr		$rounds
	b		Loop_xts_dec			# one more time...
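	# Decrypt-side stealing mirrors the encrypt path, except that
	# the tweaks are consumed in swapped order: the block processed
	# in Loop_xts_dec_short used $tweak1 (the later tweak), while
	# the merged block re-entering Loop_xts_dec uses $tweak.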

Lxts_dec_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec_ret

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec_ret:
	mtspr		256,r12				# restore vrsave
	li		r3,0
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___
#########################################################################
{{	# Optimized XTS procedures					#
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
my $taillen=$x70;

$code.=<<___;
.align	5
_aesp8_xts_encrypt6x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr		r11
	li		r7,`$FRAME+8*16+15`
	li		r3,`$FRAME+8*16+31`
	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx		v20,r7,$sp		# ABI says so
	addi		r7,r7,32
	stvx		v21,r3,$sp
	addi		r3,r3,32
	stvx		v22,r7,$sp
	addi		r7,r7,32
	stvx		v23,r3,$sp
	addi		r3,r3,32
	stvx		v24,r7,$sp
	addi		r7,r7,32
	stvx		v25,r3,$sp
	addi		r3,r3,32
	stvx		v26,r7,$sp
	addi		r7,r7,32
	stvx		v27,r3,$sp
	addi		r3,r3,32
	stvx		v28,r7,$sp
	addi		r7,r7,32
	stvx		v29,r3,$sp
	addi		r3,r3,32
	stvx		v30,r7,$sp
	stvx		v31,r3,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key1	# load key schedule
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	lvx		v31,$x00,$key1
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_xts_enc_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key1
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_xts_enc_key

	lvx		v26,$x10,$key1
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key1
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key1
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key1
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key1
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key1
	?vperm		v29,v29,v30,$keyperm
	lvx		$twk5,$x70,$key1	# borrow $twk5
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$twk5,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	 vperm		$in0,$inout,$inptail,$inpperm
	 subi		$inp,$inp,31		# undo "caller"
	vxor		$twk0,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	 vxor		$out0,$in0,$twk0
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in1,$x10,$inp
	vxor		$twk1,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in1,$in1,$in1,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out1,$in1,$twk1
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in2,$x20,$inp
	 andi.		$taillen,$len,15
	vxor		$twk2,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in2,$in2,$in2,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out2,$in2,$twk2
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in3,$x30,$inp
	 sub		$len,$len,$taillen
	vxor		$twk3,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in3,$in3,$in3,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out3,$in3,$twk3
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in4,$x40,$inp
	 subi		$len,$len,0x60
	vxor		$twk4,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in4,$in4,$in4,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out4,$in4,$twk4
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in5,$x50,$inp
	 addi		$inp,$inp,0x60
	vxor		$twk5,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in5,$in5,$in5,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out5,$in5,$twk5
	vxor		$tweak,$tweak,$tmp
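	# Six consecutive tweaks $twk0-$twk5 are generated serially
	# above; inside Loop_xts_enc6x the next six are recomputed in
	# the gaps between vcipher instructions so the tweak chain never
	# stalls the round pipeline.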

	vxor		v31,v31,$rndkey0
	mtctr		$rounds
	b		Loop_xts_enc6x

.align	5
Loop_xts_enc6x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_enc6x

	subic		$len,$len,96		# $len-=96
	 vxor		$in0,$twk0,v31		# xor with last round key
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk0,$tweak,$rndkey0
	 vaddubm	$tweak,$tweak,$tweak
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	 vand		$tmp,$tmp,$eighty7
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	 vxor		$in1,$twk1,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk1,$tweak,$rndkey0
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25

	and		r0,r0,$len
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out0,$out0,v26
	vcipher		$out1,$out1,v26
	 vand		$tmp,$tmp,$eighty7
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out4,$out4,v26
	vcipher		$out5,$out5,v26
	add		$inp,$inp,r0		# $inp is adjusted in such a
						# way that at exit from the
						# loop inX-in5 are loaded
						# with last "words"
						# (same borrow idiom as in
						# the CTR path above)
         vxor           $in2,$twk2,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk2,$tweak,$rndkey0
         vaddubm        $tweak,$tweak,$tweak
        vcipher         $out0,$out0,v27
        vcipher         $out1,$out1,v27
         vsldoi         $tmp,$tmp,$tmp,15
        vcipher         $out2,$out2,v27
        vcipher         $out3,$out3,v27
         vand           $tmp,$tmp,$eighty7
        vcipher         $out4,$out4,v27
        vcipher         $out5,$out5,v27

        addi            $key_,$sp,$FRAME+15     # rewind $key_
         vxor           $tweak,$tweak,$tmp
        vcipher         $out0,$out0,v28
        vcipher         $out1,$out1,v28
         vxor           $in3,$twk3,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk3,$tweak,$rndkey0
        vcipher         $out2,$out2,v28
        vcipher         $out3,$out3,v28
         vaddubm        $tweak,$tweak,$tweak
         vsldoi         $tmp,$tmp,$tmp,15
        vcipher         $out4,$out4,v28
        vcipher         $out5,$out5,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]
         vand           $tmp,$tmp,$eighty7

        vcipher         $out0,$out0,v29
        vcipher         $out1,$out1,v29
         vxor           $tweak,$tweak,$tmp
        vcipher         $out2,$out2,v29
        vcipher         $out3,$out3,v29
         vxor           $in4,$twk4,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk4,$tweak,$rndkey0
        vcipher         $out4,$out4,v29
        vcipher         $out5,$out5,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vaddubm        $tweak,$tweak,$tweak
         vsldoi         $tmp,$tmp,$tmp,15

        vcipher         $out0,$out0,v30
        vcipher         $out1,$out1,v30
         vand           $tmp,$tmp,$eighty7
        vcipher         $out2,$out2,v30
        vcipher         $out3,$out3,v30
         vxor           $tweak,$tweak,$tmp
        vcipher         $out4,$out4,v30
        vcipher         $out5,$out5,v30
         vxor           $in5,$twk5,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk5,$tweak,$rndkey0

        vcipherlast     $out0,$out0,$in0
         lvx_u          $in0,$x00,$inp          # load next input block
         vaddubm        $tweak,$tweak,$tweak
         vsldoi         $tmp,$tmp,$tmp,15
        vcipherlast     $out1,$out1,$in1
         lvx_u          $in1,$x10,$inp
        vcipherlast     $out2,$out2,$in2
         le?vperm       $in0,$in0,$in0,$leperm
         lvx_u          $in2,$x20,$inp
         vand           $tmp,$tmp,$eighty7
        vcipherlast     $out3,$out3,$in3
         le?vperm       $in1,$in1,$in1,$leperm
         lvx_u          $in3,$x30,$inp
        vcipherlast     $out4,$out4,$in4
         le?vperm       $in2,$in2,$in2,$leperm
         lvx_u          $in4,$x40,$inp
         vxor           $tweak,$tweak,$tmp
        vcipherlast     $tmp,$out5,$in5         # last block might be needed
                                                # in stealing mode
         le?vperm       $in3,$in3,$in3,$leperm
         lvx_u          $in5,$x50,$inp
         addi           $inp,$inp,0x60
         le?vperm       $in4,$in4,$in4,$leperm
         le?vperm       $in5,$in5,$in5,$leperm

        le?vperm        $out0,$out0,$out0,$leperm
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
         vxor           $out0,$in0,$twk0
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
         vxor           $out1,$in1,$twk1
        le?vperm        $out3,$out3,$out3,$leperm
        stvx_u          $out2,$x20,$out
         vxor           $out2,$in2,$twk2
        le?vperm        $out4,$out4,$out4,$leperm
        stvx_u          $out3,$x30,$out
         vxor           $out3,$in3,$twk3
        le?vperm        $out5,$tmp,$tmp,$leperm
        stvx_u          $out4,$x40,$out
         vxor           $out4,$in4,$twk4
        le?stvx_u       $out5,$x50,$out
        be?stvx_u       $tmp, $x50,$out
         vxor           $out5,$in5,$twk5
        addi            $out,$out,0x60

        mtctr           $rounds
        beq             Loop_xts_enc6x          # did $len-=96 borrow?

        addic.          $len,$len,0x60
        beq             Lxts_enc6x_zero
        cmpwi           $len,0x20
        blt             Lxts_enc6x_one
        nop
        beq             Lxts_enc6x_two
        cmpwi           $len,0x40
        blt             Lxts_enc6x_three
        nop
        beq             Lxts_enc6x_four

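# Tail dispatch: the cmpwi/blt/beq ladder above sorts the 1..5
# remaining whole blocks into the handlers below (the nops presumably
# just pad branch placement); each handler xors the leftover inputs
# with their tweaks and shares _aesp8_xts_enc5x for the actual rounds.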
Lxts_enc6x_five:
        vxor            $out0,$in1,$twk0
        vxor            $out1,$in2,$twk1
        vxor            $out2,$in3,$twk2
        vxor            $out3,$in4,$twk3
        vxor            $out4,$in5,$twk4

        bl              _aesp8_xts_enc5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk5             # unused tweak
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
        le?vperm        $out3,$out3,$out3,$leperm
        stvx_u          $out2,$x20,$out
        vxor            $tmp,$out4,$twk5        # last block prep for stealing
        le?vperm        $out4,$out4,$out4,$leperm
        stvx_u          $out3,$x30,$out
        stvx_u          $out4,$x40,$out
        addi            $out,$out,0x50
        bne             Lxts_enc6x_steal
        b               Lxts_enc6x_done

.align  4
Lxts_enc6x_four:
        vxor            $out0,$in2,$twk0
        vxor            $out1,$in3,$twk1
        vxor            $out2,$in4,$twk2
        vxor            $out3,$in5,$twk3
        vxor            $out4,$out4,$out4

        bl              _aesp8_xts_enc5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk4             # unused tweak
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
        vxor            $tmp,$out3,$twk4        # last block prep for stealing
        le?vperm        $out3,$out3,$out3,$leperm
        stvx_u          $out2,$x20,$out
        stvx_u          $out3,$x30,$out
        addi            $out,$out,0x40
        bne             Lxts_enc6x_steal
        b               Lxts_enc6x_done

.align  4
Lxts_enc6x_three:
        vxor            $out0,$in3,$twk0
        vxor            $out1,$in4,$twk1
        vxor            $out2,$in5,$twk2
        vxor            $out3,$out3,$out3
        vxor            $out4,$out4,$out4

        bl              _aesp8_xts_enc5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk3             # unused tweak
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        vxor            $tmp,$out2,$twk3        # last block prep for stealing
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
        stvx_u          $out2,$x20,$out
        addi            $out,$out,0x30
        bne             Lxts_enc6x_steal
        b               Lxts_enc6x_done

.align  4
Lxts_enc6x_two:
        vxor            $out0,$in4,$twk0
        vxor            $out1,$in5,$twk1
        vxor            $out2,$out2,$out2
        vxor            $out3,$out3,$out3
        vxor            $out4,$out4,$out4

        bl              _aesp8_xts_enc5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk2             # unused tweak
        vxor            $tmp,$out1,$twk2        # last block prep for stealing
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        stvx_u          $out1,$x10,$out
        addi            $out,$out,0x20
        bne             Lxts_enc6x_steal
        b               Lxts_enc6x_done

.align  4
Lxts_enc6x_one:
        vxor            $out0,$in5,$twk0
        nop
Loop_xts_enc1x:
        vcipher         $out0,$out0,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vcipher         $out0,$out0,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Loop_xts_enc1x

        add             $inp,$inp,$taillen
        cmpwi           $taillen,0
        vcipher         $out0,$out0,v24

        subi            $inp,$inp,16
        vcipher         $out0,$out0,v25

        lvsr            $inpperm,0,$taillen     # build tail permute from $taillen
        vcipher         $out0,$out0,v26

        lvx_u           $in0,0,$inp
        vcipher         $out0,$out0,v27

        addi            $key_,$sp,$FRAME+15     # rewind $key_
        vcipher         $out0,$out0,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]

        vcipher         $out0,$out0,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vxor           $twk0,$twk0,v31

        le?vperm        $in0,$in0,$in0,$leperm
        vcipher         $out0,$out0,v30

        vperm           $in0,$in0,$in0,$inpperm
        vcipherlast     $out0,$out0,$twk0

        vmr             $twk0,$twk1             # unused tweak
        vxor            $tmp,$out0,$twk1        # last block prep for stealing
        le?vperm        $out0,$out0,$out0,$leperm
        stvx_u          $out0,$x00,$out         # store output
        addi            $out,$out,0x10
        bne             Lxts_enc6x_steal
        b               Lxts_enc6x_done

.align  4
Lxts_enc6x_zero:
        cmpwi           $taillen,0
        beq             Lxts_enc6x_done

        add             $inp,$inp,$taillen
        subi            $inp,$inp,16
        lvx_u           $in0,0,$inp
        lvsr            $inpperm,0,$taillen     # $in5 is no more
        le?vperm        $in0,$in0,$in0,$leperm
        vperm           $in0,$in0,$in0,$inpperm
        vxor            $tmp,$tmp,$twk0
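# Ciphertext stealing: at this point $tmp holds the last full
# ciphertext block already masked with the spare tweak ("last block
# prep" above), and $in0 holds the rotated short tail. vperm expands
# $inpperm into a byte-select mask and vsel splices tail and
# ciphertext while both stay in the tweak-masked domain; the lbzu/stb
# loop then moves the displaced $taillen ciphertext bytes to the very
# end of the output before the spliced block takes one more trip
# through Loop_xts_enc1x.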
Lxts_enc6x_steal:
        vxor            $in0,$in0,$twk0
        vxor            $out0,$out0,$out0
        vspltisb        $out1,-1
        vperm           $out0,$out0,$out1,$inpperm
        vsel            $out0,$in0,$tmp,$out0   # $tmp is last block, remember?

        subi            r30,$out,17
        subi            $out,$out,16
        mtctr           $taillen
Loop_xts_enc6x_steal:
        lbzu            r0,1(r30)
        stb             r0,16(r30)
        bdnz            Loop_xts_enc6x_steal

        li              $taillen,0
        mtctr           $rounds
        b               Loop_xts_enc1x          # one more time...

.align  4
Lxts_enc6x_done:
        ${UCMP}i        $ivp,0
        beq             Lxts_enc6x_ret

        vxor            $tweak,$twk0,$rndkey0
        le?vperm        $tweak,$tweak,$tweak,$leperm
        stvx_u          $tweak,0,$ivp

Lxts_enc6x_ret:
        mtlr            r11
        li              r10,`$FRAME+15`
        li              r11,`$FRAME+31`
        stvx            $seven,r10,$sp          # wipe copies of round keys
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32
        stvx            $seven,r10,$sp
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32
        stvx            $seven,r10,$sp
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32
        stvx            $seven,r10,$sp
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32

        mtspr           256,$vrsave
        lvx             v20,r10,$sp             # ABI says so
        addi            r10,r10,32
        lvx             v21,r11,$sp
        addi            r11,r11,32
        lvx             v22,r10,$sp
        addi            r10,r10,32
        lvx             v23,r11,$sp
        addi            r11,r11,32
        lvx             v24,r10,$sp
        addi            r10,r10,32
        lvx             v25,r11,$sp
        addi            r11,r11,32
        lvx             v26,r10,$sp
        addi            r10,r10,32
        lvx             v27,r11,$sp
        addi            r11,r11,32
        lvx             v28,r10,$sp
        addi            r10,r10,32
        lvx             v29,r11,$sp
        addi            r11,r11,32
        lvx             v30,r10,$sp
        lvx             v31,r11,$sp
        $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
        $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
        $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
        $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
        $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
        $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
        addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
        blr
        .long           0
        .byte           0,12,0x04,1,0x80,6,6,0
        .long           0

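# _aesp8_xts_enc5x is the shared tail for the two- to five-block
# cases above: it expects $out0-$out4 pre-xored with their tweaks and
# ctr preloaded with $rounds, and returns the finished ciphertext
# with the last-round key folded into the tweaks (the twk0-twk4 xor
# v31 values computed on the fly below).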
.align  5
_aesp8_xts_enc5x:
        vcipher         $out0,$out0,v24
        vcipher         $out1,$out1,v24
        vcipher         $out2,$out2,v24
        vcipher         $out3,$out3,v24
        vcipher         $out4,$out4,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vcipher         $out0,$out0,v25
        vcipher         $out1,$out1,v25
        vcipher         $out2,$out2,v25
        vcipher         $out3,$out3,v25
        vcipher         $out4,$out4,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            _aesp8_xts_enc5x

        add             $inp,$inp,$taillen
        cmpwi           $taillen,0
        vcipher         $out0,$out0,v24
        vcipher         $out1,$out1,v24
        vcipher         $out2,$out2,v24
        vcipher         $out3,$out3,v24
        vcipher         $out4,$out4,v24

        subi            $inp,$inp,16
        vcipher         $out0,$out0,v25
        vcipher         $out1,$out1,v25
        vcipher         $out2,$out2,v25
        vcipher         $out3,$out3,v25
        vcipher         $out4,$out4,v25
         vxor           $twk0,$twk0,v31

        vcipher         $out0,$out0,v26
        lvsr            $inpperm,0,$taillen     # $in5 is no more
        vcipher         $out1,$out1,v26
        vcipher         $out2,$out2,v26
        vcipher         $out3,$out3,v26
        vcipher         $out4,$out4,v26
         vxor           $in1,$twk1,v31

        vcipher         $out0,$out0,v27
        lvx_u           $in0,0,$inp
        vcipher         $out1,$out1,v27
        vcipher         $out2,$out2,v27
        vcipher         $out3,$out3,v27
        vcipher         $out4,$out4,v27
         vxor           $in2,$twk2,v31

        addi            $key_,$sp,$FRAME+15     # rewind $key_
        vcipher         $out0,$out0,v28
        vcipher         $out1,$out1,v28
        vcipher         $out2,$out2,v28
        vcipher         $out3,$out3,v28
        vcipher         $out4,$out4,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]
         vxor           $in3,$twk3,v31

        vcipher         $out0,$out0,v29
        le?vperm        $in0,$in0,$in0,$leperm
        vcipher         $out1,$out1,v29
        vcipher         $out2,$out2,v29
        vcipher         $out3,$out3,v29
        vcipher         $out4,$out4,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vxor           $in4,$twk4,v31

        vcipher         $out0,$out0,v30
        vperm           $in0,$in0,$in0,$inpperm
        vcipher         $out1,$out1,v30
        vcipher         $out2,$out2,v30
        vcipher         $out3,$out3,v30
        vcipher         $out4,$out4,v30

        vcipherlast     $out0,$out0,$twk0
        vcipherlast     $out1,$out1,$in1
        vcipherlast     $out2,$out2,$in2
        vcipherlast     $out3,$out3,$in3
        vcipherlast     $out4,$out4,$in4
        blr
        .long           0
        .byte           0,12,0x14,0,0,0,0,0

.align  5
_aesp8_xts_decrypt6x:
        $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
        mflr            r11
        li              r7,`$FRAME+8*16+15`
        li              r3,`$FRAME+8*16+31`
        $PUSH           r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
        stvx            v20,r7,$sp              # ABI says so
        addi            r7,r7,32
        stvx            v21,r3,$sp
        addi            r3,r3,32
        stvx            v22,r7,$sp
        addi            r7,r7,32
        stvx            v23,r3,$sp
        addi            r3,r3,32
        stvx            v24,r7,$sp
        addi            r7,r7,32
        stvx            v25,r3,$sp
        addi            r3,r3,32
        stvx            v26,r7,$sp
        addi            r7,r7,32
        stvx            v27,r3,$sp
        addi            r3,r3,32
        stvx            v28,r7,$sp
        addi            r7,r7,32
        stvx            v29,r3,$sp
        addi            r3,r3,32
        stvx            v30,r7,$sp
        stvx            v31,r3,$sp
        li              r0,-1
        stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
        li              $x10,0x10
        $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
        li              $x20,0x20
        $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
        li              $x30,0x30
        $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
        li              $x40,0x40
        $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
        li              $x50,0x50
        $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
        li              $x60,0x60
        $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
        li              $x70,0x70
        mtspr           256,r0

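# Prologue note: writing r0=-1 to vrsave marks every vector register
# live, and $x10-$x70 stay loaded with 0x10..0x70 so the
# register-indexed lvx/stvx above and below can address the frame and
# key schedule without extra address arithmetic.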
        subi            $rounds,$rounds,3       # -4 in total

        lvx             $rndkey0,$x00,$key1     # load key schedule
        lvx             v30,$x10,$key1
        addi            $key1,$key1,0x20
        lvx             v31,$x00,$key1
        ?vperm          $rndkey0,$rndkey0,v30,$keyperm
        addi            $key_,$sp,$FRAME+15
        mtctr           $rounds

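# Load_xts_dec_key aligns the schedule with vperm/$keyperm and parks
# the first rounds in the stack frame, so the decrypt loops can
# re-read round keys with plain lvx from $key_ instead of walking the
# caller's (possibly unaligned) key each iteration.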
Load_xts_dec_key:
        ?vperm          v24,v30,v31,$keyperm
        lvx             v30,$x10,$key1
        addi            $key1,$key1,0x20
        stvx            v24,$x00,$key_          # off-load round[1]
        ?vperm          v25,v31,v30,$keyperm
        lvx             v31,$x00,$key1
        stvx            v25,$x10,$key_          # off-load round[2]
        addi            $key_,$key_,0x20
        bdnz            Load_xts_dec_key

        lvx             v26,$x10,$key1
        ?vperm          v24,v30,v31,$keyperm
        lvx             v27,$x20,$key1
        stvx            v24,$x00,$key_          # off-load round[3]
        ?vperm          v25,v31,v26,$keyperm
        lvx             v28,$x30,$key1
        stvx            v25,$x10,$key_          # off-load round[4]
        addi            $key_,$sp,$FRAME+15     # rewind $key_
        ?vperm          v26,v26,v27,$keyperm
        lvx             v29,$x40,$key1
        ?vperm          v27,v27,v28,$keyperm
        lvx             v30,$x50,$key1
        ?vperm          v28,v28,v29,$keyperm
        lvx             v31,$x60,$key1
        ?vperm          v29,v29,v30,$keyperm
        lvx             $twk5,$x70,$key1        # borrow $twk5
        ?vperm          v30,v30,v31,$keyperm
        lvx             v24,$x00,$key_          # pre-load round[1]
        ?vperm          v31,v31,$twk5,$keyperm
        lvx             v25,$x10,$key_          # pre-load round[2]

         vperm          $in0,$inout,$inptail,$inpperm
         subi           $inp,$inp,31            # undo "caller"
        vxor            $twk0,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
        vsldoi          $tmp,$tmp,$tmp,15
        vand            $tmp,$tmp,$eighty7
         vxor           $out0,$in0,$twk0
        vxor            $tweak,$tweak,$tmp

         lvx_u          $in1,$x10,$inp
        vxor            $twk1,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
        vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in1,$in1,$in1,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out1,$in1,$twk1
        vxor            $tweak,$tweak,$tmp

         lvx_u          $in2,$x20,$inp
         andi.          $taillen,$len,15
        vxor            $twk2,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
        vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in2,$in2,$in2,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out2,$in2,$twk2
        vxor            $tweak,$tweak,$tmp

         lvx_u          $in3,$x30,$inp
         sub            $len,$len,$taillen
        vxor            $twk3,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
        vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in3,$in3,$in3,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out3,$in3,$twk3
        vxor            $tweak,$tweak,$tmp

         lvx_u          $in4,$x40,$inp
         subi           $len,$len,0x60
        vxor            $twk4,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
        vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in4,$in4,$in4,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out4,$in4,$twk4
        vxor            $tweak,$tweak,$tmp

         lvx_u          $in5,$x50,$inp
         addi           $inp,$inp,0x60
        vxor            $twk5,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
        vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in5,$in5,$in5,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out5,$in5,$twk5
        vxor            $tweak,$tweak,$tmp

        vxor            v31,v31,$rndkey0
        mtctr           $rounds
        b               Loop_xts_dec6x

.align  5
Loop_xts_dec6x:
        vncipher        $out0,$out0,v24
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Loop_xts_dec6x

        subic           $len,$len,96            # $len-=96
         vxor           $in0,$twk0,v31          # xor with last round key
        vncipher        $out0,$out0,v24
        vncipher        $out1,$out1,v24
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk0,$tweak,$rndkey0
         vaddubm        $tweak,$tweak,$tweak
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
         vsldoi         $tmp,$tmp,$tmp,15
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24

        subfe.          r0,r0,r0                # borrow?-1:0
         vand           $tmp,$tmp,$eighty7
        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
         vxor           $tweak,$tweak,$tmp
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
         vxor           $in1,$twk1,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk1,$tweak,$rndkey0
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25

        and             r0,r0,$len
         vaddubm        $tweak,$tweak,$tweak
         vsldoi         $tmp,$tmp,$tmp,15
        vncipher        $out0,$out0,v26
        vncipher        $out1,$out1,v26
         vand           $tmp,$tmp,$eighty7
        vncipher        $out2,$out2,v26
        vncipher        $out3,$out3,v26
         vxor           $tweak,$tweak,$tmp
        vncipher        $out4,$out4,v26
        vncipher        $out5,$out5,v26

        add             $inp,$inp,r0            # $inp is adjusted in such a
                                                # way that at exit from the
                                                # loop in0-in5 are loaded
                                                # with the last "words"
         vxor           $in2,$twk2,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk2,$tweak,$rndkey0
         vaddubm        $tweak,$tweak,$tweak
        vncipher        $out0,$out0,v27
        vncipher        $out1,$out1,v27
         vsldoi         $tmp,$tmp,$tmp,15
        vncipher        $out2,$out2,v27
        vncipher        $out3,$out3,v27
         vand           $tmp,$tmp,$eighty7
        vncipher        $out4,$out4,v27
        vncipher        $out5,$out5,v27

        addi            $key_,$sp,$FRAME+15     # rewind $key_
         vxor           $tweak,$tweak,$tmp
        vncipher        $out0,$out0,v28
        vncipher        $out1,$out1,v28
         vxor           $in3,$twk3,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk3,$tweak,$rndkey0
        vncipher        $out2,$out2,v28
        vncipher        $out3,$out3,v28
         vaddubm        $tweak,$tweak,$tweak
         vsldoi         $tmp,$tmp,$tmp,15
        vncipher        $out4,$out4,v28
        vncipher        $out5,$out5,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]
         vand           $tmp,$tmp,$eighty7

        vncipher        $out0,$out0,v29
        vncipher        $out1,$out1,v29
         vxor           $tweak,$tweak,$tmp
        vncipher        $out2,$out2,v29
        vncipher        $out3,$out3,v29
         vxor           $in4,$twk4,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk4,$tweak,$rndkey0
        vncipher        $out4,$out4,v29
        vncipher        $out5,$out5,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vaddubm        $tweak,$tweak,$tweak
         vsldoi         $tmp,$tmp,$tmp,15

        vncipher        $out0,$out0,v30
        vncipher        $out1,$out1,v30
         vand           $tmp,$tmp,$eighty7
        vncipher        $out2,$out2,v30
        vncipher        $out3,$out3,v30
         vxor           $tweak,$tweak,$tmp
        vncipher        $out4,$out4,v30
        vncipher        $out5,$out5,v30
         vxor           $in5,$twk5,v31
         vsrab          $tmp,$tweak,$seven      # next tweak value
         vxor           $twk5,$tweak,$rndkey0

        vncipherlast    $out0,$out0,$in0
         lvx_u          $in0,$x00,$inp          # load next input block
         vaddubm        $tweak,$tweak,$tweak
         vsldoi         $tmp,$tmp,$tmp,15
        vncipherlast    $out1,$out1,$in1
         lvx_u          $in1,$x10,$inp
        vncipherlast    $out2,$out2,$in2
         le?vperm       $in0,$in0,$in0,$leperm
         lvx_u          $in2,$x20,$inp
         vand           $tmp,$tmp,$eighty7
        vncipherlast    $out3,$out3,$in3
         le?vperm       $in1,$in1,$in1,$leperm
         lvx_u          $in3,$x30,$inp
        vncipherlast    $out4,$out4,$in4
         le?vperm       $in2,$in2,$in2,$leperm
         lvx_u          $in4,$x40,$inp
         vxor           $tweak,$tweak,$tmp
        vncipherlast    $out5,$out5,$in5
         le?vperm       $in3,$in3,$in3,$leperm
         lvx_u          $in5,$x50,$inp
         addi           $inp,$inp,0x60
         le?vperm       $in4,$in4,$in4,$leperm
         le?vperm       $in5,$in5,$in5,$leperm

        le?vperm        $out0,$out0,$out0,$leperm
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
         vxor           $out0,$in0,$twk0
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
         vxor           $out1,$in1,$twk1
        le?vperm        $out3,$out3,$out3,$leperm
        stvx_u          $out2,$x20,$out
         vxor           $out2,$in2,$twk2
        le?vperm        $out4,$out4,$out4,$leperm
        stvx_u          $out3,$x30,$out
         vxor           $out3,$in3,$twk3
        le?vperm        $out5,$out5,$out5,$leperm
        stvx_u          $out4,$x40,$out
         vxor           $out4,$in4,$twk4
        stvx_u          $out5,$x50,$out
         vxor           $out5,$in5,$twk5
        addi            $out,$out,0x60

        mtctr           $rounds
        beq             Loop_xts_dec6x          # did $len-=96 borrow?

        addic.          $len,$len,0x60
        beq             Lxts_dec6x_zero
        cmpwi           $len,0x20
        blt             Lxts_dec6x_one
        nop
        beq             Lxts_dec6x_two
        cmpwi           $len,0x40
        blt             Lxts_dec6x_three
        nop
        beq             Lxts_dec6x_four

Lxts_dec6x_five:
        vxor            $out0,$in1,$twk0
        vxor            $out1,$in2,$twk1
        vxor            $out2,$in3,$twk2
        vxor            $out3,$in4,$twk3
        vxor            $out4,$in5,$twk4

        bl              _aesp8_xts_dec5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk5             # unused tweak
        vxor            $twk1,$tweak,$rndkey0
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        vxor            $out0,$in0,$twk1
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
        le?vperm        $out3,$out3,$out3,$leperm
        stvx_u          $out2,$x20,$out
        le?vperm        $out4,$out4,$out4,$leperm
        stvx_u          $out3,$x30,$out
        stvx_u          $out4,$x40,$out
        addi            $out,$out,0x50
        bne             Lxts_dec6x_steal
        b               Lxts_dec6x_done

.align  4
Lxts_dec6x_four:
        vxor            $out0,$in2,$twk0
        vxor            $out1,$in3,$twk1
        vxor            $out2,$in4,$twk2
        vxor            $out3,$in5,$twk3
        vxor            $out4,$out4,$out4

        bl              _aesp8_xts_dec5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk4             # unused tweak
        vmr             $twk1,$twk5
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        vxor            $out0,$in0,$twk5
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
        le?vperm        $out3,$out3,$out3,$leperm
        stvx_u          $out2,$x20,$out
        stvx_u          $out3,$x30,$out
        addi            $out,$out,0x40
        bne             Lxts_dec6x_steal
        b               Lxts_dec6x_done

.align  4
Lxts_dec6x_three:
        vxor            $out0,$in3,$twk0
        vxor            $out1,$in4,$twk1
        vxor            $out2,$in5,$twk2
        vxor            $out3,$out3,$out3
        vxor            $out4,$out4,$out4

        bl              _aesp8_xts_dec5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk3             # unused tweak
        vmr             $twk1,$twk4
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        vxor            $out0,$in0,$twk4
        le?vperm        $out2,$out2,$out2,$leperm
        stvx_u          $out1,$x10,$out
        stvx_u          $out2,$x20,$out
        addi            $out,$out,0x30
        bne             Lxts_dec6x_steal
        b               Lxts_dec6x_done

.align  4
Lxts_dec6x_two:
        vxor            $out0,$in4,$twk0
        vxor            $out1,$in5,$twk1
        vxor            $out2,$out2,$out2
        vxor            $out3,$out3,$out3
        vxor            $out4,$out4,$out4

        bl              _aesp8_xts_dec5x

        le?vperm        $out0,$out0,$out0,$leperm
        vmr             $twk0,$twk2             # unused tweak
        vmr             $twk1,$twk3
        le?vperm        $out1,$out1,$out1,$leperm
        stvx_u          $out0,$x00,$out         # store output
        vxor            $out0,$in0,$twk3
        stvx_u          $out1,$x10,$out
        addi            $out,$out,0x20
        bne             Lxts_dec6x_steal
        b               Lxts_dec6x_done

.align  4
Lxts_dec6x_one:
        vxor            $out0,$in5,$twk0
        nop
Loop_xts_dec1x:
        vncipher        $out0,$out0,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vncipher        $out0,$out0,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Loop_xts_dec1x

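# Branchless tail probe: ($taillen-1)&16 is 16 exactly when $taillen
# is zero, so the sub below steps $inp back one block only in the
# no-tail case, keeping the unconditional lvx_u of $in0 inside the
# input buffer.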
        subi            r0,$taillen,1
        vncipher        $out0,$out0,v24

        andi.           r0,r0,16
        cmpwi           $taillen,0
        vncipher        $out0,$out0,v25

        sub             $inp,$inp,r0
        vncipher        $out0,$out0,v26

        lvx_u           $in0,0,$inp
        vncipher        $out0,$out0,v27

        addi            $key_,$sp,$FRAME+15     # rewind $key_
        vncipher        $out0,$out0,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]

        vncipher        $out0,$out0,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vxor           $twk0,$twk0,v31

        le?vperm        $in0,$in0,$in0,$leperm
        vncipher        $out0,$out0,v30

        mtctr           $rounds
        vncipherlast    $out0,$out0,$twk0

        vmr             $twk0,$twk1             # unused tweak
        vmr             $twk1,$twk2
        le?vperm        $out0,$out0,$out0,$leperm
        stvx_u          $out0,$x00,$out         # store output
        addi            $out,$out,0x10
        vxor            $out0,$in0,$twk2
        bne             Lxts_dec6x_steal
        b               Lxts_dec6x_done

.align  4
Lxts_dec6x_zero:
        cmpwi           $taillen,0
        beq             Lxts_dec6x_done

        lvx_u           $in0,0,$inp
        le?vperm        $in0,$in0,$in0,$leperm
        vxor            $out0,$in0,$twk1
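# Decrypt-side stealing processes tweaks out of order: the loop below
# decrypts the last full ciphertext block (already in $out0) with the
# later tweak $twk1, while $twk0 is held back for the spliced short
# block that takes one more pass through Loop_xts_dec1x, matching the
# tweak swap XTS prescribes for non-block-aligned lengths.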
Lxts_dec6x_steal:
        vncipher        $out0,$out0,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vncipher        $out0,$out0,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Lxts_dec6x_steal

        add             $inp,$inp,$taillen
        vncipher        $out0,$out0,v24

        cmpwi           $taillen,0
        vncipher        $out0,$out0,v25

        lvx_u           $in0,0,$inp
        vncipher        $out0,$out0,v26

        lvsr            $inpperm,0,$taillen     # $in5 is no more
        vncipher        $out0,$out0,v27

        addi            $key_,$sp,$FRAME+15     # rewind $key_
        vncipher        $out0,$out0,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]

        vncipher        $out0,$out0,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vxor           $twk1,$twk1,v31

        le?vperm        $in0,$in0,$in0,$leperm
        vncipher        $out0,$out0,v30

        vperm           $in0,$in0,$in0,$inpperm
        vncipherlast    $tmp,$out0,$twk1

        le?vperm        $out0,$tmp,$tmp,$leperm
        le?stvx_u       $out0,0,$out
        be?stvx_u       $tmp,0,$out

        vxor            $out0,$out0,$out0
        vspltisb        $out1,-1
        vperm           $out0,$out0,$out1,$inpperm
        vsel            $out0,$in0,$tmp,$out0
        vxor            $out0,$out0,$twk0

        subi            r30,$out,1
        mtctr           $taillen
Loop_xts_dec6x_steal:
        lbzu            r0,1(r30)
        stb             r0,16(r30)
        bdnz            Loop_xts_dec6x_steal

        li              $taillen,0
        mtctr           $rounds
        b               Loop_xts_dec1x          # one more time...

.align  4
Lxts_dec6x_done:
        ${UCMP}i        $ivp,0
        beq             Lxts_dec6x_ret

        vxor            $tweak,$twk0,$rndkey0
        le?vperm        $tweak,$tweak,$tweak,$leperm
        stvx_u          $tweak,0,$ivp

Lxts_dec6x_ret:
        mtlr            r11
        li              r10,`$FRAME+15`
        li              r11,`$FRAME+31`
        stvx            $seven,r10,$sp          # wipe copies of round keys
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32
        stvx            $seven,r10,$sp
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32
        stvx            $seven,r10,$sp
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32
        stvx            $seven,r10,$sp
        addi            r10,r10,32
        stvx            $seven,r11,$sp
        addi            r11,r11,32

        mtspr           256,$vrsave
        lvx             v20,r10,$sp             # ABI says so
        addi            r10,r10,32
        lvx             v21,r11,$sp
        addi            r11,r11,32
        lvx             v22,r10,$sp
        addi            r10,r10,32
        lvx             v23,r11,$sp
        addi            r11,r11,32
        lvx             v24,r10,$sp
        addi            r10,r10,32
        lvx             v25,r11,$sp
        addi            r11,r11,32
        lvx             v26,r10,$sp
        addi            r10,r10,32
        lvx             v27,r11,$sp
        addi            r11,r11,32
        lvx             v28,r10,$sp
        addi            r10,r10,32
        lvx             v29,r11,$sp
        addi            r11,r11,32
        lvx             v30,r10,$sp
        lvx             v31,r11,$sp
        $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
        $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
        $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
        $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
        $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
        $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
        addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
        blr
        .long           0
        .byte           0,12,0x04,1,0x80,6,6,0
        .long           0

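# _aesp8_xts_dec5x mirrors _aesp8_xts_enc5x for the decrypt tails,
# and in addition performs the branchless $taillen probe of $inp and
# reloads ctr with $rounds before returning.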
.align  5
_aesp8_xts_dec5x:
        vncipher        $out0,$out0,v24
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            _aesp8_xts_dec5x

        subi            r0,$taillen,1
        vncipher        $out0,$out0,v24
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24

        andi.           r0,r0,16
        cmpwi           $taillen,0
        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
         vxor           $twk0,$twk0,v31

        sub             $inp,$inp,r0
        vncipher        $out0,$out0,v26
        vncipher        $out1,$out1,v26
        vncipher        $out2,$out2,v26
        vncipher        $out3,$out3,v26
        vncipher        $out4,$out4,v26
         vxor           $in1,$twk1,v31

        vncipher        $out0,$out0,v27
        lvx_u           $in0,0,$inp
        vncipher        $out1,$out1,v27
        vncipher        $out2,$out2,v27
        vncipher        $out3,$out3,v27
        vncipher        $out4,$out4,v27
         vxor           $in2,$twk2,v31

        addi            $key_,$sp,$FRAME+15     # rewind $key_
        vncipher        $out0,$out0,v28
        vncipher        $out1,$out1,v28
        vncipher        $out2,$out2,v28
        vncipher        $out3,$out3,v28
        vncipher        $out4,$out4,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]
         vxor           $in3,$twk3,v31

        vncipher        $out0,$out0,v29
        le?vperm        $in0,$in0,$in0,$leperm
        vncipher        $out1,$out1,v29
        vncipher        $out2,$out2,v29
        vncipher        $out3,$out3,v29
        vncipher        $out4,$out4,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vxor           $in4,$twk4,v31

        vncipher        $out0,$out0,v30
        vncipher        $out1,$out1,v30
        vncipher        $out2,$out2,v30
        vncipher        $out3,$out3,v30
        vncipher        $out4,$out4,v30

        vncipherlast    $out0,$out0,$twk0
        vncipherlast    $out1,$out1,$in1
        vncipherlast    $out2,$out2,$in2
        vncipherlast    $out3,$out3,$in3
        vncipherlast    $out4,$out4,$in4
        mtctr           $rounds
        blr
        .long           0
        .byte           0,12,0x14,0,0,0,0,0
___
}}      }}}

my $consts=1;
foreach(split("\n",$code)) {
        s/\`([^\`]*)\`/eval($1)/geo;

        # constants table endian-specific conversion
        if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
            my $conv=$3;
            my @bytes=();

            # convert to endian-agnostic format
            if ($1 eq "long") {
              foreach (split(/,\s*/,$2)) {
                my $l = /^0/?oct:int;
                push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
              }
            } else {
                @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
            }

            # little-endian conversion
            if ($flavour =~ /le$/o) {
                SWITCH: for($conv)  {
                    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
                    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
                }
            }

            #emit
            print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
            next;
        }
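        # Worked example of the conversion above (illustrative, not
        # from the original source): on a little-endian flavour a
        # table entry tagged "?rev" has its .long values split into
        # big-endian bytes, the resulting 16 bytes reversed as a
        # whole, and the line re-emitted as a single endian-neutral
        # ".byte" directive; "?inv" instead xors each byte with 0xf.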
        $consts=0 if (m/Lconsts:/o);    # end of table

        # instructions prefixed with '?' are endian-specific and need
        # to be adjusted accordingly...
        if ($flavour =~ /le$/o) {       # little-endian
            s/le\?//o           or
            s/be\?/#be#/o       or
            s/\?lvsr/lvsl/o     or
            s/\?lvsl/lvsr/o     or
            s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
            s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
            s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
        } else {                        # big-endian
            s/le\?/#le#/o       or
            s/be\?//o           or
            s/\?([a-z]+)/$1/o;
        }
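        # For instance, on a little-endian flavour the two source
        # registers of a '?vperm' swap places: "?vperm v24,v30,v31,v7"
        # becomes "vperm v24,v31,v30,v7" (an illustration of the
        # rewrite rules above, not text from the original).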

        print $_,"\n";
}

close STDOUT;
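
# Usage sketch (an editorial note, not part of the original source;
# the OpenSSL build system normally drives this): something like
#
#   perl aesp8-ppc.pl linux64le aesp8-ppc.s
#
# where the flavour argument selects the 32-/64-bit conventions and
# endianness decoded at the top of the file, and the trailing
# argument is handed to ppc-xlate.pl, which translates the perlasm
# stream into the final flavour-specific .s file.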