aes/asm/aesp8-ppc.pl: add XTS subroutines.
#! /usr/bin/env perl
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8
# processor. The module is endian-agnostic in the sense that it
# supports both big- and little-endian cases. Data alignment in
# parallelizable modes is handled with VSX loads and stores, which
# implies the MSR.VSX flag being set. It should also be noted that the
# ISA specification doesn't prohibit alignment exceptions for these
# instructions on page boundaries. Initially alignment was handled in
# the pure AltiVec/VMX way [where data is aligned programmatically,
# which in turn guarantees exception-free execution], but that turned
# out to hamper performance when vcipher instructions are interleaved.
# It's reckoned that eventual misalignment penalties at page
# boundaries are on average lower than the additional overhead of the
# pure AltiVec approach.
#
# May 2016
#
# Added XTS subroutines; improvements of 9x on little-endian and 12x
# on big-endian systems were measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0

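# The script takes a perlasm "flavour" plus an output argument on the
# command line and pipes its output through ppc-xlate.pl. A typical
# invocation (assuming the flavour names used by the other PowerPC
# perlasm modules in this tree) would be:
#
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s		# ppc64, little-endian
#	perl aesp8-ppc.pl linux32 aesp8-ppc.s		# ppc32, big-endian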
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift
	or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";
#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr			# $ptr = address right after the bcl
	addi	$ptr,$ptr,-0x48		# rcon sits 0x48 bytes behind it
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not a typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

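# Each Loop128 iteration below expands one round key. In scalar terms it
# performs the FIPS-197 expansion step sketched here in C (illustrative
# only, not code from this module; RotWord/SubWord are the standard
# helpers and w[] is the key schedule in 32-bit words):
#
#	t = SubWord(RotWord(w[i-1])) ^ rcon;	/* vperm + vcipherlast */
#	w[i+0] = w[i-4] ^ t;			/* the vsldoi/vxor chain */
#	w[i+1] = w[i-3] ^ w[i+0];		/* accumulates this       */
#	w[i+2] = w[i-2] ^ w[i+1];		/* prefix-xor across all  */
#	w[i+3] = w[i-1] ^ w[i+2];		/* four words of a vector */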
.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not a typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	 vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	 vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not a typo
	 addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in1,$in1,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not a typo
	 addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

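# The loop below converts the schedule in place for decryption, which
# walks the round keys in the opposite order; in C terms (a sketch, not
# code from this module):
#
#	for (i = 0, j = rounds; i < j; i++, j--)
#		swap16(rk[i], rk[j]);		/* 16-byte round keys */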
Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

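# Both generated entry points follow the classic AES_encrypt-style C
# prototype (an assumption based on how sibling OpenSSL modules declare
# them; it is not spelled out in this file):
#
#	void aes_p8_encrypt(const unsigned char in[16],
#	                    unsigned char out[16], const AES_KEY *key);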
$code.=<<___;
.globl	.${prefix}_${dir}crypt
.align	5
.${prefix}_${dir}crypt:
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not a typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not a typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
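# The calling convention mirrors AES_cbc_encrypt() (assumed from the
# argument registers r3-r10 mapped above, not stated in this file):
#
#	void aes_p8_cbc_encrypt(const unsigned char *in, unsigned char *out,
#	                        size_t length, const AES_KEY *key,
#	                        unsigned char *ivec, const int enc);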
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	${UCMP}i	$len,16
	bltlr-

	cmpwi		$enc,0			# test direction
	lis		r0,0xffe0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not a typo
	le?vxor		$inpperm,$inpperm,$tmp

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1
	beq		Lcbc_dec

Lcbc_enc:
	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	 vperm		$inout,$inout,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	vxor		$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm		$tmp,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_enc

	b		Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge		_aesp8_cbc_decrypt8x
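						# 128 bytes is eight full
						# blocks, the cut-over to the
						# interleaved 8x path above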
	vmr		$tmp,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	 vperm		$tmp,$tmp,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$tmp,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16

Loop_cbc_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor		$inout,$inout,$ivec
	vmr		$ivec,$tmp
	vperm		$tmp,$inout,$inout,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_dec

Lcbc_done:
	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	neg		$enc,$ivp		# write [unaligned] iv
	li		$idx,15			# 15 is not a typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment

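# CBC decryption parallelizes because each plaintext block depends on
# just two ciphertext blocks, roughly (a C sketch, not module code):
#
#	P[i] = Decrypt(K, C[i]) ^ C[i-1];	/* C[-1] is the IV */
#
# so the code below runs eight independent vncipher streams in
# lock-step, hiding instruction latency behind the interleave.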
$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total
	subi		$len,$len,128		# bias

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_cbc_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_cbc_dec_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	#lvx		$inptail,0,$inp		# "caller" already did this
	#addi		$inp,$inp,15		# 15 is not a typo
	subi		$inp,$inp,15		# undo "caller"

	 le?li		$idx,8
	lvx_u		$in0,$x00,$inp		# load first 8 "words"
	 le?lvsl	$inpperm,0,$idx
	 le?vspltisb	$tmp,0x0f
	lvx_u		$in1,$x10,$inp
	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u		$in2,$x20,$inp
	 le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in3,$x30,$inp
	 le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in4,$x40,$inp
	 le?vperm	$in2,$in2,$in2,$inpperm
	vxor		$out0,$in0,$rndkey0
	lvx_u		$in5,$x50,$inp
	 le?vperm	$in3,$in3,$in3,$inpperm
	vxor		$out1,$in1,$rndkey0
	lvx_u		$in6,$x60,$inp
	 le?vperm	$in4,$in4,$in4,$inpperm
	vxor		$out2,$in2,$rndkey0
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80
	 le?vperm	$in5,$in5,$in5,$inpperm
	vxor		$out3,$in3,$rndkey0
	 le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out4,$in4,$rndkey0
	 le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out5,$in5,$rndkey0
	vxor		$out6,$in6,$rndkey0
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	b		Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x

	subic		$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and		r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add		$inp,$inp,r0		# $inp is adjusted in such a
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
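						# (branch-free form of:
						# if ($len<0) $inp+=$len)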
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	 vxor		$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	 vxor		$in0,$in0,v31
	vncipher	$out2,$out2,v30
	 vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	 vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	 vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	 vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	 vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	 vxor		$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	 lvx_u		$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	 lvx_u		$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	 le?vperm	$in0,$in0,$in0,$inpperm
	 lvx_u		$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	 le?vperm	$in1,$in1,$in1,$inpperm
	 lvx_u		$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	 le?vperm	$in2,$in2,$in2,$inpperm
	 lvx_u		$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	 le?vperm	$in3,$in3,$in3,$inpperm
	 lvx_u		$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	 le?vperm	$in4,$in4,$in4,$inpperm
	 lvx_u		$in6,$x60,$inp
	vmr		$ivec,$in7
	 le?vperm	$in5,$in5,$in5,$inpperm
	 lvx_u		$in7,$x70,$inp
	 addi		$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	 le?vperm	$in6,$in6,$in6,$inpperm
	 vxor		$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	 le?vperm	$in7,$in7,$in7,$inpperm
	 vxor		$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	 vxor		$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	 vxor		$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	 vxor		$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	 vxor		$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	 vxor		$out6,$in6,$rndkey0
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	 vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	beq		Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.		$len,$len,128
	beq		Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	 vxor		$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	 vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	 vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	 vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	 vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	 vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	 vxor		$in6,$in6,v31

	cmplwi		$len,32			# switch($len)
	blt		Lcbc_dec8x_one
	nop
	beq		Lcbc_dec8x_two
	cmplwi		$len,64
	blt		Lcbc_dec8x_three
	nop
	beq		Lcbc_dec8x_four
	cmplwi		$len,96
	blt		Lcbc_dec8x_five
	nop
	beq		Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x50,$out
	stvx_u		$out7,$x60,$out
	addi		$out,$out,0x70
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x40,$out
	stvx_u		$out7,$x50,$out
	addi		$out,$out,0x60
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x30,$out
	stvx_u		$out7,$x40,$out
	addi		$out,$out,0x50
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x20,$out
	stvx_u		$out7,$x30,$out
	addi		$out,$out,0x40
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x10,$out
	stvx_u		$out7,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x00,$out
	stvx_u		$out7,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr		$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out7,0,$out
	addi		$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u		$ivec,0,$ivp		# write [unaligned] iv

	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}
1255 }}      }}}
1256
1257 #########################################################################
1258 {{{     # CTR procedure[s]                                              #
1259 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1260 my ($rndkey0,$rndkey1,$inout,$tmp)=             map("v$_",(0..3));
1261 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1262                                                 map("v$_",(4..11));
1263 my $dat=$tmp;
1264
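# The calling convention matches the ctr128_f callback used by
# CRYPTO_ctr128_encrypt_ctr32() (an assumption based on OpenSSL
# convention; note that $len counts 16-byte blocks, not bytes):
#
#	void aes_p8_ctr32_encrypt_blocks(const unsigned char *in,
#	                                 unsigned char *out, size_t blocks,
#	                                 const AES_KEY *key,
#	                                 const unsigned char ivec[16]);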
$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
.align	5
.${prefix}_ctr32_encrypt_blocks:
	${UCMP}i	$len,1
	bltlr-

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	 vspltisb	$one,1
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm
	 vsldoi		$one,$rndkey0,$one,1
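						# $one = {0,0,0,1}, so the
						# vadduwm in the loop bumps
						# only the last 32-bit word
						# of the counter block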
1287
1288         neg             r11,$inp
1289         ?lvsl           $keyperm,0,$key         # prepare for unaligned key
1290         lwz             $rounds,240($key)
1291
1292         lvsr            $inpperm,0,r11          # prepare for unaligned load
1293         lvx             $inptail,0,$inp
1294         addi            $inp,$inp,15            # 15 is not typo
1295         le?vxor         $inpperm,$inpperm,$tmp
1296
1297         srwi            $rounds,$rounds,1
1298         li              $idx,16
1299         subi            $rounds,$rounds,1
1300
1301         ${UCMP}i        $len,8
1302         bge             _aesp8_ctr32_encrypt8x
1303
1304         ?lvsr           $outperm,0,$out         # prepare for unaligned store
1305         vspltisb        $outmask,-1
1306         lvx             $outhead,0,$out
1307         ?vperm          $outmask,$rndkey0,$outmask,$outperm
1308         le?vxor         $outperm,$outperm,$tmp
1309
1310         lvx             $rndkey0,0,$key
1311         mtctr           $rounds
1312         lvx             $rndkey1,$idx,$key
1313         addi            $idx,$idx,16
1314         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1315         vxor            $inout,$ivec,$rndkey0
1316         lvx             $rndkey0,$idx,$key
1317         addi            $idx,$idx,16
1318         b               Loop_ctr32_enc
1319
1320 .align  5
1321 Loop_ctr32_enc:
1322         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1323         vcipher         $inout,$inout,$rndkey1
1324         lvx             $rndkey1,$idx,$key
1325         addi            $idx,$idx,16
1326         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1327         vcipher         $inout,$inout,$rndkey0
1328         lvx             $rndkey0,$idx,$key
1329         addi            $idx,$idx,16
1330         bdnz            Loop_ctr32_enc
1331
1332         vadduwm         $ivec,$ivec,$one
1333          vmr            $dat,$inptail
1334          lvx            $inptail,0,$inp
1335          addi           $inp,$inp,16
1336          subic.         $len,$len,1             # blocks--
1337
1338         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1339         vcipher         $inout,$inout,$rndkey1
1340         lvx             $rndkey1,$idx,$key
1341          vperm          $dat,$dat,$inptail,$inpperm
1342          li             $idx,16
1343         ?vperm          $rndkey1,$rndkey0,$rndkey1,$keyperm
1344          lvx            $rndkey0,0,$key
1345         vxor            $dat,$dat,$rndkey1      # last round key
1346         vcipherlast     $inout,$inout,$dat
1347
1348          lvx            $rndkey1,$idx,$key
1349          addi           $idx,$idx,16
1350         vperm           $inout,$inout,$inout,$outperm
1351         vsel            $dat,$outhead,$inout,$outmask
1352          mtctr          $rounds
1353          ?vperm         $rndkey0,$rndkey0,$rndkey1,$keyperm
1354         vmr             $outhead,$inout
1355          vxor           $inout,$ivec,$rndkey0
1356          lvx            $rndkey0,$idx,$key
1357          addi           $idx,$idx,16
1358         stvx            $dat,0,$out
1359         addi            $out,$out,16
1360         bne             Loop_ctr32_enc
1361
1362         addi            $out,$out,-1
1363         lvx             $inout,0,$out           # redundant in aligned case
1364         vsel            $inout,$outhead,$inout,$outmask
1365         stvx            $inout,0,$out
1366
1367         mtspr           256,$vrsave
1368         blr
1369         .long           0
1370         .byte           0,12,0x14,0,0,0,6,0
1371         .long           0
1372 ___
1373 #########################################################################
1374 {{      # Optimized CTR procedure                                       #
1375 my $key_="r11";
1376 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1377     $x00=0 if ($flavour =~ /osx/);
1378 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1379 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1380 my $rndkey0="v23";      # v24-v25 rotating buffer for first round keys
1381                         # v26-v31 last 6 round keys
1382 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1383 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1384
1385 $code.=<<___;
1386 .align  5
1387 _aesp8_ctr32_encrypt8x:
1388         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1389         li              r10,`$FRAME+8*16+15`
1390         li              r11,`$FRAME+8*16+31`
1391         stvx            v20,r10,$sp             # ABI says so
1392         addi            r10,r10,32
1393         stvx            v21,r11,$sp
1394         addi            r11,r11,32
1395         stvx            v22,r10,$sp
1396         addi            r10,r10,32
1397         stvx            v23,r11,$sp
1398         addi            r11,r11,32
1399         stvx            v24,r10,$sp
1400         addi            r10,r10,32
1401         stvx            v25,r11,$sp
1402         addi            r11,r11,32
1403         stvx            v26,r10,$sp
1404         addi            r10,r10,32
1405         stvx            v27,r11,$sp
1406         addi            r11,r11,32
1407         stvx            v28,r10,$sp
1408         addi            r10,r10,32
1409         stvx            v29,r11,$sp
1410         addi            r11,r11,32
1411         stvx            v30,r10,$sp
1412         stvx            v31,r11,$sp
1413         li              r0,-1
1414         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
1415         li              $x10,0x10
1416         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1417         li              $x20,0x20
1418         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1419         li              $x30,0x30
1420         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1421         li              $x40,0x40
1422         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1423         li              $x50,0x50
1424         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1425         li              $x60,0x60
1426         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1427         li              $x70,0x70
1428         mtspr           256,r0
1429
1430         subi            $rounds,$rounds,3       # -4 in total
1431
1432         lvx             $rndkey0,$x00,$key      # load key schedule
1433         lvx             v30,$x10,$key
1434         addi            $key,$key,0x20
1435         lvx             v31,$x00,$key
1436         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
1437         addi            $key_,$sp,$FRAME+15
1438         mtctr           $rounds
1439
1440 Load_ctr32_enc_key:
1441         ?vperm          v24,v30,v31,$keyperm
1442         lvx             v30,$x10,$key
1443         addi            $key,$key,0x20
1444         stvx            v24,$x00,$key_          # off-load round[1]
1445         ?vperm          v25,v31,v30,$keyperm
1446         lvx             v31,$x00,$key
1447         stvx            v25,$x10,$key_          # off-load round[2]
1448         addi            $key_,$key_,0x20
1449         bdnz            Load_ctr32_enc_key
1450
1451         lvx             v26,$x10,$key
1452         ?vperm          v24,v30,v31,$keyperm
1453         lvx             v27,$x20,$key
1454         stvx            v24,$x00,$key_          # off-load round[3]
1455         ?vperm          v25,v31,v26,$keyperm
1456         lvx             v28,$x30,$key
1457         stvx            v25,$x10,$key_          # off-load round[4]
1458         addi            $key_,$sp,$FRAME+15     # rewind $key_
1459         ?vperm          v26,v26,v27,$keyperm
1460         lvx             v29,$x40,$key
1461         ?vperm          v27,v27,v28,$keyperm
1462         lvx             v30,$x50,$key
1463         ?vperm          v28,v28,v29,$keyperm
1464         lvx             v31,$x60,$key
1465         ?vperm          v29,v29,v30,$keyperm
1466         lvx             $out0,$x70,$key         # borrow $out0
1467         ?vperm          v30,v30,v31,$keyperm
1468         lvx             v24,$x00,$key_          # pre-load round[1]
1469         ?vperm          v31,v31,$out0,$keyperm
1470         lvx             v25,$x10,$key_          # pre-load round[2]
1471
1472         vadduwm         $two,$one,$one
1473         subi            $inp,$inp,15            # undo "caller"
1474         $SHL            $len,$len,4
1475
1476         vadduwm         $out1,$ivec,$one        # counter values ...
1477         vadduwm         $out2,$ivec,$two
1478         vxor            $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1479          le?li          $idx,8
1480         vadduwm         $out3,$out1,$two
1481         vxor            $out1,$out1,$rndkey0
1482          le?lvsl        $inpperm,0,$idx
1483         vadduwm         $out4,$out2,$two
1484         vxor            $out2,$out2,$rndkey0
1485          le?vspltisb    $tmp,0x0f
1486         vadduwm         $out5,$out3,$two
1487         vxor            $out3,$out3,$rndkey0
1488          le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
1489         vadduwm         $out6,$out4,$two
1490         vxor            $out4,$out4,$rndkey0
1491         vadduwm         $out7,$out5,$two
1492         vxor            $out5,$out5,$rndkey0
1493         vadduwm         $ivec,$out6,$two        # next counter value
1494         vxor            $out6,$out6,$rndkey0
1495         vxor            $out7,$out7,$rndkey0
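# out0-out7 now hold the counter values $ivec+0 .. $ivec+7, each
# pre-xored with round key 0 (AES round 0 is plain AddRoundKey), and
# $ivec itself has moved on by 8 for the next batch.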
1496
1497         mtctr           $rounds
1498         b               Loop_ctr32_enc8x
1499 .align  5
1500 Loop_ctr32_enc8x:
1501         vcipher         $out0,$out0,v24
1502         vcipher         $out1,$out1,v24
1503         vcipher         $out2,$out2,v24
1504         vcipher         $out3,$out3,v24
1505         vcipher         $out4,$out4,v24
1506         vcipher         $out5,$out5,v24
1507         vcipher         $out6,$out6,v24
1508         vcipher         $out7,$out7,v24
1509 Loop_ctr32_enc8x_middle:
1510         lvx             v24,$x20,$key_          # round[3]
1511         addi            $key_,$key_,0x20
1512
1513         vcipher         $out0,$out0,v25
1514         vcipher         $out1,$out1,v25
1515         vcipher         $out2,$out2,v25
1516         vcipher         $out3,$out3,v25
1517         vcipher         $out4,$out4,v25
1518         vcipher         $out5,$out5,v25
1519         vcipher         $out6,$out6,v25
1520         vcipher         $out7,$out7,v25
1521         lvx             v25,$x10,$key_          # round[4]
1522         bdnz            Loop_ctr32_enc8x
1523
1524         subic           r11,$len,256            # $len-256, borrow $key_
1525         vcipher         $out0,$out0,v24
1526         vcipher         $out1,$out1,v24
1527         vcipher         $out2,$out2,v24
1528         vcipher         $out3,$out3,v24
1529         vcipher         $out4,$out4,v24
1530         vcipher         $out5,$out5,v24
1531         vcipher         $out6,$out6,v24
1532         vcipher         $out7,$out7,v24
1533
1534         subfe           r0,r0,r0                # borrow?-1:0
1535         vcipher         $out0,$out0,v25
1536         vcipher         $out1,$out1,v25
1537         vcipher         $out2,$out2,v25
1538         vcipher         $out3,$out3,v25
1539         vcipher         $out4,$out4,v25
1540         vcipher         $out5,$out5,v25
1541         vcipher         $out6,$out6,v25
1542         vcipher         $out7,$out7,v25
1543
1544         and             r0,r0,r11
1545         addi            $key_,$sp,$FRAME+15     # rewind $key_
1546         vcipher         $out0,$out0,v26
1547         vcipher         $out1,$out1,v26
1548         vcipher         $out2,$out2,v26
1549         vcipher         $out3,$out3,v26
1550         vcipher         $out4,$out4,v26
1551         vcipher         $out5,$out5,v26
1552         vcipher         $out6,$out6,v26
1553         vcipher         $out7,$out7,v26
1554         lvx             v24,$x00,$key_          # re-pre-load round[1]
1555
1556         subic           $len,$len,129           # $len-=129
1557         vcipher         $out0,$out0,v27
1558         addi            $len,$len,1             # $len-=128 really
1559         vcipher         $out1,$out1,v27
1560         vcipher         $out2,$out2,v27
1561         vcipher         $out3,$out3,v27
1562         vcipher         $out4,$out4,v27
1563         vcipher         $out5,$out5,v27
1564         vcipher         $out6,$out6,v27
1565         vcipher         $out7,$out7,v27
1566         lvx             v25,$x10,$key_          # re-pre-load round[2]
1567
1568         vcipher         $out0,$out0,v28
1569          lvx_u          $in0,$x00,$inp          # load input
1570         vcipher         $out1,$out1,v28
1571          lvx_u          $in1,$x10,$inp
1572         vcipher         $out2,$out2,v28
1573          lvx_u          $in2,$x20,$inp
1574         vcipher         $out3,$out3,v28
1575          lvx_u          $in3,$x30,$inp
1576         vcipher         $out4,$out4,v28
1577          lvx_u          $in4,$x40,$inp
1578         vcipher         $out5,$out5,v28
1579          lvx_u          $in5,$x50,$inp
1580         vcipher         $out6,$out6,v28
1581          lvx_u          $in6,$x60,$inp
1582         vcipher         $out7,$out7,v28
1583          lvx_u          $in7,$x70,$inp
1584          addi           $inp,$inp,0x80
1585
1586         vcipher         $out0,$out0,v29
1587          le?vperm       $in0,$in0,$in0,$inpperm
1588         vcipher         $out1,$out1,v29
1589          le?vperm       $in1,$in1,$in1,$inpperm
1590         vcipher         $out2,$out2,v29
1591          le?vperm       $in2,$in2,$in2,$inpperm
1592         vcipher         $out3,$out3,v29
1593          le?vperm       $in3,$in3,$in3,$inpperm
1594         vcipher         $out4,$out4,v29
1595          le?vperm       $in4,$in4,$in4,$inpperm
1596         vcipher         $out5,$out5,v29
1597          le?vperm       $in5,$in5,$in5,$inpperm
1598         vcipher         $out6,$out6,v29
1599          le?vperm       $in6,$in6,$in6,$inpperm
1600         vcipher         $out7,$out7,v29
1601          le?vperm       $in7,$in7,$in7,$inpperm
1602
1603         add             $inp,$inp,r0            # $inp is adjusted in such
1604                                                 # a way that at exit from
1605                                                 # the loop inX-in7 are
1606                                                 # loaded with the last "words"
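# The subic/subfe/and sequence above forms a branch-free clamp: r0 is
# 0 while $len >= 256 and $len-256 (negative) otherwise, so $inp is
# pulled back just enough for the final batch of eight loads to end
# exactly at the input's last byte (the odd-looking "subic 129 /
# addi 1" pair subtracts 128 while letting subfe./bne detect whether
# fewer than 129 bytes remained).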
1607         subfe.          r0,r0,r0                # borrow?-1:0
1608         vcipher         $out0,$out0,v30
1609          vxor           $in0,$in0,v31           # xor with last round key
1610         vcipher         $out1,$out1,v30
1611          vxor           $in1,$in1,v31
1612         vcipher         $out2,$out2,v30
1613          vxor           $in2,$in2,v31
1614         vcipher         $out3,$out3,v30
1615          vxor           $in3,$in3,v31
1616         vcipher         $out4,$out4,v30
1617          vxor           $in4,$in4,v31
1618         vcipher         $out5,$out5,v30
1619          vxor           $in5,$in5,v31
1620         vcipher         $out6,$out6,v30
1621          vxor           $in6,$in6,v31
1622         vcipher         $out7,$out7,v30
1623          vxor           $in7,$in7,v31
1624
1625         bne             Lctr32_enc8x_break      # did $len-129 borrow?
1626
1627         vcipherlast     $in0,$out0,$in0
1628         vcipherlast     $in1,$out1,$in1
1629          vadduwm        $out1,$ivec,$one        # counter values ...
1630         vcipherlast     $in2,$out2,$in2
1631          vadduwm        $out2,$ivec,$two
1632          vxor           $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1633         vcipherlast     $in3,$out3,$in3
1634          vadduwm        $out3,$out1,$two
1635          vxor           $out1,$out1,$rndkey0
1636         vcipherlast     $in4,$out4,$in4
1637          vadduwm        $out4,$out2,$two
1638          vxor           $out2,$out2,$rndkey0
1639         vcipherlast     $in5,$out5,$in5
1640          vadduwm        $out5,$out3,$two
1641          vxor           $out3,$out3,$rndkey0
1642         vcipherlast     $in6,$out6,$in6
1643          vadduwm        $out6,$out4,$two
1644          vxor           $out4,$out4,$rndkey0
1645         vcipherlast     $in7,$out7,$in7
1646          vadduwm        $out7,$out5,$two
1647          vxor           $out5,$out5,$rndkey0
1648         le?vperm        $in0,$in0,$in0,$inpperm
1649          vadduwm        $ivec,$out6,$two        # next counter value
1650          vxor           $out6,$out6,$rndkey0
1651         le?vperm        $in1,$in1,$in1,$inpperm
1652          vxor           $out7,$out7,$rndkey0
1653         mtctr           $rounds
1654
1655          vcipher        $out0,$out0,v24
1656         stvx_u          $in0,$x00,$out
1657         le?vperm        $in2,$in2,$in2,$inpperm
1658          vcipher        $out1,$out1,v24
1659         stvx_u          $in1,$x10,$out
1660         le?vperm        $in3,$in3,$in3,$inpperm
1661          vcipher        $out2,$out2,v24
1662         stvx_u          $in2,$x20,$out
1663         le?vperm        $in4,$in4,$in4,$inpperm
1664          vcipher        $out3,$out3,v24
1665         stvx_u          $in3,$x30,$out
1666         le?vperm        $in5,$in5,$in5,$inpperm
1667          vcipher        $out4,$out4,v24
1668         stvx_u          $in4,$x40,$out
1669         le?vperm        $in6,$in6,$in6,$inpperm
1670          vcipher        $out5,$out5,v24
1671         stvx_u          $in5,$x50,$out
1672         le?vperm        $in7,$in7,$in7,$inpperm
1673          vcipher        $out6,$out6,v24
1674         stvx_u          $in6,$x60,$out
1675          vcipher        $out7,$out7,v24
1676         stvx_u          $in7,$x70,$out
1677         addi            $out,$out,0x80
1678
1679         b               Loop_ctr32_enc8x_middle
1680
1681 .align  5
1682 Lctr32_enc8x_break:
1683         cmpwi           $len,-0x60
1684         blt             Lctr32_enc8x_one
1685         nop
1686         beq             Lctr32_enc8x_two
1687         cmpwi           $len,-0x40
1688         blt             Lctr32_enc8x_three
1689         nop
1690         beq             Lctr32_enc8x_four
1691         cmpwi           $len,-0x20
1692         blt             Lctr32_enc8x_five
1693         nop
1694         beq             Lctr32_enc8x_six
1695         cmpwi           $len,0x00
1696         blt             Lctr32_enc8x_seven
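# $len has already had 128 subtracted here, so it encodes the trailing
# block count as a negative byte offset: -0x70 means one block left,
# -0x60 two, and so on up to 0x00 for all eight; the compare ladder
# above dispatches accordingly.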
1697
1698 Lctr32_enc8x_eight:
1699         vcipherlast     $out0,$out0,$in0
1700         vcipherlast     $out1,$out1,$in1
1701         vcipherlast     $out2,$out2,$in2
1702         vcipherlast     $out3,$out3,$in3
1703         vcipherlast     $out4,$out4,$in4
1704         vcipherlast     $out5,$out5,$in5
1705         vcipherlast     $out6,$out6,$in6
1706         vcipherlast     $out7,$out7,$in7
1707
1708         le?vperm        $out0,$out0,$out0,$inpperm
1709         le?vperm        $out1,$out1,$out1,$inpperm
1710         stvx_u          $out0,$x00,$out
1711         le?vperm        $out2,$out2,$out2,$inpperm
1712         stvx_u          $out1,$x10,$out
1713         le?vperm        $out3,$out3,$out3,$inpperm
1714         stvx_u          $out2,$x20,$out
1715         le?vperm        $out4,$out4,$out4,$inpperm
1716         stvx_u          $out3,$x30,$out
1717         le?vperm        $out5,$out5,$out5,$inpperm
1718         stvx_u          $out4,$x40,$out
1719         le?vperm        $out6,$out6,$out6,$inpperm
1720         stvx_u          $out5,$x50,$out
1721         le?vperm        $out7,$out7,$out7,$inpperm
1722         stvx_u          $out6,$x60,$out
1723         stvx_u          $out7,$x70,$out
1724         addi            $out,$out,0x80
1725         b               Lctr32_enc8x_done
1726
1727 .align  5
1728 Lctr32_enc8x_seven:
1729         vcipherlast     $out0,$out0,$in1
1730         vcipherlast     $out1,$out1,$in2
1731         vcipherlast     $out2,$out2,$in3
1732         vcipherlast     $out3,$out3,$in4
1733         vcipherlast     $out4,$out4,$in5
1734         vcipherlast     $out5,$out5,$in6
1735         vcipherlast     $out6,$out6,$in7
1736
1737         le?vperm        $out0,$out0,$out0,$inpperm
1738         le?vperm        $out1,$out1,$out1,$inpperm
1739         stvx_u          $out0,$x00,$out
1740         le?vperm        $out2,$out2,$out2,$inpperm
1741         stvx_u          $out1,$x10,$out
1742         le?vperm        $out3,$out3,$out3,$inpperm
1743         stvx_u          $out2,$x20,$out
1744         le?vperm        $out4,$out4,$out4,$inpperm
1745         stvx_u          $out3,$x30,$out
1746         le?vperm        $out5,$out5,$out5,$inpperm
1747         stvx_u          $out4,$x40,$out
1748         le?vperm        $out6,$out6,$out6,$inpperm
1749         stvx_u          $out5,$x50,$out
1750         stvx_u          $out6,$x60,$out
1751         addi            $out,$out,0x70
1752         b               Lctr32_enc8x_done
1753
1754 .align  5
1755 Lctr32_enc8x_six:
1756         vcipherlast     $out0,$out0,$in2
1757         vcipherlast     $out1,$out1,$in3
1758         vcipherlast     $out2,$out2,$in4
1759         vcipherlast     $out3,$out3,$in5
1760         vcipherlast     $out4,$out4,$in6
1761         vcipherlast     $out5,$out5,$in7
1762
1763         le?vperm        $out0,$out0,$out0,$inpperm
1764         le?vperm        $out1,$out1,$out1,$inpperm
1765         stvx_u          $out0,$x00,$out
1766         le?vperm        $out2,$out2,$out2,$inpperm
1767         stvx_u          $out1,$x10,$out
1768         le?vperm        $out3,$out3,$out3,$inpperm
1769         stvx_u          $out2,$x20,$out
1770         le?vperm        $out4,$out4,$out4,$inpperm
1771         stvx_u          $out3,$x30,$out
1772         le?vperm        $out5,$out5,$out5,$inpperm
1773         stvx_u          $out4,$x40,$out
1774         stvx_u          $out5,$x50,$out
1775         addi            $out,$out,0x60
1776         b               Lctr32_enc8x_done
1777
1778 .align  5
1779 Lctr32_enc8x_five:
1780         vcipherlast     $out0,$out0,$in3
1781         vcipherlast     $out1,$out1,$in4
1782         vcipherlast     $out2,$out2,$in5
1783         vcipherlast     $out3,$out3,$in6
1784         vcipherlast     $out4,$out4,$in7
1785
1786         le?vperm        $out0,$out0,$out0,$inpperm
1787         le?vperm        $out1,$out1,$out1,$inpperm
1788         stvx_u          $out0,$x00,$out
1789         le?vperm        $out2,$out2,$out2,$inpperm
1790         stvx_u          $out1,$x10,$out
1791         le?vperm        $out3,$out3,$out3,$inpperm
1792         stvx_u          $out2,$x20,$out
1793         le?vperm        $out4,$out4,$out4,$inpperm
1794         stvx_u          $out3,$x30,$out
1795         stvx_u          $out4,$x40,$out
1796         addi            $out,$out,0x50
1797         b               Lctr32_enc8x_done
1798
1799 .align  5
1800 Lctr32_enc8x_four:
1801         vcipherlast     $out0,$out0,$in4
1802         vcipherlast     $out1,$out1,$in5
1803         vcipherlast     $out2,$out2,$in6
1804         vcipherlast     $out3,$out3,$in7
1805
1806         le?vperm        $out0,$out0,$out0,$inpperm
1807         le?vperm        $out1,$out1,$out1,$inpperm
1808         stvx_u          $out0,$x00,$out
1809         le?vperm        $out2,$out2,$out2,$inpperm
1810         stvx_u          $out1,$x10,$out
1811         le?vperm        $out3,$out3,$out3,$inpperm
1812         stvx_u          $out2,$x20,$out
1813         stvx_u          $out3,$x30,$out
1814         addi            $out,$out,0x40
1815         b               Lctr32_enc8x_done
1816
1817 .align  5
1818 Lctr32_enc8x_three:
1819         vcipherlast     $out0,$out0,$in5
1820         vcipherlast     $out1,$out1,$in6
1821         vcipherlast     $out2,$out2,$in7
1822
1823         le?vperm        $out0,$out0,$out0,$inpperm
1824         le?vperm        $out1,$out1,$out1,$inpperm
1825         stvx_u          $out0,$x00,$out
1826         le?vperm        $out2,$out2,$out2,$inpperm
1827         stvx_u          $out1,$x10,$out
1828         stvx_u          $out2,$x20,$out
1829         addi            $out,$out,0x30
1830         b               Lctr32_enc8x_done
1831
1832 .align  5
1833 Lctr32_enc8x_two:
1834         vcipherlast     $out0,$out0,$in6
1835         vcipherlast     $out1,$out1,$in7
1836
1837         le?vperm        $out0,$out0,$out0,$inpperm
1838         le?vperm        $out1,$out1,$out1,$inpperm
1839         stvx_u          $out0,$x00,$out
1840         stvx_u          $out1,$x10,$out
1841         addi            $out,$out,0x20
1842         b               Lctr32_enc8x_done
1843
1844 .align  5
1845 Lctr32_enc8x_one:
1846         vcipherlast     $out0,$out0,$in7
1847
1848         le?vperm        $out0,$out0,$out0,$inpperm
1849         stvx_u          $out0,0,$out
1850         addi            $out,$out,0x10
1851
1852 Lctr32_enc8x_done:
1853         li              r10,`$FRAME+15`
1854         li              r11,`$FRAME+31`
1855         stvx            $inpperm,r10,$sp        # wipe copies of round keys
1856         addi            r10,r10,32
1857         stvx            $inpperm,r11,$sp
1858         addi            r11,r11,32
1859         stvx            $inpperm,r10,$sp
1860         addi            r10,r10,32
1861         stvx            $inpperm,r11,$sp
1862         addi            r11,r11,32
1863         stvx            $inpperm,r10,$sp
1864         addi            r10,r10,32
1865         stvx            $inpperm,r11,$sp
1866         addi            r11,r11,32
1867         stvx            $inpperm,r10,$sp
1868         addi            r10,r10,32
1869         stvx            $inpperm,r11,$sp
1870         addi            r11,r11,32
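# The eight stvx above overwrite the (up to) 128-byte stack area that
# Load_ctr32_enc_key used to off-load round keys; $inpperm is only a
# byte-permutation pattern, so storing it leaves nothing key-derived
# behind.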
1871
1872         mtspr           256,$vrsave
1873         lvx             v20,r10,$sp             # ABI says so
1874         addi            r10,r10,32
1875         lvx             v21,r11,$sp
1876         addi            r11,r11,32
1877         lvx             v22,r10,$sp
1878         addi            r10,r10,32
1879         lvx             v23,r11,$sp
1880         addi            r11,r11,32
1881         lvx             v24,r10,$sp
1882         addi            r10,r10,32
1883         lvx             v25,r11,$sp
1884         addi            r11,r11,32
1885         lvx             v26,r10,$sp
1886         addi            r10,r10,32
1887         lvx             v27,r11,$sp
1888         addi            r11,r11,32
1889         lvx             v28,r10,$sp
1890         addi            r10,r10,32
1891         lvx             v29,r11,$sp
1892         addi            r11,r11,32
1893         lvx             v30,r10,$sp
1894         lvx             v31,r11,$sp
1895         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1896         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1897         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1898         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1899         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1900         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1901         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1902         blr
1903         .long           0
1904         .byte           0,12,0x04,0,0x80,6,6,0
1905         .long           0
1906 .size   .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1907 ___
1908 }}      }}}
1909
1910 #########################################################################
1911 {{{     # XTS procedures                                                #
1912 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =     map("r$_",(3..10));
1913 my ($rndkey0,$rndkey1,$inout) =                         map("v$_",(0..2));
1914 my ($output,$inptail,$inpperm,$leperm,$keyperm) =       map("v$_",(3..7));
1915 my ($tweak,$seven,$eighty7,$tmp,$tweak1) =              map("v$_",(8..12));
1916 my $taillen = $key2;
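# $taillen can share $key2's register: the tweak key is consumed in
# full before any tail handling begins.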
1917
1918    ($inp,$idx) = ($idx,$inp);                           # reassign
1919
1920 $code.=<<___;
1921 .globl  .${prefix}_xts_encrypt
1922 .align  5
1923 .${prefix}_xts_encrypt:
1924         mr              $inp,r3                         # reassign
1925         li              r3,-1
1926         ${UCMP}i        $len,16
1927         bltlr-
1928
1929         lis             r0,0xfff0
1930         mfspr           r12,256                         # save vrsave
1931         li              r11,0
1932         mtspr           256,r0
1933
1934         vspltisb        $seven,0x07                     # 0x070707..07
1935         le?lvsl         $leperm,r11,r11
1936         le?vspltisb     $tmp,0x0f
1937         le?vxor         $leperm,$leperm,$seven
1938
1939         li              $idx,15
1940         lvx             $tweak,0,$ivp                   # load [unaligned] iv
1941         lvsl            $inpperm,0,$ivp
1942         lvx             $inptail,$idx,$ivp
1943         le?vxor         $inpperm,$inpperm,$tmp
1944         vperm           $tweak,$tweak,$inptail,$inpperm
1945
1946         ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
1947         lwz             $rounds,240($key2)
1948         srwi            $rounds,$rounds,1
1949         subi            $rounds,$rounds,1
1950         li              $idx,16
1951
1952         neg             r11,$inp
1953         lvsr            $inpperm,0,r11                  # prepare for unaligned load
1954         lvx             $inout,0,$inp
1955         addi            $inp,$inp,15                    # 15 is not a typo
1956         le?vxor         $inpperm,$inpperm,$tmp
1957
1958         lvx             $rndkey0,0,$key2
1959         lvx             $rndkey1,$idx,$key2
1960         addi            $idx,$idx,16
1961         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1962         vxor            $tweak,$tweak,$rndkey0
1963         lvx             $rndkey0,$idx,$key2
1964         addi            $idx,$idx,16
1965         mtctr           $rounds
1966
1967 Ltweak_xts_enc:
1968         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1969         vcipher         $tweak,$tweak,$rndkey1
1970         lvx             $rndkey1,$idx,$key2
1971         addi            $idx,$idx,16
1972         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1973         vcipher         $tweak,$tweak,$rndkey0
1974         lvx             $rndkey0,$idx,$key2
1975         addi            $idx,$idx,16
1976         bdnz            Ltweak_xts_enc
1977
1978         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1979         vcipher         $tweak,$tweak,$rndkey1
1980         lvx             $rndkey1,$idx,$key2
1981         li              $idx,16
1982         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1983         vcipherlast     $tweak,$tweak,$rndkey0
1984
1985         lvx             $inptail,0,$inp
1986         addi            $inp,$inp,16
1987
1988         ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
1989         lwz             $rounds,240($key1)
1990         srwi            $rounds,$rounds,1
1991         subi            $rounds,$rounds,1
1992         li              $idx,16
1993
1994         vslb            $eighty7,$seven,$seven          # 0x808080..80
1995         vor             $eighty7,$eighty7,$seven        # 0x878787..87
1996         vspltisb        $tmp,1                          # 0x010101..01
1997         vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
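# $eighty7 = 0x87,0x01,..,0x01 drives the byte-wise tweak doubling
# below: 0x87 is the low byte of the XTS reduction polynomial
# x^128+x^7+x^2+x+1, and the 0x01 lanes propagate each byte's carry
# into its neighbour.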
1998
1999         ${UCMP}i        $len,96
2000         bge             _aesp8_xts_encrypt6x
2001
2002         andi.           $taillen,$len,15
2003         subic           r0,$len,32
2004         subi            $taillen,$taillen,16
2005         subfe           r0,r0,r0
2006         and             r0,r0,$taillen
2007         add             $inp,$inp,r0
2008
2009         lvx             $rndkey0,0,$key1
2010         lvx             $rndkey1,$idx,$key1
2011         addi            $idx,$idx,16
2012         vperm           $inout,$inout,$inptail,$inpperm
2013         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2014         vxor            $inout,$inout,$tweak
2015         vxor            $inout,$inout,$rndkey0
2016         lvx             $rndkey0,$idx,$key1
2017         addi            $idx,$idx,16
2018         mtctr           $rounds
2019         b               Loop_xts_enc
2020
2021 .align  5
2022 Loop_xts_enc:
2023         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2024         vcipher         $inout,$inout,$rndkey1
2025         lvx             $rndkey1,$idx,$key1
2026         addi            $idx,$idx,16
2027         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2028         vcipher         $inout,$inout,$rndkey0
2029         lvx             $rndkey0,$idx,$key1
2030         addi            $idx,$idx,16
2031         bdnz            Loop_xts_enc
2032
2033         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2034         vcipher         $inout,$inout,$rndkey1
2035         lvx             $rndkey1,$idx,$key1
2036         li              $idx,16
2037         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2038         vxor            $rndkey0,$rndkey0,$tweak
2039         vcipherlast     $output,$inout,$rndkey0
2040
2041         le?vperm        $tmp,$output,$output,$leperm
2042         be?nop
2043         le?stvx_u       $tmp,0,$out
2044         be?stvx_u       $output,0,$out
2045         addi            $out,$out,16
2046
2047         subic.          $len,$len,16
2048         beq             Lxts_enc_done
2049
2050         vmr             $inout,$inptail
2051         lvx             $inptail,0,$inp
2052         addi            $inp,$inp,16
2053         lvx             $rndkey0,0,$key1
2054         lvx             $rndkey1,$idx,$key1
2055         addi            $idx,$idx,16
2056
2057         subic           r0,$len,32
2058         subfe           r0,r0,r0
2059         and             r0,r0,$taillen
2060         add             $inp,$inp,r0
2061
2062         vsrab           $tmp,$tweak,$seven              # next tweak value
2063         vaddubm         $tweak,$tweak,$tweak
2064         vsldoi          $tmp,$tmp,$tmp,15
2065         vand            $tmp,$tmp,$eighty7
2066         vxor            $tweak,$tweak,$tmp
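# The five instructions above multiply $tweak by x in GF(2^128):
# vsrab broadcasts each byte's sign bit (0x00 or 0xff), vaddubm
# doubles every byte, vsldoi rotates the sign mask by one byte so
# each lane picks up the carry of the byte below it, and the vand
# with $eighty7 turns the carry out of the most significant byte
# into the 0x87 reduction while the others become plain +1 carries.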
2067
2068         vperm           $inout,$inout,$inptail,$inpperm
2069         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2070         vxor            $inout,$inout,$tweak
2071         vxor            $output,$output,$rndkey0        # just in case $len<16
2072         vxor            $inout,$inout,$rndkey0
2073         lvx             $rndkey0,$idx,$key1
2074         addi            $idx,$idx,16
2075
2076         mtctr           $rounds
2077         ${UCMP}i        $len,16
2078         bge             Loop_xts_enc
2079
2080         vxor            $output,$output,$tweak
2081         lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
2082         vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
2083         vspltisb        $tmp,-1
2084         vperm           $inptail,$inptail,$tmp,$inpperm
2085         vsel            $inout,$inout,$output,$inptail
2086
2087         subi            r11,$out,17
2088         subi            $out,$out,16
2089         mtctr           $len
2090         li              $len,16
2091 Loop_xts_enc_steal:
2092         lbzu            r0,1(r11)
2093         stb             r0,16(r11)
2094         bdnz            Loop_xts_enc_steal
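# Ciphertext stealing: the byte loop just copied the leading $taillen
# bytes of the last full ciphertext block up to the final, partial
# output position; Loop_xts_enc is then entered once more to encrypt
# the merged block over the last full block's slot.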
2095
2096         mtctr           $rounds
2097         b               Loop_xts_enc                    # one more time...
2098
2099 Lxts_enc_done:
2100         mtspr           256,r12                         # restore vrsave
2101         li              r3,0
2102         blr
2103         .long           0
2104         .byte           0,12,0x04,0,0x80,6,6,0
2105         .long           0
2106 .size   .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2107
2108 .globl  .${prefix}_xts_decrypt
2109 .align  5
2110 .${prefix}_xts_decrypt:
2111         mr              $inp,r3                         # reassign
2112         li              r3,-1
2113         ${UCMP}i        $len,16
2114         bltlr-
2115
2116         lis             r0,0xfff8
2117         mfspr           r12,256                         # save vrsave
2118         li              r11,0
2119         mtspr           256,r0
2120
2121         andi.           r0,$len,15
2122         neg             r0,r0
2123         andi.           r0,r0,16
2124         sub             $len,$len,r0
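# Reserve one extra block when the length is not 16-byte aligned: r0
# ends up 16 if a tail exists and 0 otherwise (e.g. $len = 100 -> tail
# of 4, r0 = 16, $len = 84), keeping the block that participates in
# ciphertext stealing out of the bulk loop.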
2125
2126         vspltisb        $seven,0x07                     # 0x070707..07
2127         le?lvsl         $leperm,r11,r11
2128         le?vspltisb     $tmp,0x0f
2129         le?vxor         $leperm,$leperm,$seven
2130
2131         li              $idx,15
2132         lvx             $tweak,0,$ivp                   # load [unaligned] iv
2133         lvsl            $inpperm,0,$ivp
2134         lvx             $inptail,$idx,$ivp
2135         le?vxor         $inpperm,$inpperm,$tmp
2136         vperm           $tweak,$tweak,$inptail,$inpperm
2137
2138         ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
2139         lwz             $rounds,240($key2)
2140         srwi            $rounds,$rounds,1
2141         subi            $rounds,$rounds,1
2142         li              $idx,16
2143
2144         neg             r11,$inp
2145         lvsr            $inpperm,0,r11                  # prepare for unaligned load
2146         lvx             $inout,0,$inp
2147         addi            $inp,$inp,15                    # 15 is not a typo
2148         le?vxor         $inpperm,$inpperm,$tmp
2149
2150         lvx             $rndkey0,0,$key2
2151         lvx             $rndkey1,$idx,$key2
2152         addi            $idx,$idx,16
2153         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2154         vxor            $tweak,$tweak,$rndkey0
2155         lvx             $rndkey0,$idx,$key2
2156         addi            $idx,$idx,16
2157         mtctr           $rounds
2158
2159 Ltweak_xts_dec:
2160         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2161         vcipher         $tweak,$tweak,$rndkey1
2162         lvx             $rndkey1,$idx,$key2
2163         addi            $idx,$idx,16
2164         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2165         vcipher         $tweak,$tweak,$rndkey0
2166         lvx             $rndkey0,$idx,$key2
2167         addi            $idx,$idx,16
2168         bdnz            Ltweak_xts_dec
2169
2170         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2171         vcipher         $tweak,$tweak,$rndkey1
2172         lvx             $rndkey1,$idx,$key2
2173         li              $idx,16
2174         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2175         vcipherlast     $tweak,$tweak,$rndkey0
2176
2177         lvx             $inptail,0,$inp
2178         addi            $inp,$inp,16
2179
2180         ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
2181         lwz             $rounds,240($key1)
2182         srwi            $rounds,$rounds,1
2183         subi            $rounds,$rounds,1
2184         li              $idx,16
2185
2186         vslb            $eighty7,$seven,$seven          # 0x808080..80
2187         vor             $eighty7,$eighty7,$seven        # 0x878787..87
2188         vspltisb        $tmp,1                          # 0x010101..01
2189         vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
2190
2191         ${UCMP}i        $len,96
2192         bge             _aesp8_xts_decrypt6x
2193
2194         lvx             $rndkey0,0,$key1
2195         lvx             $rndkey1,$idx,$key1
2196         addi            $idx,$idx,16
2197         vperm           $inout,$inout,$inptail,$inpperm
2198         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2199         vxor            $inout,$inout,$tweak
2200         vxor            $inout,$inout,$rndkey0
2201         lvx             $rndkey0,$idx,$key1
2202         addi            $idx,$idx,16
2203         mtctr           $rounds
2204
2205         ${UCMP}i        $len,16
2206         blt             Ltail_xts_dec
2207         be?b            Loop_xts_dec
2208
2209 .align  5
2210 Loop_xts_dec:
2211         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2212         vncipher        $inout,$inout,$rndkey1
2213         lvx             $rndkey1,$idx,$key1
2214         addi            $idx,$idx,16
2215         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2216         vncipher        $inout,$inout,$rndkey0
2217         lvx             $rndkey0,$idx,$key1
2218         addi            $idx,$idx,16
2219         bdnz            Loop_xts_dec
2220
2221         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2222         vncipher        $inout,$inout,$rndkey1
2223         lvx             $rndkey1,$idx,$key1
2224         li              $idx,16
2225         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2226         vxor            $rndkey0,$rndkey0,$tweak
2227         vncipherlast    $output,$inout,$rndkey0
2228
2229         le?vperm        $tmp,$output,$output,$leperm
2230         be?nop
2231         le?stvx_u       $tmp,0,$out
2232         be?stvx_u       $output,0,$out
2233         addi            $out,$out,16
2234
2235         subic.          $len,$len,16
2236         beq             Lxts_dec_done
2237
2238         vmr             $inout,$inptail
2239         lvx             $inptail,0,$inp
2240         addi            $inp,$inp,16
2241         lvx             $rndkey0,0,$key1
2242         lvx             $rndkey1,$idx,$key1
2243         addi            $idx,$idx,16
2244
2245         vsrab           $tmp,$tweak,$seven              # next tweak value
2246         vaddubm         $tweak,$tweak,$tweak
2247         vsldoi          $tmp,$tmp,$tmp,15
2248         vand            $tmp,$tmp,$eighty7
2249         vxor            $tweak,$tweak,$tmp
2250
2251         vperm           $inout,$inout,$inptail,$inpperm
2252         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2253         vxor            $inout,$inout,$tweak
2254         vxor            $inout,$inout,$rndkey0
2255         lvx             $rndkey0,$idx,$key1
2256         addi            $idx,$idx,16
2257
2258         mtctr           $rounds
2259         ${UCMP}i        $len,16
2260         bge             Loop_xts_dec
2261
2262 Ltail_xts_dec:
2263         vsrab           $tmp,$tweak,$seven              # next tweak value
2264         vaddubm         $tweak1,$tweak,$tweak
2265         vsldoi          $tmp,$tmp,$tmp,15
2266         vand            $tmp,$tmp,$eighty7
2267         vxor            $tweak1,$tweak1,$tmp
2268
2269         subi            $inp,$inp,16
2270         add             $inp,$inp,$len
2271
2272         vxor            $inout,$inout,$tweak            # :-(
2273         vxor            $inout,$inout,$tweak1           # :-)
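# Decrypt-side stealing processes the second-to-last block with the
# *next* tweak: the first vxor cancels the $tweak that was already
# folded in (hence the ":-("), the second applies $tweak1 instead.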
2274
2275 Loop_xts_dec_short:
2276         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2277         vncipher        $inout,$inout,$rndkey1
2278         lvx             $rndkey1,$idx,$key1
2279         addi            $idx,$idx,16
2280         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2281         vncipher        $inout,$inout,$rndkey0
2282         lvx             $rndkey0,$idx,$key1
2283         addi            $idx,$idx,16
2284         bdnz            Loop_xts_dec_short
2285
2286         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2287         vncipher        $inout,$inout,$rndkey1
2288         lvx             $rndkey1,$idx,$key1
2289         li              $idx,16
2290         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2291         vxor            $rndkey0,$rndkey0,$tweak1
2292         vncipherlast    $output,$inout,$rndkey0
2293
2294         le?vperm        $tmp,$output,$output,$leperm
2295         be?nop
2296         le?stvx_u       $tmp,0,$out
2297         be?stvx_u       $output,0,$out
2298
2299         vmr             $inout,$inptail
2300         lvx             $inptail,0,$inp
2301         #addi           $inp,$inp,16
2302         lvx             $rndkey0,0,$key1
2303         lvx             $rndkey1,$idx,$key1
2304         addi            $idx,$idx,16
2305         vperm           $inout,$inout,$inptail,$inpperm
2306         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2307
2308         lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
2309         vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
2310         vspltisb        $tmp,-1
2311         vperm           $inptail,$inptail,$tmp,$inpperm
2312         vsel            $inout,$inout,$output,$inptail
2313
2314         vxor            $rndkey0,$rndkey0,$tweak
2315         vxor            $inout,$inout,$rndkey0
2316         lvx             $rndkey0,$idx,$key1
2317         addi            $idx,$idx,16
2318
2319         subi            r11,$out,1
2320         mtctr           $len
2321         li              $len,16
2322 Loop_xts_dec_steal:
2323         lbzu            r0,1(r11)
2324         stb             r0,16(r11)
2325         bdnz            Loop_xts_dec_steal
2326
2327         mtctr           $rounds
2328         b               Loop_xts_dec                    # one more time...
2329
2330 Lxts_dec_done:
2331         mtspr           256,r12                         # restore vrsave
2332         li              r3,0
2333         blr
2334         .long           0
2335         .byte           0,12,0x04,0,0x80,6,6,0
2336         .long           0
2337 .size   .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2338 ___
2339 #########################################################################
2340 {{      # Optimized XTS procedures                                      #
2341 my $key_="r11";
2342 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
2343     $x00=0 if ($flavour =~ /osx/);
2344 my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
2345 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2346 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2347 my $rndkey0="v23";      # v24-v25 rotating buffer for first round keys
2348                         # v26-v31 last 6 round keys
2349 my ($keyperm)=($out0);  # aliases with "caller", redundant assignment
2350 my $taillen=$x70;
2351
2352 $code.=<<___;
2353 .align  5
2354 _aesp8_xts_encrypt6x:
2355         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2356         mflr            r0
2357         li              r7,`$FRAME+8*16+15`
2358         li              r8,`$FRAME+8*16+31`
2359         $PUSH           r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2360         stvx            v20,r7,$sp              # ABI says so
2361         addi            r7,r7,32
2362         stvx            v21,r8,$sp
2363         addi            r8,r8,32
2364         stvx            v22,r7,$sp
2365         addi            r7,r7,32
2366         stvx            v23,r8,$sp
2367         addi            r8,r8,32
2368         stvx            v24,r7,$sp
2369         addi            r7,r7,32
2370         stvx            v25,r8,$sp
2371         addi            r8,r8,32
2372         stvx            v26,r7,$sp
2373         addi            r7,r7,32
2374         stvx            v27,r8,$sp
2375         addi            r8,r8,32
2376         stvx            v28,r7,$sp
2377         addi            r7,r7,32
2378         stvx            v29,r8,$sp
2379         addi            r8,r8,32
2380         stvx            v30,r7,$sp
2381         stvx            v31,r8,$sp
2382         mr              r7,r0
2383         li              r0,-1
2384         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
2385         li              $x10,0x10
2386         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2387         li              $x20,0x20
2388         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2389         li              $x30,0x30
2390         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2391         li              $x40,0x40
2392         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2393         li              $x50,0x50
2394         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2395         li              $x60,0x60
2396         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2397         li              $x70,0x70
2398         mtspr           256,r0
2399
2400         subi            $rounds,$rounds,3       # -4 in total
2401
2402         lvx             $rndkey0,$x00,$key1     # load key schedule
2403         lvx             v30,$x10,$key1
2404         addi            $key1,$key1,0x20
2405         lvx             v31,$x00,$key1
2406         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
2407         addi            $key_,$sp,$FRAME+15
2408         mtctr           $rounds
2409
2410 Load_xts_enc_key:
2411         ?vperm          v24,v30,v31,$keyperm
2412         lvx             v30,$x10,$key1
2413         addi            $key1,$key1,0x20
2414         stvx            v24,$x00,$key_          # off-load round[1]
2415         ?vperm          v25,v31,v30,$keyperm
2416         lvx             v31,$x00,$key1
2417         stvx            v25,$x10,$key_          # off-load round[2]
2418         addi            $key_,$key_,0x20
2419         bdnz            Load_xts_enc_key
2420
2421         lvx             v26,$x10,$key1
2422         ?vperm          v24,v30,v31,$keyperm
2423         lvx             v27,$x20,$key1
2424         stvx            v24,$x00,$key_          # off-load round[3]
2425         ?vperm          v25,v31,v26,$keyperm
2426         lvx             v28,$x30,$key1
2427         stvx            v25,$x10,$key_          # off-load round[4]
2428         addi            $key_,$sp,$FRAME+15     # rewind $key_
2429         ?vperm          v26,v26,v27,$keyperm
2430         lvx             v29,$x40,$key1
2431         ?vperm          v27,v27,v28,$keyperm
2432         lvx             v30,$x50,$key1
2433         ?vperm          v28,v28,v29,$keyperm
2434         lvx             v31,$x60,$key1
2435         ?vperm          v29,v29,v30,$keyperm
2436         lvx             $twk5,$x70,$key1        # borrow $twk5
2437         ?vperm          v30,v30,v31,$keyperm
2438         lvx             v24,$x00,$key_          # pre-load round[1]
2439         ?vperm          v31,v31,$twk5,$keyperm
2440         lvx             v25,$x10,$key_          # pre-load round[2]
2441
2442          vperm          $in0,$inout,$inptail,$inpperm
2443          subi           $inp,$inp,31            # undo "caller"
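# The "caller" advanced $inp by 15 (alignment trick) plus 16 for the
# first lvx of $inptail; subtracting 31 restores the true input
# pointer so lvx_u can be used from here on.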
2444         vxor            $twk0,$tweak,$rndkey0
2445         vsrab           $tmp,$tweak,$seven      # next tweak value
2446         vaddubm         $tweak,$tweak,$tweak
2447         vsldoi          $tmp,$tmp,$tmp,15
2448         vand            $tmp,$tmp,$eighty7
2449          vxor           $out0,$in0,$twk0
2450         vxor            $tweak,$tweak,$tmp
2451
2452          lvx_u          $in1,$x10,$inp
2453         vxor            $twk1,$tweak,$rndkey0
2454         vsrab           $tmp,$tweak,$seven      # next tweak value
2455         vaddubm         $tweak,$tweak,$tweak
2456         vsldoi          $tmp,$tmp,$tmp,15
2457          le?vperm       $in1,$in1,$in1,$leperm
2458         vand            $tmp,$tmp,$eighty7
2459          vxor           $out1,$in1,$twk1
2460         vxor            $tweak,$tweak,$tmp
2461
2462          lvx_u          $in2,$x20,$inp
2463          andi.          $taillen,$len,15
2464         vxor            $twk2,$tweak,$rndkey0
2465         vsrab           $tmp,$tweak,$seven      # next tweak value
2466         vaddubm         $tweak,$tweak,$tweak
2467         vsldoi          $tmp,$tmp,$tmp,15
2468          le?vperm       $in2,$in2,$in2,$leperm
2469         vand            $tmp,$tmp,$eighty7
2470          vxor           $out2,$in2,$twk2
2471         vxor            $tweak,$tweak,$tmp
2472
2473          lvx_u          $in3,$x30,$inp
2474          sub            $len,$len,$taillen
2475         vxor            $twk3,$tweak,$rndkey0
2476         vsrab           $tmp,$tweak,$seven      # next tweak value
2477         vaddubm         $tweak,$tweak,$tweak
2478         vsldoi          $tmp,$tmp,$tmp,15
2479          le?vperm       $in3,$in3,$in3,$leperm
2480         vand            $tmp,$tmp,$eighty7
2481          vxor           $out3,$in3,$twk3
2482         vxor            $tweak,$tweak,$tmp
2483
2484          lvx_u          $in4,$x40,$inp
2485          subi           $len,$len,0x60
2486         vxor            $twk4,$tweak,$rndkey0
2487         vsrab           $tmp,$tweak,$seven      # next tweak value
2488         vaddubm         $tweak,$tweak,$tweak
2489         vsldoi          $tmp,$tmp,$tmp,15
2490          le?vperm       $in4,$in4,$in4,$leperm
2491         vand            $tmp,$tmp,$eighty7
2492          vxor           $out4,$in4,$twk4
2493         vxor            $tweak,$tweak,$tmp
2494
2495          lvx_u          $in5,$x50,$inp
2496          addi           $inp,$inp,0x60
2497         vxor            $twk5,$tweak,$rndkey0
2498         vsrab           $tmp,$tweak,$seven      # next tweak value
2499         vaddubm         $tweak,$tweak,$tweak
2500         vsldoi          $tmp,$tmp,$tmp,15
2501          le?vperm       $in5,$in5,$in5,$leperm
2502         vand            $tmp,$tmp,$eighty7
2503          vxor           $out5,$in5,$twk5
2504         vxor            $tweak,$tweak,$tmp
2505
2506         vxor            v31,v31,$rndkey0
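# Each $twkN above already carries $rndkey0 (round 0 is AddRoundKey),
# so folding $rndkey0 into v31 cancels it when $twkN is xored with
# v31 later on: the vcipherlast then applies the last round key and
# the output tweak in a single xor.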
2507         mtctr           $rounds
2508         b               Loop_xts_enc6x
2509
2510 .align  5
2511 Loop_xts_enc6x:
2512         vcipher         $out0,$out0,v24
2513         vcipher         $out1,$out1,v24
2514         vcipher         $out2,$out2,v24
2515         vcipher         $out3,$out3,v24
2516         vcipher         $out4,$out4,v24
2517         vcipher         $out5,$out5,v24
2518         lvx             v24,$x20,$key_          # round[3]
2519         addi            $key_,$key_,0x20
2520
2521         vcipher         $out0,$out0,v25
2522         vcipher         $out1,$out1,v25
2523         vcipher         $out2,$out2,v25
2524         vcipher         $out3,$out3,v25
2525         vcipher         $out4,$out4,v25
2526         vcipher         $out5,$out5,v25
2527         lvx             v25,$x10,$key_          # round[4]
2528         bdnz            Loop_xts_enc6x
2529
2530         subic           $len,$len,96            # $len-=96
2531          vxor           $in0,$twk0,v31          # xor with last round key
2532         vcipher         $out0,$out0,v24
2533         vcipher         $out1,$out1,v24
2534          vsrab          $tmp,$tweak,$seven      # next tweak value
2535          vxor           $twk0,$tweak,$rndkey0
2536          vaddubm        $tweak,$tweak,$tweak
2537         vcipher         $out2,$out2,v24
2538         vcipher         $out3,$out3,v24
2539          vsldoi         $tmp,$tmp,$tmp,15
2540         vcipher         $out4,$out4,v24
2541         vcipher         $out5,$out5,v24
2542
2543         subfe.          r0,r0,r0                # borrow?-1:0
2544          vand           $tmp,$tmp,$eighty7
2545         vcipher         $out0,$out0,v25
2546         vcipher         $out1,$out1,v25
2547          vxor           $tweak,$tweak,$tmp
2548         vcipher         $out2,$out2,v25
2549         vcipher         $out3,$out3,v25
2550          vxor           $in1,$twk1,v31
2551          vsrab          $tmp,$tweak,$seven      # next tweak value
2552          vxor           $twk1,$tweak,$rndkey0
2553         vcipher         $out4,$out4,v25
2554         vcipher         $out5,$out5,v25
2555
2556         and             r0,r0,$len
2557          vaddubm        $tweak,$tweak,$tweak
2558          vsldoi         $tmp,$tmp,$tmp,15
2559         vcipher         $out0,$out0,v26
2560         vcipher         $out1,$out1,v26
2561          vand           $tmp,$tmp,$eighty7
2562         vcipher         $out2,$out2,v26
2563         vcipher         $out3,$out3,v26
2564          vxor           $tweak,$tweak,$tmp
2565         vcipher         $out4,$out4,v26
2566         vcipher         $out5,$out5,v26
2567
2568         add             $inp,$inp,r0            # $inp is adjusted in such
2569                                                 # a way that at exit from
2570                                                 # the loop inX-in5 are
2571                                                 # loaded with the last "words"
2572          vxor           $in2,$twk2,v31
2573          vsrab          $tmp,$tweak,$seven      # next tweak value
2574          vxor           $twk2,$tweak,$rndkey0
2575          vaddubm        $tweak,$tweak,$tweak
2576         vcipher         $out0,$out0,v27
2577         vcipher         $out1,$out1,v27
2578          vsldoi         $tmp,$tmp,$tmp,15
2579         vcipher         $out2,$out2,v27
2580         vcipher         $out3,$out3,v27
2581          vand           $tmp,$tmp,$eighty7
2582         vcipher         $out4,$out4,v27
2583         vcipher         $out5,$out5,v27
2584
2585         addi            $key_,$sp,$FRAME+15     # rewind $key_
2586          vxor           $tweak,$tweak,$tmp
2587         vcipher         $out0,$out0,v28
2588         vcipher         $out1,$out1,v28
2589          vxor           $in3,$twk3,v31
2590          vsrab          $tmp,$tweak,$seven      # next tweak value
2591          vxor           $twk3,$tweak,$rndkey0
2592         vcipher         $out2,$out2,v28
2593         vcipher         $out3,$out3,v28
2594          vaddubm        $tweak,$tweak,$tweak
2595          vsldoi         $tmp,$tmp,$tmp,15
2596         vcipher         $out4,$out4,v28
2597         vcipher         $out5,$out5,v28
2598         lvx             v24,$x00,$key_          # re-pre-load round[1]
2599          vand           $tmp,$tmp,$eighty7
2600
2601         vcipher         $out0,$out0,v29
2602         vcipher         $out1,$out1,v29
2603          vxor           $tweak,$tweak,$tmp
2604         vcipher         $out2,$out2,v29
2605         vcipher         $out3,$out3,v29
2606          vxor           $in4,$twk4,v31
2607          vsrab          $tmp,$tweak,$seven      # next tweak value
2608          vxor           $twk4,$tweak,$rndkey0
2609         vcipher         $out4,$out4,v29
2610         vcipher         $out5,$out5,v29
2611         lvx             v25,$x10,$key_          # re-pre-load round[2]
2612          vaddubm        $tweak,$tweak,$tweak
2613          vsldoi         $tmp,$tmp,$tmp,15
2614
2615         vcipher         $out0,$out0,v30
2616         vcipher         $out1,$out1,v30
2617          vand           $tmp,$tmp,$eighty7
2618         vcipher         $out2,$out2,v30
2619         vcipher         $out3,$out3,v30
2620          vxor           $tweak,$tweak,$tmp
2621         vcipher         $out4,$out4,v30
2622         vcipher         $out5,$out5,v30
2623          vxor           $in5,$twk5,v31
2624          vsrab          $tmp,$tweak,$seven      # next tweak value
2625          vxor           $twk5,$tweak,$rndkey0
2626
2627         vcipherlast     $out0,$out0,$in0
2628          lvx_u          $in0,$x00,$inp          # load next input block
2629          vaddubm        $tweak,$tweak,$tweak
2630          vsldoi         $tmp,$tmp,$tmp,15
2631         vcipherlast     $out1,$out1,$in1
2632          lvx_u          $in1,$x10,$inp
2633         vcipherlast     $out2,$out2,$in2
2634          le?vperm       $in0,$in0,$in0,$leperm
2635          lvx_u          $in2,$x20,$inp
2636          vand           $tmp,$tmp,$eighty7
2637         vcipherlast     $out3,$out3,$in3
2638          le?vperm       $in1,$in1,$in1,$leperm
2639          lvx_u          $in3,$x30,$inp
2640         vcipherlast     $out4,$out4,$in4
2641          le?vperm       $in2,$in2,$in2,$leperm
2642          lvx_u          $in4,$x40,$inp
2643          vxor           $tweak,$tweak,$tmp
2644         vcipherlast     $tmp,$out5,$in5         # last block might be needed
2645                                                 # in stealing mode
2646          le?vperm       $in3,$in3,$in3,$leperm
2647          lvx_u          $in5,$x50,$inp
2648          addi           $inp,$inp,0x60
2649          le?vperm       $in4,$in4,$in4,$leperm
2650          le?vperm       $in5,$in5,$in5,$leperm
2651
2652         le?vperm        $out0,$out0,$out0,$leperm
2653         le?vperm        $out1,$out1,$out1,$leperm
2654         stvx_u          $out0,$x00,$out         # store output
2655          vxor           $out0,$in0,$twk0
2656         le?vperm        $out2,$out2,$out2,$leperm
2657         stvx_u          $out1,$x10,$out
2658          vxor           $out1,$in1,$twk1
2659         le?vperm        $out3,$out3,$out3,$leperm
2660         stvx_u          $out2,$x20,$out
2661          vxor           $out2,$in2,$twk2
2662         le?vperm        $out4,$out4,$out4,$leperm
2663         stvx_u          $out3,$x30,$out
2664          vxor           $out3,$in3,$twk3
2665         le?vperm        $out5,$tmp,$tmp,$leperm
2666         stvx_u          $out4,$x40,$out
2667          vxor           $out4,$in4,$twk4
2668         le?stvx_u       $out5,$x50,$out
2669         be?stvx_u       $tmp, $x50,$out
2670          vxor           $out5,$in5,$twk5
2671         addi            $out,$out,0x60
2672
2673         mtctr           $rounds
2674         beq             Loop_xts_enc6x          # did $len-=96 borrow?
2675
2676         addic.          $len,$len,0x60
2677         beq             Lxts_enc6x_zero
2678         cmpwi           $len,0x20
2679         blt             Lxts_enc6x_one
2680         nop
2681         beq             Lxts_enc6x_two
2682         cmpwi           $len,0x40
2683         blt             Lxts_enc6x_three
2684         nop
2685         beq             Lxts_enc6x_four
2686
2687 Lxts_enc6x_five:
2688         vxor            $out0,$in1,$twk0
2689         vxor            $out1,$in2,$twk1
2690         vxor            $out2,$in3,$twk2
2691         vxor            $out3,$in4,$twk3
2692         vxor            $out4,$in5,$twk4
2693
2694         bl              _aesp8_xts_enc5x
2695
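# CR0 still holds the "$taillen == 0" comparison performed inside
# _aesp8_xts_enc5x, so the bne below takes the stealing path only when
# a partial final block remains.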
2696         le?vperm        $out0,$out0,$out0,$leperm
2697         vmr             $twk0,$twk5             # unused tweak
2698         le?vperm        $out1,$out1,$out1,$leperm
2699         stvx_u          $out0,$x00,$out         # store output
2700         le?vperm        $out2,$out2,$out2,$leperm
2701         stvx_u          $out1,$x10,$out
2702         le?vperm        $out3,$out3,$out3,$leperm
2703         stvx_u          $out2,$x20,$out
2704         vxor            $tmp,$out4,$twk5        # last block prep for stealing
2705         le?vperm        $out4,$out4,$out4,$leperm
2706         stvx_u          $out3,$x30,$out
2707         stvx_u          $out4,$x40,$out
2708         addi            $out,$out,0x50
2709         bne             Lxts_enc6x_steal
2710         b               Lxts_enc6x_done
2711
2712 .align  4
2713 Lxts_enc6x_four:
2714         vxor            $out0,$in2,$twk0
2715         vxor            $out1,$in3,$twk1
2716         vxor            $out2,$in4,$twk2
2717         vxor            $out3,$in5,$twk3
2718         vxor            $out4,$out4,$out4
2719
2720         bl              _aesp8_xts_enc5x
2721
2722         le?vperm        $out0,$out0,$out0,$leperm
2723         vmr             $twk0,$twk4             # unused tweak
2724         le?vperm        $out1,$out1,$out1,$leperm
2725         stvx_u          $out0,$x00,$out         # store output
2726         le?vperm        $out2,$out2,$out2,$leperm
2727         stvx_u          $out1,$x10,$out
2728         vxor            $tmp,$out3,$twk4        # last block prep for stealing
2729         le?vperm        $out3,$out3,$out3,$leperm
2730         stvx_u          $out2,$x20,$out
2731         stvx_u          $out3,$x30,$out
2732         addi            $out,$out,0x40
2733         bne             Lxts_enc6x_steal
2734         b               Lxts_enc6x_done
2735
2736 .align  4
2737 Lxts_enc6x_three:
2738         vxor            $out0,$in3,$twk0
2739         vxor            $out1,$in4,$twk1
2740         vxor            $out2,$in5,$twk2
2741         vxor            $out3,$out3,$out3
2742         vxor            $out4,$out4,$out4
2743
2744         bl              _aesp8_xts_enc5x
2745
2746         le?vperm        $out0,$out0,$out0,$leperm
2747         vmr             $twk0,$twk3             # unused tweak
2748         le?vperm        $out1,$out1,$out1,$leperm
2749         stvx_u          $out0,$x00,$out         # store output
2750         vxor            $tmp,$out2,$twk3        # last block prep for stealing
2751         le?vperm        $out2,$out2,$out2,$leperm
2752         stvx_u          $out1,$x10,$out
2753         stvx_u          $out2,$x20,$out
2754         addi            $out,$out,0x30
2755         bne             Lxts_enc6x_steal
2756         b               Lxts_enc6x_done
2757
2758 .align  4
2759 Lxts_enc6x_two:
2760         vxor            $out0,$in4,$twk0
2761         vxor            $out1,$in5,$twk1
2762         vxor            $out2,$out2,$out2
2763         vxor            $out3,$out3,$out3
2764         vxor            $out4,$out4,$out4
2765
2766         bl              _aesp8_xts_enc5x
2767
2768         le?vperm        $out0,$out0,$out0,$leperm
2769         vmr             $twk0,$twk2             # unused tweak
2770         vxor            $tmp,$out1,$twk2        # last block prep for stealing
2771         le?vperm        $out1,$out1,$out1,$leperm
2772         stvx_u          $out0,$x00,$out         # store output
2773         stvx_u          $out1,$x10,$out
2774         addi            $out,$out,0x20
2775         bne             Lxts_enc6x_steal
2776         b               Lxts_enc6x_done
2777
2778 .align  4
2779 Lxts_enc6x_one:
2780         vxor            $out0,$in5,$twk0
2781         nop
2782 Loop_xts_enc1x:
2783         vcipher         $out0,$out0,v24
2784         lvx             v24,$x20,$key_          # round[3]
2785         addi            $key_,$key_,0x20
2786
2787         vcipher         $out0,$out0,v25
2788         lvx             v25,$x10,$key_          # round[4]
2789         bdnz            Loop_xts_enc1x
2790
2791         add             $inp,$inp,$taillen
2792         cmpwi           $taillen,0
2793         vcipher         $out0,$out0,v24
2794
2795         subi            $inp,$inp,16
2796         vcipher         $out0,$out0,v25
2797
2798         lvsr            $inpperm,0,$taillen
2799         vcipher         $out0,$out0,v26
2800
2801         lvx_u           $in0,0,$inp
2802         vcipher         $out0,$out0,v27
2803
2804         addi            $key_,$sp,$FRAME+15     # rewind $key_
2805         vcipher         $out0,$out0,v28
2806         lvx             v24,$x00,$key_          # re-pre-load round[1]
2807
2808         vcipher         $out0,$out0,v29
2809         lvx             v25,$x10,$key_          # re-pre-load round[2]
2810          vxor           $twk0,$twk0,v31
2811
2812         le?vperm        $in0,$in0,$in0,$leperm
2813         vcipher         $out0,$out0,v30
2814
2815         vperm           $in0,$in0,$in0,$inpperm
2816         vcipherlast     $out0,$out0,$twk0
2817
2818         vmr             $twk0,$twk1             # unused tweak
2819         vxor            $tmp,$out0,$twk1        # last block prep for stealing
2820         le?vperm        $out0,$out0,$out0,$leperm
2821         stvx_u          $out0,$x00,$out         # store output
2822         addi            $out,$out,0x10
2823         bne             Lxts_enc6x_steal
2824         b               Lxts_enc6x_done
2825
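# No whole blocks remain.  If there is a tail, load the final partial
# block; $tmp still holds the last ciphertext block from the main loop
# and is xored with $twk0 to match the form the stealing code expects.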
2826 .align  4
2827 Lxts_enc6x_zero:
2828         cmpwi           $taillen,0
2829         beq             Lxts_enc6x_done
2830
2831         add             $inp,$inp,$taillen
2832         subi            $inp,$inp,16
2833         lvx_u           $in0,0,$inp
2834         lvsr            $inpperm,0,$taillen     # $in5 is no more
2835         le?vperm        $in0,$in0,$in0,$leperm
2836         vperm           $in0,$in0,$in0,$inpperm
2837         vxor            $tmp,$tmp,$twk0
2838 Lxts_enc6x_steal:
2839         vxor            $in0,$in0,$twk0
2840         vxor            $out0,$out0,$out0
2841         vspltisb        $out1,-1
2842         vperm           $out0,$out0,$out1,$inpperm
2843         vsel            $out0,$in0,$tmp,$out0   # $tmp is last block, remember?
2844
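# Classic ciphertext stealing: rewind $out to the ciphertext block just
# stored, copy its leading $taillen bytes 16 bytes forward to become the
# short final block, then loop back to encrypt the merged block in its
# place.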
2845         subi            r3,$out,17
2846         subi            $out,$out,16
2847         mtctr           $taillen
2848 Loop_xts_enc6x_steal:
2849         lbzu            r0,1(r3)
2850         stb             r0,16(r3)
2851         bdnz            Loop_xts_enc6x_steal
2852
2853         li              $taillen,0
2854         mtctr           $rounds
2855         b               Loop_xts_enc1x          # one more time...
2856
2857 .align  4
2858 Lxts_enc6x_done:
2859         mtlr            r7
2860         li              r10,`$FRAME+15`
2861         li              r11,`$FRAME+31`
2862         stvx            $seven,r10,$sp          # wipe copies of round keys
2863         addi            r10,r10,32
2864         stvx            $seven,r11,$sp
2865         addi            r11,r11,32
2866         stvx            $seven,r10,$sp
2867         addi            r10,r10,32
2868         stvx            $seven,r11,$sp
2869         addi            r11,r11,32
2870         stvx            $seven,r10,$sp
2871         addi            r10,r10,32
2872         stvx            $seven,r11,$sp
2873         addi            r11,r11,32
2874         stvx            $seven,r10,$sp
2875         addi            r10,r10,32
2876         stvx            $seven,r11,$sp
2877         addi            r11,r11,32
2878
2879         mtspr           256,$vrsave
2880         lvx             v20,r10,$sp             # ABI says so
2881         addi            r10,r10,32
2882         lvx             v21,r11,$sp
2883         addi            r11,r11,32
2884         lvx             v22,r10,$sp
2885         addi            r10,r10,32
2886         lvx             v23,r11,$sp
2887         addi            r11,r11,32
2888         lvx             v24,r10,$sp
2889         addi            r10,r10,32
2890         lvx             v25,r11,$sp
2891         addi            r11,r11,32
2892         lvx             v26,r10,$sp
2893         addi            r10,r10,32
2894         lvx             v27,r11,$sp
2895         addi            r11,r11,32
2896         lvx             v28,r10,$sp
2897         addi            r10,r10,32
2898         lvx             v29,r11,$sp
2899         addi            r11,r11,32
2900         lvx             v30,r10,$sp
2901         lvx             v31,r11,$sp
2902         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2903         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2904         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2905         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2906         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2907         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2908         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2909         blr
2910         .long           0
2911         .byte           0,12,0x04,1,0x80,6,6,0
2912         .long           0
2913
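#
# _aesp8_xts_enc5x pushes the remaining (at most five) streams through
# the outstanding AES rounds.  On the way it loads the final, possibly
# partial, input block for the stealing path and leaves CR0 holding the
# "$taillen == 0" comparison for its callers.
#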
2914 .align  5
2915 _aesp8_xts_enc5x:
2916         vcipher         $out0,$out0,v24
2917         vcipher         $out1,$out1,v24
2918         vcipher         $out2,$out2,v24
2919         vcipher         $out3,$out3,v24
2920         vcipher         $out4,$out4,v24
2921         lvx             v24,$x20,$key_          # round[3]
2922         addi            $key_,$key_,0x20
2923
2924         vcipher         $out0,$out0,v25
2925         vcipher         $out1,$out1,v25
2926         vcipher         $out2,$out2,v25
2927         vcipher         $out3,$out3,v25
2928         vcipher         $out4,$out4,v25
2929         lvx             v25,$x10,$key_          # round[4]
2930         bdnz            _aesp8_xts_enc5x
2931
2932         add             $inp,$inp,$taillen
2933         cmpwi           $taillen,0
2934         vcipher         $out0,$out0,v24
2935         vcipher         $out1,$out1,v24
2936         vcipher         $out2,$out2,v24
2937         vcipher         $out3,$out3,v24
2938         vcipher         $out4,$out4,v24
2939
2940         subi            $inp,$inp,16
2941         vcipher         $out0,$out0,v25
2942         vcipher         $out1,$out1,v25
2943         vcipher         $out2,$out2,v25
2944         vcipher         $out3,$out3,v25
2945         vcipher         $out4,$out4,v25
2946          vxor           $twk0,$twk0,v31
2947
2948         vcipher         $out0,$out0,v26
2949         lvsr            $inpperm,r0,$taillen    # $in5 is no more
2950         vcipher         $out1,$out1,v26
2951         vcipher         $out2,$out2,v26
2952         vcipher         $out3,$out3,v26
2953         vcipher         $out4,$out4,v26
2954          vxor           $in1,$twk1,v31
2955
2956         vcipher         $out0,$out0,v27
2957         lvx_u           $in0,0,$inp
2958         vcipher         $out1,$out1,v27
2959         vcipher         $out2,$out2,v27
2960         vcipher         $out3,$out3,v27
2961         vcipher         $out4,$out4,v27
2962          vxor           $in2,$twk2,v31
2963
2964         addi            $key_,$sp,$FRAME+15     # rewind $key_
2965         vcipher         $out0,$out0,v28
2966         vcipher         $out1,$out1,v28
2967         vcipher         $out2,$out2,v28
2968         vcipher         $out3,$out3,v28
2969         vcipher         $out4,$out4,v28
2970         lvx             v24,$x00,$key_          # re-pre-load round[1]
2971          vxor           $in3,$twk3,v31
2972
2973         vcipher         $out0,$out0,v29
2974         le?vperm        $in0,$in0,$in0,$leperm
2975         vcipher         $out1,$out1,v29
2976         vcipher         $out2,$out2,v29
2977         vcipher         $out3,$out3,v29
2978         vcipher         $out4,$out4,v29
2979         lvx             v25,$x10,$key_          # re-pre-load round[2]
2980          vxor           $in4,$twk4,v31
2981
2982         vcipher         $out0,$out0,v30
2983         vperm           $in0,$in0,$in0,$inpperm
2984         vcipher         $out1,$out1,v30
2985         vcipher         $out2,$out2,v30
2986         vcipher         $out3,$out3,v30
2987         vcipher         $out4,$out4,v30
2988
2989         vcipherlast     $out0,$out0,$twk0
2990         vcipherlast     $out1,$out1,$in1
2991         vcipherlast     $out2,$out2,$in2
2992         vcipherlast     $out3,$out3,$in3
2993         vcipherlast     $out4,$out4,$in4
2994         blr
2995         .long           0
2996         .byte           0,12,0x14,0,0,0,0,0
2997
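#
# XTS decryption, 6x interleaved.  The code below mirrors
# _aesp8_xts_encrypt6x instruction for instruction, substituting
# vncipher/vncipherlast for vcipher/vcipherlast; the tweak schedule is
# identical, as XTS uses the same tweak sequence in both directions.
#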
2998 .align  5
2999 _aesp8_xts_decrypt6x:
3000         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3001         mflr            r0
3002         li              r7,`$FRAME+8*16+15`
3003         li              r8,`$FRAME+8*16+31`
3004         $PUSH           r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3005         stvx            v20,r7,$sp              # ABI says so
3006         addi            r7,r7,32
3007         stvx            v21,r8,$sp
3008         addi            r8,r8,32
3009         stvx            v22,r7,$sp
3010         addi            r7,r7,32
3011         stvx            v23,r8,$sp
3012         addi            r8,r8,32
3013         stvx            v24,r7,$sp
3014         addi            r7,r7,32
3015         stvx            v25,r8,$sp
3016         addi            r8,r8,32
3017         stvx            v26,r7,$sp
3018         addi            r7,r7,32
3019         stvx            v27,r8,$sp
3020         addi            r8,r8,32
3021         stvx            v28,r7,$sp
3022         addi            r7,r7,32
3023         stvx            v29,r8,$sp
3024         addi            r8,r8,32
3025         stvx            v30,r7,$sp
3026         stvx            v31,r8,$sp
3027         mr              r7,r0
3028         li              r0,-1
3029         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
3030         li              $x10,0x10
3031         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3032         li              $x20,0x20
3033         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3034         li              $x30,0x30
3035         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3036         li              $x40,0x40
3037         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3038         li              $x50,0x50
3039         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3040         li              $x60,0x60
3041         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3042         li              $x70,0x70
3043         mtspr           256,r0
3044
3045         subi            $rounds,$rounds,3       # -4 in total
3046
3047         lvx             $rndkey0,$x00,$key1     # load key schedule
3048         lvx             v30,$x10,$key1
3049         addi            $key1,$key1,0x20
3050         lvx             v31,$x00,$key1
3051         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
3052         addi            $key_,$sp,$FRAME+15
3053         mtctr           $rounds
3054
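# Permute the key schedule once and off-load it to the stack, so the
# inner loops can cheaply re-load round keys from aligned storage
# ($key_ always rewinds to $sp+$FRAME+15).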
3055 Load_xts_dec_key:
3056         ?vperm          v24,v30,v31,$keyperm
3057         lvx             v30,$x10,$key1
3058         addi            $key1,$key1,0x20
3059         stvx            v24,$x00,$key_          # off-load round[1]
3060         ?vperm          v25,v31,v30,$keyperm
3061         lvx             v31,$x00,$key1
3062         stvx            v25,$x10,$key_          # off-load round[2]
3063         addi            $key_,$key_,0x20
3064         bdnz            Load_xts_dec_key
3065
3066         lvx             v26,$x10,$key1
3067         ?vperm          v24,v30,v31,$keyperm
3068         lvx             v27,$x20,$key1
3069         stvx            v24,$x00,$key_          # off-load round[3]
3070         ?vperm          v25,v31,v26,$keyperm
3071         lvx             v28,$x30,$key1
3072         stvx            v25,$x10,$key_          # off-load round[4]
3073         addi            $key_,$sp,$FRAME+15     # rewind $key_
3074         ?vperm          v26,v26,v27,$keyperm
3075         lvx             v29,$x40,$key1
3076         ?vperm          v27,v27,v28,$keyperm
3077         lvx             v30,$x50,$key1
3078         ?vperm          v28,v28,v29,$keyperm
3079         lvx             v31,$x60,$key1
3080         ?vperm          v29,v29,v30,$keyperm
3081         lvx             $twk5,$x70,$key1        # borrow $twk5
3082         ?vperm          v30,v30,v31,$keyperm
3083         lvx             v24,$x00,$key_          # pre-load round[1]
3084         ?vperm          v31,v31,$twk5,$keyperm
3085         lvx             v25,$x10,$key_          # pre-load round[2]
3086
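# Tweak setup: each of $twk0-$twk5 below is, in effect, the tweak for
# its block pre-xored with round key 0, so a single vxor applies both
# the XTS input mask and the initial whitening.  v31 is pre-xored with
# $rndkey0 as well, so the final vncipherlast applies tweak ^ last
# round key in one step.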
3087          vperm          $in0,$inout,$inptail,$inpperm
3088          subi           $inp,$inp,31            # undo "caller"
3089         vxor            $twk0,$tweak,$rndkey0
3090         vsrab           $tmp,$tweak,$seven      # next tweak value
3091         vaddubm         $tweak,$tweak,$tweak
3092         vsldoi          $tmp,$tmp,$tmp,15
3093         vand            $tmp,$tmp,$eighty7
3094          vxor           $out0,$in0,$twk0
3095         vxor            $tweak,$tweak,$tmp
3096
3097          lvx_u          $in1,$x10,$inp
3098         vxor            $twk1,$tweak,$rndkey0
3099         vsrab           $tmp,$tweak,$seven      # next tweak value
3100         vaddubm         $tweak,$tweak,$tweak
3101         vsldoi          $tmp,$tmp,$tmp,15
3102          le?vperm       $in1,$in1,$in1,$leperm
3103         vand            $tmp,$tmp,$eighty7
3104          vxor           $out1,$in1,$twk1
3105         vxor            $tweak,$tweak,$tmp
3106
3107          lvx_u          $in2,$x20,$inp
3108          andi.          $taillen,$len,15
3109         vxor            $twk2,$tweak,$rndkey0
3110         vsrab           $tmp,$tweak,$seven      # next tweak value
3111         vaddubm         $tweak,$tweak,$tweak
3112         vsldoi          $tmp,$tmp,$tmp,15
3113          le?vperm       $in2,$in2,$in2,$leperm
3114         vand            $tmp,$tmp,$eighty7
3115          vxor           $out2,$in2,$twk2
3116         vxor            $tweak,$tweak,$tmp
3117
3118          lvx_u          $in3,$x30,$inp
3119          sub            $len,$len,$taillen
3120         vxor            $twk3,$tweak,$rndkey0
3121         vsrab           $tmp,$tweak,$seven      # next tweak value
3122         vaddubm         $tweak,$tweak,$tweak
3123         vsldoi          $tmp,$tmp,$tmp,15
3124          le?vperm       $in3,$in3,$in3,$leperm
3125         vand            $tmp,$tmp,$eighty7
3126          vxor           $out3,$in3,$twk3
3127         vxor            $tweak,$tweak,$tmp
3128
3129          lvx_u          $in4,$x40,$inp
3130          subi           $len,$len,0x60
3131         vxor            $twk4,$tweak,$rndkey0
3132         vsrab           $tmp,$tweak,$seven      # next tweak value
3133         vaddubm         $tweak,$tweak,$tweak
3134         vsldoi          $tmp,$tmp,$tmp,15
3135          le?vperm       $in4,$in4,$in4,$leperm
3136         vand            $tmp,$tmp,$eighty7
3137          vxor           $out4,$in4,$twk4
3138         vxor            $tweak,$tweak,$tmp
3139
3140          lvx_u          $in5,$x50,$inp
3141          addi           $inp,$inp,0x60
3142         vxor            $twk5,$tweak,$rndkey0
3143         vsrab           $tmp,$tweak,$seven      # next tweak value
3144         vaddubm         $tweak,$tweak,$tweak
3145         vsldoi          $tmp,$tmp,$tmp,15
3146          le?vperm       $in5,$in5,$in5,$leperm
3147         vand            $tmp,$tmp,$eighty7
3148          vxor           $out5,$in5,$twk5
3149         vxor            $tweak,$tweak,$tmp
3150
3151         vxor            v31,v31,$rndkey0
3152         mtctr           $rounds
3153         b               Loop_xts_dec6x
3154
3155 .align  5
3156 Loop_xts_dec6x:
3157         vncipher        $out0,$out0,v24
3158         vncipher        $out1,$out1,v24
3159         vncipher        $out2,$out2,v24
3160         vncipher        $out3,$out3,v24
3161         vncipher        $out4,$out4,v24
3162         vncipher        $out5,$out5,v24
3163         lvx             v24,$x20,$key_          # round[3]
3164         addi            $key_,$key_,0x20
3165
3166         vncipher        $out0,$out0,v25
3167         vncipher        $out1,$out1,v25
3168         vncipher        $out2,$out2,v25
3169         vncipher        $out3,$out3,v25
3170         vncipher        $out4,$out4,v25
3171         vncipher        $out5,$out5,v25
3172         lvx             v25,$x10,$key_          # round[4]
3173         bdnz            Loop_xts_dec6x
3174
3175         subic           $len,$len,96            # $len-=96
3176          vxor           $in0,$twk0,v31          # xor with last round key
3177         vncipher        $out0,$out0,v24
3178         vncipher        $out1,$out1,v24
3179          vsrab          $tmp,$tweak,$seven      # next tweak value
3180          vxor           $twk0,$tweak,$rndkey0
3181          vaddubm        $tweak,$tweak,$tweak
3182         vncipher        $out2,$out2,v24
3183         vncipher        $out3,$out3,v24
3184          vsldoi         $tmp,$tmp,$tmp,15
3185         vncipher        $out4,$out4,v24
3186         vncipher        $out5,$out5,v24
3187
3188         subfe.          r0,r0,r0                # borrow?-1:0
3189          vand           $tmp,$tmp,$eighty7
3190         vncipher        $out0,$out0,v25
3191         vncipher        $out1,$out1,v25
3192          vxor           $tweak,$tweak,$tmp
3193         vncipher        $out2,$out2,v25
3194         vncipher        $out3,$out3,v25
3195          vxor           $in1,$twk1,v31
3196          vsrab          $tmp,$tweak,$seven      # next tweak value
3197          vxor           $twk1,$tweak,$rndkey0
3198         vncipher        $out4,$out4,v25
3199         vncipher        $out5,$out5,v25
3200
3201         and             r0,r0,$len
3202          vaddubm        $tweak,$tweak,$tweak
3203          vsldoi         $tmp,$tmp,$tmp,15
3204         vncipher        $out0,$out0,v26
3205         vncipher        $out1,$out1,v26
3206          vand           $tmp,$tmp,$eighty7
3207         vncipher        $out2,$out2,v26
3208         vncipher        $out3,$out3,v26
3209          vxor           $tweak,$tweak,$tmp
3210         vncipher        $out4,$out4,v26
3211         vncipher        $out5,$out5,v26
3212
3213         add             $inp,$inp,r0            # $inp is adjusted in such
3214                                                 # a way that at exit from
3215                                                 # the loop inX-in5 are
3216                                                 # loaded with the last "words"
3217          vxor           $in2,$twk2,v31
3218          vsrab          $tmp,$tweak,$seven      # next tweak value
3219          vxor           $twk2,$tweak,$rndkey0
3220          vaddubm        $tweak,$tweak,$tweak
3221         vncipher        $out0,$out0,v27
3222         vncipher        $out1,$out1,v27
3223          vsldoi         $tmp,$tmp,$tmp,15
3224         vncipher        $out2,$out2,v27
3225         vncipher        $out3,$out3,v27
3226          vand           $tmp,$tmp,$eighty7
3227         vncipher        $out4,$out4,v27
3228         vncipher        $out5,$out5,v27
3229
3230         addi            $key_,$sp,$FRAME+15     # rewind $key_
3231          vxor           $tweak,$tweak,$tmp
3232         vncipher        $out0,$out0,v28
3233         vncipher        $out1,$out1,v28
3234          vxor           $in3,$twk3,v31
3235          vsrab          $tmp,$tweak,$seven      # next tweak value
3236          vxor           $twk3,$tweak,$rndkey0
3237         vncipher        $out2,$out2,v28
3238         vncipher        $out3,$out3,v28
3239          vaddubm        $tweak,$tweak,$tweak
3240          vsldoi         $tmp,$tmp,$tmp,15
3241         vncipher        $out4,$out4,v28
3242         vncipher        $out5,$out5,v28
3243         lvx             v24,$x00,$key_          # re-pre-load round[1]
3244          vand           $tmp,$tmp,$eighty7
3245
3246         vncipher        $out0,$out0,v29
3247         vncipher        $out1,$out1,v29
3248          vxor           $tweak,$tweak,$tmp
3249         vncipher        $out2,$out2,v29
3250         vncipher        $out3,$out3,v29
3251          vxor           $in4,$twk4,v31
3252          vsrab          $tmp,$tweak,$seven      # next tweak value
3253          vxor           $twk4,$tweak,$rndkey0
3254         vncipher        $out4,$out4,v29
3255         vncipher        $out5,$out5,v29
3256         lvx             v25,$x10,$key_          # re-pre-load round[2]
3257          vaddubm        $tweak,$tweak,$tweak
3258          vsldoi         $tmp,$tmp,$tmp,15
3259
3260         vncipher        $out0,$out0,v30
3261         vncipher        $out1,$out1,v30
3262          vand           $tmp,$tmp,$eighty7
3263         vncipher        $out2,$out2,v30
3264         vncipher        $out3,$out3,v30
3265          vxor           $tweak,$tweak,$tmp
3266         vncipher        $out4,$out4,v30
3267         vncipher        $out5,$out5,v30
3268          vxor           $in5,$twk5,v31
3269          vsrab          $tmp,$tweak,$seven      # next tweak value
3270          vxor           $twk5,$tweak,$rndkey0
3271
3272         vncipherlast    $out0,$out0,$in0
3273          lvx_u          $in0,$x00,$inp          # load next input block
3274          vaddubm        $tweak,$tweak,$tweak
3275          vsldoi         $tmp,$tmp,$tmp,15
3276         vncipherlast    $out1,$out1,$in1
3277          lvx_u          $in1,$x10,$inp
3278         vncipherlast    $out2,$out2,$in2
3279          le?vperm       $in0,$in0,$in0,$leperm
3280          lvx_u          $in2,$x20,$inp
3281          vand           $tmp,$tmp,$eighty7
3282         vncipherlast    $out3,$out3,$in3
3283          le?vperm       $in1,$in1,$in1,$leperm
3284          lvx_u          $in3,$x30,$inp
3285         vncipherlast    $out4,$out4,$in4
3286          le?vperm       $in2,$in2,$in2,$leperm
3287          lvx_u          $in4,$x40,$inp
3288          vxor           $tweak,$tweak,$tmp
3289         vncipherlast    $out5,$out5,$in5
3290          le?vperm       $in3,$in3,$in3,$leperm
3291          lvx_u          $in5,$x50,$inp
3292          addi           $inp,$inp,0x60
3293          le?vperm       $in4,$in4,$in4,$leperm
3294          le?vperm       $in5,$in5,$in5,$leperm
3295
3296         le?vperm        $out0,$out0,$out0,$leperm
3297         le?vperm        $out1,$out1,$out1,$leperm
3298         stvx_u          $out0,$x00,$out         # store output
3299          vxor           $out0,$in0,$twk0
3300         le?vperm        $out2,$out2,$out2,$leperm
3301         stvx_u          $out1,$x10,$out
3302          vxor           $out1,$in1,$twk1
3303         le?vperm        $out3,$out3,$out3,$leperm
3304         stvx_u          $out2,$x20,$out
3305          vxor           $out2,$in2,$twk2
3306         le?vperm        $out4,$out4,$out4,$leperm
3307         stvx_u          $out3,$x30,$out
3308          vxor           $out3,$in3,$twk3
3309         le?vperm        $out5,$out5,$out5,$leperm
3310         stvx_u          $out4,$x40,$out
3311          vxor           $out4,$in4,$twk4
3312         stvx_u          $out5,$x50,$out
3313          vxor           $out5,$in5,$twk5
3314         addi            $out,$out,0x60
3315
3316         mtctr           $rounds
3317         beq             Loop_xts_dec6x          # did $len-=96 borrow?
3318
3319         addic.          $len,$len,0x60
3320         beq             Lxts_dec6x_zero
3321         cmpwi           $len,0x20
3322         blt             Lxts_dec6x_one
3323         nop
3324         beq             Lxts_dec6x_two
3325         cmpwi           $len,0x40
3326         blt             Lxts_dec6x_three
3327         nop
3328         beq             Lxts_dec6x_four
3329
3330 Lxts_dec6x_five:
3331         vxor            $out0,$in1,$twk0
3332         vxor            $out1,$in2,$twk1
3333         vxor            $out2,$in3,$twk2
3334         vxor            $out3,$in4,$twk3
3335         vxor            $out4,$in5,$twk4
3336
3337         bl              _aesp8_xts_dec5x
3338
3339         le?vperm        $out0,$out0,$out0,$leperm
3340         vmr             $twk0,$twk5             # unused tweak
3341         vxor            $twk1,$tweak,$rndkey0
3342         le?vperm        $out1,$out1,$out1,$leperm
3343         stvx_u          $out0,$x00,$out         # store output
3344         vxor            $out0,$in0,$twk1
3345         le?vperm        $out2,$out2,$out2,$leperm
3346         stvx_u          $out1,$x10,$out
3347         le?vperm        $out3,$out3,$out3,$leperm
3348         stvx_u          $out2,$x20,$out
3349         le?vperm        $out4,$out4,$out4,$leperm
3350         stvx_u          $out3,$x30,$out
3351         stvx_u          $out4,$x40,$out
3352         addi            $out,$out,0x50
3353         bne             Lxts_dec6x_steal
3354         b               Lxts_dec6x_done
3355
3356 .align  4
3357 Lxts_dec6x_four:
3358         vxor            $out0,$in2,$twk0
3359         vxor            $out1,$in3,$twk1
3360         vxor            $out2,$in4,$twk2
3361         vxor            $out3,$in5,$twk3
3362         vxor            $out4,$out4,$out4
3363
3364         bl              _aesp8_xts_dec5x
3365
3366         le?vperm        $out0,$out0,$out0,$leperm
3367         vmr             $twk0,$twk4             # unused tweak
3368         vmr             $twk1,$twk5
3369         le?vperm        $out1,$out1,$out1,$leperm
3370         stvx_u          $out0,$x00,$out         # store output
3371         vxor            $out0,$in0,$twk5
3372         le?vperm        $out2,$out2,$out2,$leperm
3373         stvx_u          $out1,$x10,$out
3374         le?vperm        $out3,$out3,$out3,$leperm
3375         stvx_u          $out2,$x20,$out
3376         stvx_u          $out3,$x30,$out
3377         addi            $out,$out,0x40
3378         bne             Lxts_dec6x_steal
3379         b               Lxts_dec6x_done
3380
3381 .align  4
3382 Lxts_dec6x_three:
3383         vxor            $out0,$in3,$twk0
3384         vxor            $out1,$in4,$twk1
3385         vxor            $out2,$in5,$twk2
3386         vxor            $out3,$out3,$out3
3387         vxor            $out4,$out4,$out4
3388
3389         bl              _aesp8_xts_dec5x
3390
3391         le?vperm        $out0,$out0,$out0,$leperm
3392         vmr             $twk0,$twk3             # unused tweak
3393         vmr             $twk1,$twk4
3394         le?vperm        $out1,$out1,$out1,$leperm
3395         stvx_u          $out0,$x00,$out         # store output
3396         vxor            $out0,$in0,$twk4
3397         le?vperm        $out2,$out2,$out2,$leperm
3398         stvx_u          $out1,$x10,$out
3399         stvx_u          $out2,$x20,$out
3400         addi            $out,$out,0x30
3401         bne             Lxts_dec6x_steal
3402         b               Lxts_dec6x_done
3403
3404 .align  4
3405 Lxts_dec6x_two:
3406         vxor            $out0,$in4,$twk0
3407         vxor            $out1,$in5,$twk1
3408         vxor            $out2,$out2,$out2
3409         vxor            $out3,$out3,$out3
3410         vxor            $out4,$out4,$out4
3411
3412         bl              _aesp8_xts_dec5x
3413
3414         le?vperm        $out0,$out0,$out0,$leperm
3415         vmr             $twk0,$twk2             # unused tweak
3416         vmr             $twk1,$twk3
3417         le?vperm        $out1,$out1,$out1,$leperm
3418         stvx_u          $out0,$x00,$out         # store output
3419         vxor            $out0,$in0,$twk3
3420         stvx_u          $out1,$x10,$out
3421         addi            $out,$out,0x20
3422         bne             Lxts_dec6x_steal
3423         b               Lxts_dec6x_done
3424
3425 .align  4
3426 Lxts_dec6x_one:
3427         vxor            $out0,$in5,$twk0
3428         nop
3429 Loop_xts_dec1x:
3430         vncipher        $out0,$out0,v24
3431         lvx             v24,$x20,$key_          # round[3]
3432         addi            $key_,$key_,0x20
3433
3434         vncipher        $out0,$out0,v25
3435         lvx             v25,$x10,$key_          # round[4]
3436         bdnz            Loop_xts_dec1x
3437
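# ($taillen - 1) & 16 evaluates to 16 when $taillen is zero and to 0
# otherwise; subtracting it from $inp below selects the correct block
# to load whether or not a partial tail follows.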
3438         subi            r0,$taillen,1
3439         vncipher        $out0,$out0,v24
3440
3441         andi.           r0,r0,16
3442         cmpwi           $taillen,0
3443         vncipher        $out0,$out0,v25
3444
3445         sub             $inp,$inp,r0
3446         vncipher        $out0,$out0,v26
3447
3448         lvx_u           $in0,0,$inp
3449         vncipher        $out0,$out0,v27
3450
3451         addi            $key_,$sp,$FRAME+15     # rewind $key_
3452         vncipher        $out0,$out0,v28
3453         lvx             v24,$x00,$key_          # re-pre-load round[1]
3454
3455         vncipher        $out0,$out0,v29
3456         lvx             v25,$x10,$key_          # re-pre-load round[2]
3457          vxor           $twk0,$twk0,v31
3458
3459         le?vperm        $in0,$in0,$in0,$leperm
3460         vncipher        $out0,$out0,v30
3461
3462         mtctr           $rounds
3463         vncipherlast    $out0,$out0,$twk0
3464
3465         vmr             $twk0,$twk1             # unused tweak
3466         vmr             $twk1,$twk2
3467         le?vperm        $out0,$out0,$out0,$leperm
3468         stvx_u          $out0,$x00,$out         # store output
3469         addi            $out,$out,0x10
3470         vxor            $out0,$in0,$twk2
3471         bne             Lxts_dec6x_steal
3472         b               Lxts_dec6x_done
3473
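# Tail handling.  Lxts_dec6x_zero loads the last full block when no
# whole blocks were left; Lxts_dec6x_steal then implements decrypt-side
# stealing, which runs in reverse order: the last full ciphertext block
# is decrypted with the higher-numbered tweak ($twk1), its leading bytes
# are grafted onto the short tail, and the merged block is decrypted
# once more with $twk0.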
3474 .align  4
3475 Lxts_dec6x_zero:
3476         cmpwi           $taillen,0
3477         beq             Lxts_dec6x_done
3478
3479         lvx_u           $in0,0,$inp
3480         le?vperm        $in0,$in0,$in0,$leperm
3481         vxor            $out0,$in0,$twk1
3482 Lxts_dec6x_steal:
3483         vncipher        $out0,$out0,v24
3484         lvx             v24,$x20,$key_          # round[3]
3485         addi            $key_,$key_,0x20
3486
3487         vncipher        $out0,$out0,v25
3488         lvx             v25,$x10,$key_          # round[4]
3489         bdnz            Lxts_dec6x_steal
3490
3491         add             $inp,$inp,$taillen
3492         vncipher        $out0,$out0,v24
3493
3494         cmpwi           $taillen,0
3495         vncipher        $out0,$out0,v25
3496
3497         lvx_u           $in0,0,$inp
3498         vncipher        $out0,$out0,v26
3499
3500         lvsr            $inpperm,0,$taillen     # $in5 is no more
3501         vncipher        $out0,$out0,v27
3502
3503         addi            $key_,$sp,$FRAME+15     # rewind $key_
3504         vncipher        $out0,$out0,v28
3505         lvx             v24,$x00,$key_          # re-pre-load round[1]
3506
3507         vncipher        $out0,$out0,v29
3508         lvx             v25,$x10,$key_          # re-pre-load round[2]
3509          vxor           $twk1,$twk1,v31
3510
3511         le?vperm        $in0,$in0,$in0,$leperm
3512         vncipher        $out0,$out0,v30
3513
3514         vperm           $in0,$in0,$in0,$inpperm
3515         vncipherlast    $tmp,$out0,$twk1
3516
3517         le?vperm        $out0,$tmp,$tmp,$leperm
3518         le?stvx_u       $out0,0,$out
3519         be?stvx_u       $tmp,0,$out
3520
3521         vxor            $out0,$out0,$out0
3522         vspltisb        $out1,-1
3523         vperm           $out0,$out0,$out1,$inpperm
3524         vsel            $out0,$in0,$tmp,$out0
3525         vxor            $out0,$out0,$twk0
3526
3527         subi            r3,$out,1
3528         mtctr           $taillen
3529 Loop_xts_dec6x_steal:
3530         lbzu            r0,1(r3)
3531         stb             r0,16(r3)
3532         bdnz            Loop_xts_dec6x_steal
3533
3534         li              $taillen,0
3535         mtctr           $rounds
3536         b               Loop_xts_dec1x          # one more time...
3537
3538 .align  4
3539 Lxts_dec6x_done:
3540         mtlr            r7
3541         li              r10,`$FRAME+15`
3542         li              r11,`$FRAME+31`
3543         stvx            $seven,r10,$sp          # wipe copies of round keys
3544         addi            r10,r10,32
3545         stvx            $seven,r11,$sp
3546         addi            r11,r11,32
3547         stvx            $seven,r10,$sp
3548         addi            r10,r10,32
3549         stvx            $seven,r11,$sp
3550         addi            r11,r11,32
3551         stvx            $seven,r10,$sp
3552         addi            r10,r10,32
3553         stvx            $seven,r11,$sp
3554         addi            r11,r11,32
3555         stvx            $seven,r10,$sp
3556         addi            r10,r10,32
3557         stvx            $seven,r11,$sp
3558         addi            r11,r11,32
3559
3560         mtspr           256,$vrsave
3561         lvx             v20,r10,$sp             # ABI says so
3562         addi            r10,r10,32
3563         lvx             v21,r11,$sp
3564         addi            r11,r11,32
3565         lvx             v22,r10,$sp
3566         addi            r10,r10,32
3567         lvx             v23,r11,$sp
3568         addi            r11,r11,32
3569         lvx             v24,r10,$sp
3570         addi            r10,r10,32
3571         lvx             v25,r11,$sp
3572         addi            r11,r11,32
3573         lvx             v26,r10,$sp
3574         addi            r10,r10,32
3575         lvx             v27,r11,$sp
3576         addi            r11,r11,32
3577         lvx             v28,r10,$sp
3578         addi            r10,r10,32
3579         lvx             v29,r11,$sp
3580         addi            r11,r11,32
3581         lvx             v30,r10,$sp
3582         lvx             v31,r11,$sp
3583         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3584         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3585         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3586         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3587         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3588         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3589         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3590         blr
3591         .long           0
3592         .byte           0,12,0x04,1,0x80,6,6,0
3593         .long           0
3594
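#
# Five-stream decrypt helper; mirrors _aesp8_xts_enc5x, likewise leaving
# CR0 with the "$taillen == 0" result for its callers, and reloads CTR
# with $rounds before returning.
#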
3595 .align  5
3596 _aesp8_xts_dec5x:
3597         vncipher        $out0,$out0,v24
3598         vncipher        $out1,$out1,v24
3599         vncipher        $out2,$out2,v24
3600         vncipher        $out3,$out3,v24
3601         vncipher        $out4,$out4,v24
3602         lvx             v24,$x20,$key_          # round[3]
3603         addi            $key_,$key_,0x20
3604
3605         vncipher        $out0,$out0,v25
3606         vncipher        $out1,$out1,v25
3607         vncipher        $out2,$out2,v25
3608         vncipher        $out3,$out3,v25
3609         vncipher        $out4,$out4,v25
3610         lvx             v25,$x10,$key_          # round[4]
3611         bdnz            _aesp8_xts_dec5x
3612
3613         subi            r0,$taillen,1
3614         vncipher        $out0,$out0,v24
3615         vncipher        $out1,$out1,v24
3616         vncipher        $out2,$out2,v24
3617         vncipher        $out3,$out3,v24
3618         vncipher        $out4,$out4,v24
3619
3620         andi.           r0,r0,16
3621         cmpwi           $taillen,0
3622         vncipher        $out0,$out0,v25
3623         vncipher        $out1,$out1,v25
3624         vncipher        $out2,$out2,v25
3625         vncipher        $out3,$out3,v25
3626         vncipher        $out4,$out4,v25
3627          vxor           $twk0,$twk0,v31
3628
3629         sub             $inp,$inp,r0
3630         vncipher        $out0,$out0,v26
3631         vncipher        $out1,$out1,v26
3632         vncipher        $out2,$out2,v26
3633         vncipher        $out3,$out3,v26
3634         vncipher        $out4,$out4,v26
3635          vxor           $in1,$twk1,v31
3636
3637         vncipher        $out0,$out0,v27
3638         lvx_u           $in0,0,$inp
3639         vncipher        $out1,$out1,v27
3640         vncipher        $out2,$out2,v27
3641         vncipher        $out3,$out3,v27
3642         vncipher        $out4,$out4,v27
3643          vxor           $in2,$twk2,v31
3644
3645         addi            $key_,$sp,$FRAME+15     # rewind $key_
3646         vncipher        $out0,$out0,v28
3647         vncipher        $out1,$out1,v28
3648         vncipher        $out2,$out2,v28
3649         vncipher        $out3,$out3,v28
3650         vncipher        $out4,$out4,v28
3651         lvx             v24,$x00,$key_          # re-pre-load round[1]
3652          vxor           $in3,$twk3,v31
3653
3654         vncipher        $out0,$out0,v29
3655         le?vperm        $in0,$in0,$in0,$leperm
3656         vncipher        $out1,$out1,v29
3657         vncipher        $out2,$out2,v29
3658         vncipher        $out3,$out3,v29
3659         vncipher        $out4,$out4,v29
3660         lvx             v25,$x10,$key_          # re-pre-load round[2]
3661          vxor           $in4,$twk4,v31
3662
3663         vncipher        $out0,$out0,v30
3664         vncipher        $out1,$out1,v30
3665         vncipher        $out2,$out2,v30
3666         vncipher        $out3,$out3,v30
3667         vncipher        $out4,$out4,v30
3668
3669         vncipherlast    $out0,$out0,$twk0
3670         vncipherlast    $out1,$out1,$in1
3671         vncipherlast    $out2,$out2,$in2
3672         vncipherlast    $out3,$out3,$in3
3673         vncipherlast    $out4,$out4,$in4
3674         mtctr           $rounds
3675         blr
3676         .long           0
3677         .byte           0,12,0x14,0,0,0,0,0
3678 ___
3679 }}      }}}
3680
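# Post-processing: evaluate backtick-quoted arithmetic, emit the
# constants table as endian-corrected .byte lines, and rewrite the
# endian-sensitive markers (le?/be? prefixes and '?'-prefixed
# instructions) for the requested flavour.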
3681 my $consts=1;
3682 foreach(split("\n",$code)) {
3683         s/\`([^\`]*)\`/eval($1)/geo;
3684
3685         # constants table endian-specific conversion
3686         if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3687             my $conv=$3;
3688             my @bytes=();
3689
3690             # convert to endian-agnostic format
3691             if ($1 eq "long") {
3692               foreach (split(/,\s*/,$2)) {
3693                 my $l = /^0/?oct:int;
3694                 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3695               }
3696             } else {
3697                 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3698             }
3699
3700             # little-endian conversion
3701             if ($flavour =~ /le$/o) {
3702                 SWITCH: for($conv)  {
3703                     /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
3704                     /\?rev/ && do   { @bytes=reverse(@bytes);    last; }; 
3705                 }
3706             }
3707
3708             # emit
3709             print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3710             next;
3711         }
3712         $consts=0 if (m/Lconsts:/o);    # end of table
3713
3714         # instructions prefixed with '?' are endian-specific and need
3715         # to be adjusted accordingly...
3716         if ($flavour =~ /le$/o) {       # little-endian
3717             s/le\?//o           or
3718             s/be\?/#be#/o       or
3719             s/\?lvsr/lvsl/o     or
3720             s/\?lvsl/lvsr/o     or
3721             s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3722             s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3723             s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3724         } else {                        # big-endian
3725             s/le\?/#le#/o       or
3726             s/be\?//o           or
3727             s/\?([a-z]+)/$1/o;
3728         }
3729
3730         print $_,"\n";
3731 }
3732
3733 close STDOUT or die "error closing STDOUT: $!";