# Add OpenSSL copyright to .pl files
# [openssl.git] / crypto / aes / asm / aesp8-ppc.pl
1 #! /usr/bin/env perl
2 # Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the OpenSSL license (the "License").  You may not use
5 # this file except in compliance with the License.  You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9 #
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16 #
17 # This module implements support for AES instructions as per PowerISA
18 # specification version 2.07, first implemented by POWER8 processor.
19 # The module is endian-agnostic in sense that it supports both big-
20 # and little-endian cases. Data alignment in parallelizable modes is
21 # handled with VSX loads and stores, which implies MSR.VSX flag being
22 # set. It should also be noted that ISA specification doesn't prohibit
23 # alignment exceptions for these instructions on page boundaries.
24 # Initially alignment was handled in pure AltiVec/VMX way [when data
25 # is aligned programmatically, which in turn guarantees exception-
26 # free execution], but it turned to hamper performance when vcipher
27 # instructions are interleaved. It's reckoned that eventual
28 # misalignment penalties at page boundaries are in average lower
29 # than additional overhead in pure AltiVec approach.
30
# First command-line argument selects the ABI "flavour" (e.g. linux32,
# linux64, linux64le, aix64); it decides pointer size and the 32- vs
# 64-bit forms of the load/store/compare mnemonics used below.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;		# bytes per register/stack slot
	$LRSAVE	=2*$SIZE_T;	# link-register save offset in caller frame
	$STU	="stdu";	# store-with-update (stack push)
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";	# unsigned compare
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

# Little-endian flavours end in "le"; non-zero value doubles as $SIZE_T.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Locate the ppc-xlate.pl translator either next to this script or in
# the shared perlasm directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe all generated code through the translator.  NOTE(fix): the
# original wrote  open STDOUT,"| ... ".shift || die ...;  high-precedence
# "||" bound to the (always-true) concatenated string, so a failed open
# was silently ignored.  Low-precedence "or" applies to open() itself,
# and quoting $^X/$output keeps interpreter paths with spaces intact.
$output = shift;
open STDOUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;		# minimal stack frame
$prefix="aes_p8";		# symbol prefix of all exported routines

$sp="r1";
$vrsave="r12";
65
66 #########################################################################
{{{	# Key setup procedures                                          #
# GPR roles per the ELF ABI: $inp=r3 user key, $bits=r4 key length in
# bits, $out=r5 key schedule, $ptr=r6 constants pointer / return code,
# $cnt=r7 loop count, $rounds=r8 number of rounds written back.
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
# Vector scratch registers used by the key expansion proper.
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
# Vectors used to merge stores on an unaligned $out buffer.
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

# Emits aes_p8_set_encrypt_key (returns 0 on success, -1 on NULL
# pointers, -2 on unsupported key size) and aes_p8_set_decrypt_key,
# which reuses the encrypt schedule and reverses the round-key order.
$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# roate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	 vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	 vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in1,$in1,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
406 #########################################################################
{{{	# Single block en- and decrypt procedures                       #
# gen_block($dir) appends one single-block procedure to $code:
# .${prefix}_encrypt for $dir eq "en", .${prefix}_decrypt for "de".
# The two bodies are textually identical except that decryption uses
# the vncipher/vncipherlast mnemonics ($n eq "n").
#
# NOTE(fix): the sub was declared with an empty prototype,
# "sub gen_block ()", which forbids the very argument it consumes, and
# the call sites used "&gen_block(...)" purely to bypass that prototype.
# Perl prototypes do not validate arguments and "&"-calls are an
# anti-pattern, so both are dropped; behavior is unchanged.
sub gen_block {
my $dir = shift;			# "en" or "de"
my $n   = $dir eq "de" ? "n" : "";	# mnemonic infix: vcipher vs vncipher
# GPR arguments: r3 input block, r4 output block, r5 key schedule;
# r6 round counter, r7 offset scratch.
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
.align	5
.${prefix}_${dir}crypt:
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
gen_block("en");
gen_block("de");
}}}
486 #########################################################################
487 {{{     # CBC en- and decrypt procedures                                #
488 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
489 my ($rndkey0,$rndkey1,$inout,$tmp)=             map("v$_",(0..3));
490 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
491                                                 map("v$_",(4..10));
492 $code.=<<___;
493 .globl  .${prefix}_cbc_encrypt
494 .align  5
495 .${prefix}_cbc_encrypt:
496         ${UCMP}i        $len,16
497         bltlr-
498
499         cmpwi           $enc,0                  # test direction
500         lis             r0,0xffe0
501         mfspr           $vrsave,256
502         mtspr           256,r0
503
504         li              $idx,15
505         vxor            $rndkey0,$rndkey0,$rndkey0
506         le?vspltisb     $tmp,0x0f
507
508         lvx             $ivec,0,$ivp            # load [unaligned] iv
509         lvsl            $inpperm,0,$ivp
510         lvx             $inptail,$idx,$ivp
511         le?vxor         $inpperm,$inpperm,$tmp
512         vperm           $ivec,$ivec,$inptail,$inpperm
513
514         neg             r11,$inp
515         ?lvsl           $keyperm,0,$key         # prepare for unaligned key
516         lwz             $rounds,240($key)
517
518         lvsr            $inpperm,0,r11          # prepare for unaligned load
519         lvx             $inptail,0,$inp
520         addi            $inp,$inp,15            # 15 is not typo
521         le?vxor         $inpperm,$inpperm,$tmp
522
523         ?lvsr           $outperm,0,$out         # prepare for unaligned store
524         vspltisb        $outmask,-1
525         lvx             $outhead,0,$out
526         ?vperm          $outmask,$rndkey0,$outmask,$outperm
527         le?vxor         $outperm,$outperm,$tmp
528
529         srwi            $rounds,$rounds,1
530         li              $idx,16
531         subi            $rounds,$rounds,1
532         beq             Lcbc_dec
533
534 Lcbc_enc:
535         vmr             $inout,$inptail
536         lvx             $inptail,0,$inp
537         addi            $inp,$inp,16
538         mtctr           $rounds
539         subi            $len,$len,16            # len-=16
540
541         lvx             $rndkey0,0,$key
542          vperm          $inout,$inout,$inptail,$inpperm
543         lvx             $rndkey1,$idx,$key
544         addi            $idx,$idx,16
545         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
546         vxor            $inout,$inout,$rndkey0
547         lvx             $rndkey0,$idx,$key
548         addi            $idx,$idx,16
549         vxor            $inout,$inout,$ivec
550
551 Loop_cbc_enc:
552         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
553         vcipher         $inout,$inout,$rndkey1
554         lvx             $rndkey1,$idx,$key
555         addi            $idx,$idx,16
556         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
557         vcipher         $inout,$inout,$rndkey0
558         lvx             $rndkey0,$idx,$key
559         addi            $idx,$idx,16
560         bdnz            Loop_cbc_enc
561
562         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
563         vcipher         $inout,$inout,$rndkey1
564         lvx             $rndkey1,$idx,$key
565         li              $idx,16
566         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
567         vcipherlast     $ivec,$inout,$rndkey0
568         ${UCMP}i        $len,16
569
570         vperm           $tmp,$ivec,$ivec,$outperm
571         vsel            $inout,$outhead,$tmp,$outmask
572         vmr             $outhead,$tmp
573         stvx            $inout,0,$out
574         addi            $out,$out,16
575         bge             Lcbc_enc
576
577         b               Lcbc_done
578
579 .align  4
580 Lcbc_dec:
581         ${UCMP}i        $len,128
582         bge             _aesp8_cbc_decrypt8x
583         vmr             $tmp,$inptail
584         lvx             $inptail,0,$inp
585         addi            $inp,$inp,16
586         mtctr           $rounds
587         subi            $len,$len,16            # len-=16
588
589         lvx             $rndkey0,0,$key
590          vperm          $tmp,$tmp,$inptail,$inpperm
591         lvx             $rndkey1,$idx,$key
592         addi            $idx,$idx,16
593         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
594         vxor            $inout,$tmp,$rndkey0
595         lvx             $rndkey0,$idx,$key
596         addi            $idx,$idx,16
597
598 Loop_cbc_dec:
599         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
600         vncipher        $inout,$inout,$rndkey1
601         lvx             $rndkey1,$idx,$key
602         addi            $idx,$idx,16
603         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
604         vncipher        $inout,$inout,$rndkey0
605         lvx             $rndkey0,$idx,$key
606         addi            $idx,$idx,16
607         bdnz            Loop_cbc_dec
608
609         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
610         vncipher        $inout,$inout,$rndkey1
611         lvx             $rndkey1,$idx,$key
612         li              $idx,16
613         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
614         vncipherlast    $inout,$inout,$rndkey0
615         ${UCMP}i        $len,16
616
617         vxor            $inout,$inout,$ivec
618         vmr             $ivec,$tmp
619         vperm           $tmp,$inout,$inout,$outperm
620         vsel            $inout,$outhead,$tmp,$outmask
621         vmr             $outhead,$tmp
622         stvx            $inout,0,$out
623         addi            $out,$out,16
624         bge             Lcbc_dec
625
626 Lcbc_done:
627         addi            $out,$out,-1
628         lvx             $inout,0,$out           # redundant in aligned case
629         vsel            $inout,$outhead,$inout,$outmask
630         stvx            $inout,0,$out
631
632         neg             $enc,$ivp               # write [unaligned] iv
633         li              $idx,15                 # 15 is not typo
634         vxor            $rndkey0,$rndkey0,$rndkey0
635         vspltisb        $outmask,-1
636         le?vspltisb     $tmp,0x0f
637         ?lvsl           $outperm,0,$enc
638         ?vperm          $outmask,$rndkey0,$outmask,$outperm
639         le?vxor         $outperm,$outperm,$tmp
640         lvx             $outhead,0,$ivp
641         vperm           $ivec,$ivec,$ivec,$outperm
642         vsel            $inout,$outhead,$ivec,$outmask
643         lvx             $inptail,$idx,$ivp
644         stvx            $inout,0,$ivp
645         vsel            $inout,$ivec,$inptail,$outmask
646         stvx            $inout,$idx,$ivp
647
648         mtspr           256,$vrsave
649         blr
650         .long           0
651         .byte           0,12,0x14,0,0,0,6,0
652         .long           0
653 ___
654 #########################################################################
655 {{      # Optimized CBC decrypt procedure                               #
656 my $key_="r11";
657 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
658     $x00=0 if ($flavour =~ /osx/);
659 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
660 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
661 my $rndkey0="v23";      # v24-v25 rotating buffer for first found keys
662                         # v26-v31 last 6 round keys
663 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
664
665 $code.=<<___;
666 .align  5
667 _aesp8_cbc_decrypt8x:
668         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
669         li              r10,`$FRAME+8*16+15`
670         li              r11,`$FRAME+8*16+31`
671         stvx            v20,r10,$sp             # ABI says so
672         addi            r10,r10,32
673         stvx            v21,r11,$sp
674         addi            r11,r11,32
675         stvx            v22,r10,$sp
676         addi            r10,r10,32
677         stvx            v23,r11,$sp
678         addi            r11,r11,32
679         stvx            v24,r10,$sp
680         addi            r10,r10,32
681         stvx            v25,r11,$sp
682         addi            r11,r11,32
683         stvx            v26,r10,$sp
684         addi            r10,r10,32
685         stvx            v27,r11,$sp
686         addi            r11,r11,32
687         stvx            v28,r10,$sp
688         addi            r10,r10,32
689         stvx            v29,r11,$sp
690         addi            r11,r11,32
691         stvx            v30,r10,$sp
692         stvx            v31,r11,$sp
693         li              r0,-1
694         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
695         li              $x10,0x10
696         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
697         li              $x20,0x20
698         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
699         li              $x30,0x30
700         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
701         li              $x40,0x40
702         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
703         li              $x50,0x50
704         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
705         li              $x60,0x60
706         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
707         li              $x70,0x70
708         mtspr           256,r0
709
710         subi            $rounds,$rounds,3       # -4 in total
711         subi            $len,$len,128           # bias
712
713         lvx             $rndkey0,$x00,$key      # load key schedule
714         lvx             v30,$x10,$key
715         addi            $key,$key,0x20
716         lvx             v31,$x00,$key
717         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
718         addi            $key_,$sp,$FRAME+15
719         mtctr           $rounds
720
721 Load_cbc_dec_key:
722         ?vperm          v24,v30,v31,$keyperm
723         lvx             v30,$x10,$key
724         addi            $key,$key,0x20
725         stvx            v24,$x00,$key_          # off-load round[1]
726         ?vperm          v25,v31,v30,$keyperm
727         lvx             v31,$x00,$key
728         stvx            v25,$x10,$key_          # off-load round[2]
729         addi            $key_,$key_,0x20
730         bdnz            Load_cbc_dec_key
731
732         lvx             v26,$x10,$key
733         ?vperm          v24,v30,v31,$keyperm
734         lvx             v27,$x20,$key
735         stvx            v24,$x00,$key_          # off-load round[3]
736         ?vperm          v25,v31,v26,$keyperm
737         lvx             v28,$x30,$key
738         stvx            v25,$x10,$key_          # off-load round[4]
739         addi            $key_,$sp,$FRAME+15     # rewind $key_
740         ?vperm          v26,v26,v27,$keyperm
741         lvx             v29,$x40,$key
742         ?vperm          v27,v27,v28,$keyperm
743         lvx             v30,$x50,$key
744         ?vperm          v28,v28,v29,$keyperm
745         lvx             v31,$x60,$key
746         ?vperm          v29,v29,v30,$keyperm
747         lvx             $out0,$x70,$key         # borrow $out0
748         ?vperm          v30,v30,v31,$keyperm
749         lvx             v24,$x00,$key_          # pre-load round[1]
750         ?vperm          v31,v31,$out0,$keyperm
751         lvx             v25,$x10,$key_          # pre-load round[2]
752
753         #lvx            $inptail,0,$inp         # "caller" already did this
754         #addi           $inp,$inp,15            # 15 is not typo
755         subi            $inp,$inp,15            # undo "caller"
756
757          le?li          $idx,8
758         lvx_u           $in0,$x00,$inp          # load first 8 "words"
759          le?lvsl        $inpperm,0,$idx
760          le?vspltisb    $tmp,0x0f
761         lvx_u           $in1,$x10,$inp
762          le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
763         lvx_u           $in2,$x20,$inp
764          le?vperm       $in0,$in0,$in0,$inpperm
765         lvx_u           $in3,$x30,$inp
766          le?vperm       $in1,$in1,$in1,$inpperm
767         lvx_u           $in4,$x40,$inp
768          le?vperm       $in2,$in2,$in2,$inpperm
769         vxor            $out0,$in0,$rndkey0
770         lvx_u           $in5,$x50,$inp
771          le?vperm       $in3,$in3,$in3,$inpperm
772         vxor            $out1,$in1,$rndkey0
773         lvx_u           $in6,$x60,$inp
774          le?vperm       $in4,$in4,$in4,$inpperm
775         vxor            $out2,$in2,$rndkey0
776         lvx_u           $in7,$x70,$inp
777         addi            $inp,$inp,0x80
778          le?vperm       $in5,$in5,$in5,$inpperm
779         vxor            $out3,$in3,$rndkey0
780          le?vperm       $in6,$in6,$in6,$inpperm
781         vxor            $out4,$in4,$rndkey0
782          le?vperm       $in7,$in7,$in7,$inpperm
783         vxor            $out5,$in5,$rndkey0
784         vxor            $out6,$in6,$rndkey0
785         vxor            $out7,$in7,$rndkey0
786
787         mtctr           $rounds
788         b               Loop_cbc_dec8x
789 .align  5
790 Loop_cbc_dec8x:
791         vncipher        $out0,$out0,v24
792         vncipher        $out1,$out1,v24
793         vncipher        $out2,$out2,v24
794         vncipher        $out3,$out3,v24
795         vncipher        $out4,$out4,v24
796         vncipher        $out5,$out5,v24
797         vncipher        $out6,$out6,v24
798         vncipher        $out7,$out7,v24
799         lvx             v24,$x20,$key_          # round[3]
800         addi            $key_,$key_,0x20
801
802         vncipher        $out0,$out0,v25
803         vncipher        $out1,$out1,v25
804         vncipher        $out2,$out2,v25
805         vncipher        $out3,$out3,v25
806         vncipher        $out4,$out4,v25
807         vncipher        $out5,$out5,v25
808         vncipher        $out6,$out6,v25
809         vncipher        $out7,$out7,v25
810         lvx             v25,$x10,$key_          # round[4]
811         bdnz            Loop_cbc_dec8x
812
813         subic           $len,$len,128           # $len-=128
814         vncipher        $out0,$out0,v24
815         vncipher        $out1,$out1,v24
816         vncipher        $out2,$out2,v24
817         vncipher        $out3,$out3,v24
818         vncipher        $out4,$out4,v24
819         vncipher        $out5,$out5,v24
820         vncipher        $out6,$out6,v24
821         vncipher        $out7,$out7,v24
822
823         subfe.          r0,r0,r0                # borrow?-1:0
824         vncipher        $out0,$out0,v25
825         vncipher        $out1,$out1,v25
826         vncipher        $out2,$out2,v25
827         vncipher        $out3,$out3,v25
828         vncipher        $out4,$out4,v25
829         vncipher        $out5,$out5,v25
830         vncipher        $out6,$out6,v25
831         vncipher        $out7,$out7,v25
832
833         and             r0,r0,$len
834         vncipher        $out0,$out0,v26
835         vncipher        $out1,$out1,v26
836         vncipher        $out2,$out2,v26
837         vncipher        $out3,$out3,v26
838         vncipher        $out4,$out4,v26
839         vncipher        $out5,$out5,v26
840         vncipher        $out6,$out6,v26
841         vncipher        $out7,$out7,v26
842
843         add             $inp,$inp,r0            # $inp is adjusted in such
844                                                 # way that at exit from the
845                                                 # loop inX-in7 are loaded
846                                                 # with last "words"
847         vncipher        $out0,$out0,v27
848         vncipher        $out1,$out1,v27
849         vncipher        $out2,$out2,v27
850         vncipher        $out3,$out3,v27
851         vncipher        $out4,$out4,v27
852         vncipher        $out5,$out5,v27
853         vncipher        $out6,$out6,v27
854         vncipher        $out7,$out7,v27
855
856         addi            $key_,$sp,$FRAME+15     # rewind $key_
857         vncipher        $out0,$out0,v28
858         vncipher        $out1,$out1,v28
859         vncipher        $out2,$out2,v28
860         vncipher        $out3,$out3,v28
861         vncipher        $out4,$out4,v28
862         vncipher        $out5,$out5,v28
863         vncipher        $out6,$out6,v28
864         vncipher        $out7,$out7,v28
865         lvx             v24,$x00,$key_          # re-pre-load round[1]
866
867         vncipher        $out0,$out0,v29
868         vncipher        $out1,$out1,v29
869         vncipher        $out2,$out2,v29
870         vncipher        $out3,$out3,v29
871         vncipher        $out4,$out4,v29
872         vncipher        $out5,$out5,v29
873         vncipher        $out6,$out6,v29
874         vncipher        $out7,$out7,v29
875         lvx             v25,$x10,$key_          # re-pre-load round[2]
876
877         vncipher        $out0,$out0,v30
878          vxor           $ivec,$ivec,v31         # xor with last round key
879         vncipher        $out1,$out1,v30
880          vxor           $in0,$in0,v31
881         vncipher        $out2,$out2,v30
882          vxor           $in1,$in1,v31
883         vncipher        $out3,$out3,v30
884          vxor           $in2,$in2,v31
885         vncipher        $out4,$out4,v30
886          vxor           $in3,$in3,v31
887         vncipher        $out5,$out5,v30
888          vxor           $in4,$in4,v31
889         vncipher        $out6,$out6,v30
890          vxor           $in5,$in5,v31
891         vncipher        $out7,$out7,v30
892          vxor           $in6,$in6,v31
893
894         vncipherlast    $out0,$out0,$ivec
895         vncipherlast    $out1,$out1,$in0
896          lvx_u          $in0,$x00,$inp          # load next input block
897         vncipherlast    $out2,$out2,$in1
898          lvx_u          $in1,$x10,$inp
899         vncipherlast    $out3,$out3,$in2
900          le?vperm       $in0,$in0,$in0,$inpperm
901          lvx_u          $in2,$x20,$inp
902         vncipherlast    $out4,$out4,$in3
903          le?vperm       $in1,$in1,$in1,$inpperm
904          lvx_u          $in3,$x30,$inp
905         vncipherlast    $out5,$out5,$in4
906          le?vperm       $in2,$in2,$in2,$inpperm
907          lvx_u          $in4,$x40,$inp
908         vncipherlast    $out6,$out6,$in5
909          le?vperm       $in3,$in3,$in3,$inpperm
910          lvx_u          $in5,$x50,$inp
911         vncipherlast    $out7,$out7,$in6
912          le?vperm       $in4,$in4,$in4,$inpperm
913          lvx_u          $in6,$x60,$inp
914         vmr             $ivec,$in7
915          le?vperm       $in5,$in5,$in5,$inpperm
916          lvx_u          $in7,$x70,$inp
917          addi           $inp,$inp,0x80
918
919         le?vperm        $out0,$out0,$out0,$inpperm
920         le?vperm        $out1,$out1,$out1,$inpperm
921         stvx_u          $out0,$x00,$out
922          le?vperm       $in6,$in6,$in6,$inpperm
923          vxor           $out0,$in0,$rndkey0
924         le?vperm        $out2,$out2,$out2,$inpperm
925         stvx_u          $out1,$x10,$out
926          le?vperm       $in7,$in7,$in7,$inpperm
927          vxor           $out1,$in1,$rndkey0
928         le?vperm        $out3,$out3,$out3,$inpperm
929         stvx_u          $out2,$x20,$out
930          vxor           $out2,$in2,$rndkey0
931         le?vperm        $out4,$out4,$out4,$inpperm
932         stvx_u          $out3,$x30,$out
933          vxor           $out3,$in3,$rndkey0
934         le?vperm        $out5,$out5,$out5,$inpperm
935         stvx_u          $out4,$x40,$out
936          vxor           $out4,$in4,$rndkey0
937         le?vperm        $out6,$out6,$out6,$inpperm
938         stvx_u          $out5,$x50,$out
939          vxor           $out5,$in5,$rndkey0
940         le?vperm        $out7,$out7,$out7,$inpperm
941         stvx_u          $out6,$x60,$out
942          vxor           $out6,$in6,$rndkey0
943         stvx_u          $out7,$x70,$out
944         addi            $out,$out,0x80
945          vxor           $out7,$in7,$rndkey0
946
947         mtctr           $rounds
948         beq             Loop_cbc_dec8x          # did $len-=128 borrow?
949
950         addic.          $len,$len,128
951         beq             Lcbc_dec8x_done
952         nop
953         nop
954
955 Loop_cbc_dec8x_tail:                            # up to 7 "words" tail...
956         vncipher        $out1,$out1,v24
957         vncipher        $out2,$out2,v24
958         vncipher        $out3,$out3,v24
959         vncipher        $out4,$out4,v24
960         vncipher        $out5,$out5,v24
961         vncipher        $out6,$out6,v24
962         vncipher        $out7,$out7,v24
963         lvx             v24,$x20,$key_          # round[3]
964         addi            $key_,$key_,0x20
965
966         vncipher        $out1,$out1,v25
967         vncipher        $out2,$out2,v25
968         vncipher        $out3,$out3,v25
969         vncipher        $out4,$out4,v25
970         vncipher        $out5,$out5,v25
971         vncipher        $out6,$out6,v25
972         vncipher        $out7,$out7,v25
973         lvx             v25,$x10,$key_          # round[4]
974         bdnz            Loop_cbc_dec8x_tail
975
976         vncipher        $out1,$out1,v24
977         vncipher        $out2,$out2,v24
978         vncipher        $out3,$out3,v24
979         vncipher        $out4,$out4,v24
980         vncipher        $out5,$out5,v24
981         vncipher        $out6,$out6,v24
982         vncipher        $out7,$out7,v24
983
984         vncipher        $out1,$out1,v25
985         vncipher        $out2,$out2,v25
986         vncipher        $out3,$out3,v25
987         vncipher        $out4,$out4,v25
988         vncipher        $out5,$out5,v25
989         vncipher        $out6,$out6,v25
990         vncipher        $out7,$out7,v25
991
992         vncipher        $out1,$out1,v26
993         vncipher        $out2,$out2,v26
994         vncipher        $out3,$out3,v26
995         vncipher        $out4,$out4,v26
996         vncipher        $out5,$out5,v26
997         vncipher        $out6,$out6,v26
998         vncipher        $out7,$out7,v26
999
1000         vncipher        $out1,$out1,v27
1001         vncipher        $out2,$out2,v27
1002         vncipher        $out3,$out3,v27
1003         vncipher        $out4,$out4,v27
1004         vncipher        $out5,$out5,v27
1005         vncipher        $out6,$out6,v27
1006         vncipher        $out7,$out7,v27
1007
1008         vncipher        $out1,$out1,v28
1009         vncipher        $out2,$out2,v28
1010         vncipher        $out3,$out3,v28
1011         vncipher        $out4,$out4,v28
1012         vncipher        $out5,$out5,v28
1013         vncipher        $out6,$out6,v28
1014         vncipher        $out7,$out7,v28
1015
1016         vncipher        $out1,$out1,v29
1017         vncipher        $out2,$out2,v29
1018         vncipher        $out3,$out3,v29
1019         vncipher        $out4,$out4,v29
1020         vncipher        $out5,$out5,v29
1021         vncipher        $out6,$out6,v29
1022         vncipher        $out7,$out7,v29
1023
1024         vncipher        $out1,$out1,v30
1025          vxor           $ivec,$ivec,v31         # last round key
1026         vncipher        $out2,$out2,v30
1027          vxor           $in1,$in1,v31
1028         vncipher        $out3,$out3,v30
1029          vxor           $in2,$in2,v31
1030         vncipher        $out4,$out4,v30
1031          vxor           $in3,$in3,v31
1032         vncipher        $out5,$out5,v30
1033          vxor           $in4,$in4,v31
1034         vncipher        $out6,$out6,v30
1035          vxor           $in5,$in5,v31
1036         vncipher        $out7,$out7,v30
1037          vxor           $in6,$in6,v31
1038
1039         cmplwi          $len,32                 # switch($len)
1040         blt             Lcbc_dec8x_one
1041         nop
1042         beq             Lcbc_dec8x_two
1043         cmplwi          $len,64
1044         blt             Lcbc_dec8x_three
1045         nop
1046         beq             Lcbc_dec8x_four
1047         cmplwi          $len,96
1048         blt             Lcbc_dec8x_five
1049         nop
1050         beq             Lcbc_dec8x_six
1051
1052 Lcbc_dec8x_seven:
1053         vncipherlast    $out1,$out1,$ivec
1054         vncipherlast    $out2,$out2,$in1
1055         vncipherlast    $out3,$out3,$in2
1056         vncipherlast    $out4,$out4,$in3
1057         vncipherlast    $out5,$out5,$in4
1058         vncipherlast    $out6,$out6,$in5
1059         vncipherlast    $out7,$out7,$in6
1060         vmr             $ivec,$in7
1061
1062         le?vperm        $out1,$out1,$out1,$inpperm
1063         le?vperm        $out2,$out2,$out2,$inpperm
1064         stvx_u          $out1,$x00,$out
1065         le?vperm        $out3,$out3,$out3,$inpperm
1066         stvx_u          $out2,$x10,$out
1067         le?vperm        $out4,$out4,$out4,$inpperm
1068         stvx_u          $out3,$x20,$out
1069         le?vperm        $out5,$out5,$out5,$inpperm
1070         stvx_u          $out4,$x30,$out
1071         le?vperm        $out6,$out6,$out6,$inpperm
1072         stvx_u          $out5,$x40,$out
1073         le?vperm        $out7,$out7,$out7,$inpperm
1074         stvx_u          $out6,$x50,$out
1075         stvx_u          $out7,$x60,$out
1076         addi            $out,$out,0x70
1077         b               Lcbc_dec8x_done
1078
1079 .align  5
1080 Lcbc_dec8x_six:
1081         vncipherlast    $out2,$out2,$ivec
1082         vncipherlast    $out3,$out3,$in2
1083         vncipherlast    $out4,$out4,$in3
1084         vncipherlast    $out5,$out5,$in4
1085         vncipherlast    $out6,$out6,$in5
1086         vncipherlast    $out7,$out7,$in6
1087         vmr             $ivec,$in7
1088
1089         le?vperm        $out2,$out2,$out2,$inpperm
1090         le?vperm        $out3,$out3,$out3,$inpperm
1091         stvx_u          $out2,$x00,$out
1092         le?vperm        $out4,$out4,$out4,$inpperm
1093         stvx_u          $out3,$x10,$out
1094         le?vperm        $out5,$out5,$out5,$inpperm
1095         stvx_u          $out4,$x20,$out
1096         le?vperm        $out6,$out6,$out6,$inpperm
1097         stvx_u          $out5,$x30,$out
1098         le?vperm        $out7,$out7,$out7,$inpperm
1099         stvx_u          $out6,$x40,$out
1100         stvx_u          $out7,$x50,$out
1101         addi            $out,$out,0x60
1102         b               Lcbc_dec8x_done
1103
1104 .align  5
1105 Lcbc_dec8x_five:
1106         vncipherlast    $out3,$out3,$ivec
1107         vncipherlast    $out4,$out4,$in3
1108         vncipherlast    $out5,$out5,$in4
1109         vncipherlast    $out6,$out6,$in5
1110         vncipherlast    $out7,$out7,$in6
1111         vmr             $ivec,$in7
1112
1113         le?vperm        $out3,$out3,$out3,$inpperm
1114         le?vperm        $out4,$out4,$out4,$inpperm
1115         stvx_u          $out3,$x00,$out
1116         le?vperm        $out5,$out5,$out5,$inpperm
1117         stvx_u          $out4,$x10,$out
1118         le?vperm        $out6,$out6,$out6,$inpperm
1119         stvx_u          $out5,$x20,$out
1120         le?vperm        $out7,$out7,$out7,$inpperm
1121         stvx_u          $out6,$x30,$out
1122         stvx_u          $out7,$x40,$out
1123         addi            $out,$out,0x50
1124         b               Lcbc_dec8x_done
1125
1126 .align  5
1127 Lcbc_dec8x_four:
1128         vncipherlast    $out4,$out4,$ivec
1129         vncipherlast    $out5,$out5,$in4
1130         vncipherlast    $out6,$out6,$in5
1131         vncipherlast    $out7,$out7,$in6
1132         vmr             $ivec,$in7
1133
1134         le?vperm        $out4,$out4,$out4,$inpperm
1135         le?vperm        $out5,$out5,$out5,$inpperm
1136         stvx_u          $out4,$x00,$out
1137         le?vperm        $out6,$out6,$out6,$inpperm
1138         stvx_u          $out5,$x10,$out
1139         le?vperm        $out7,$out7,$out7,$inpperm
1140         stvx_u          $out6,$x20,$out
1141         stvx_u          $out7,$x30,$out
1142         addi            $out,$out,0x40
1143         b               Lcbc_dec8x_done
1144
1145 .align  5
1146 Lcbc_dec8x_three:
1147         vncipherlast    $out5,$out5,$ivec
1148         vncipherlast    $out6,$out6,$in5
1149         vncipherlast    $out7,$out7,$in6
1150         vmr             $ivec,$in7
1151
1152         le?vperm        $out5,$out5,$out5,$inpperm
1153         le?vperm        $out6,$out6,$out6,$inpperm
1154         stvx_u          $out5,$x00,$out
1155         le?vperm        $out7,$out7,$out7,$inpperm
1156         stvx_u          $out6,$x10,$out
1157         stvx_u          $out7,$x20,$out
1158         addi            $out,$out,0x30
1159         b               Lcbc_dec8x_done
1160
1161 .align  5
1162 Lcbc_dec8x_two:
1163         vncipherlast    $out6,$out6,$ivec
1164         vncipherlast    $out7,$out7,$in6
1165         vmr             $ivec,$in7
1166
1167         le?vperm        $out6,$out6,$out6,$inpperm
1168         le?vperm        $out7,$out7,$out7,$inpperm
1169         stvx_u          $out6,$x00,$out
1170         stvx_u          $out7,$x10,$out
1171         addi            $out,$out,0x20
1172         b               Lcbc_dec8x_done
1173
1174 .align  5
1175 Lcbc_dec8x_one:
1176         vncipherlast    $out7,$out7,$ivec
1177         vmr             $ivec,$in7
1178
1179         le?vperm        $out7,$out7,$out7,$inpperm
1180         stvx_u          $out7,0,$out
1181         addi            $out,$out,0x10
1182
1183 Lcbc_dec8x_done:
1184         le?vperm        $ivec,$ivec,$ivec,$inpperm
1185         stvx_u          $ivec,0,$ivp            # write [unaligned] iv
1186
1187         li              r10,`$FRAME+15`
1188         li              r11,`$FRAME+31`
1189         stvx            $inpperm,r10,$sp        # wipe copies of round keys
1190         addi            r10,r10,32
1191         stvx            $inpperm,r11,$sp
1192         addi            r11,r11,32
1193         stvx            $inpperm,r10,$sp
1194         addi            r10,r10,32
1195         stvx            $inpperm,r11,$sp
1196         addi            r11,r11,32
1197         stvx            $inpperm,r10,$sp
1198         addi            r10,r10,32
1199         stvx            $inpperm,r11,$sp
1200         addi            r11,r11,32
1201         stvx            $inpperm,r10,$sp
1202         addi            r10,r10,32
1203         stvx            $inpperm,r11,$sp
1204         addi            r11,r11,32
1205
1206         mtspr           256,$vrsave
1207         lvx             v20,r10,$sp             # ABI says so
1208         addi            r10,r10,32
1209         lvx             v21,r11,$sp
1210         addi            r11,r11,32
1211         lvx             v22,r10,$sp
1212         addi            r10,r10,32
1213         lvx             v23,r11,$sp
1214         addi            r11,r11,32
1215         lvx             v24,r10,$sp
1216         addi            r10,r10,32
1217         lvx             v25,r11,$sp
1218         addi            r11,r11,32
1219         lvx             v26,r10,$sp
1220         addi            r10,r10,32
1221         lvx             v27,r11,$sp
1222         addi            r11,r11,32
1223         lvx             v28,r10,$sp
1224         addi            r10,r10,32
1225         lvx             v29,r11,$sp
1226         addi            r11,r11,32
1227         lvx             v30,r10,$sp
1228         lvx             v31,r11,$sp
1229         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1230         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1231         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1232         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1233         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1234         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1235         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1236         blr
1237         .long           0
1238         .byte           0,12,0x04,0,0x80,6,6,0
1239         .long           0
1240 .size   .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1241 ___
1242 }}      }}}
1243
1244 #########################################################################
1245 {{{     # CTR procedure[s]                                              #
1246 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));     # GPRs: ABI args in r3-r7, scratch in r8-r10; $len counts 16-byte blocks
1247 my ($rndkey0,$rndkey1,$inout,$tmp)=             map("v$_",(0..3));      # rotating round-key pair, cipher state, scratch
1248 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1249                                                 map("v$_",(4..11));     # counter block, unaligned-I/O helpers, +1 constant
1250 my $dat=$tmp;                                   # $tmp doubles as the data register in the main loop
1251
1252 $code.=<<___;
1253 .globl  .${prefix}_ctr32_encrypt_blocks
1254 .align  5
1255 .${prefix}_ctr32_encrypt_blocks:
1256         ${UCMP}i        $len,1                  # \$len is the block count
1257         bltlr-                                  # nothing to do? return
1258
1259         lis             r0,0xfff0               # VRSAVE mask claiming v0-v11
1260         mfspr           $vrsave,256             # preserve caller's VRSAVE
1261         mtspr           256,r0
1262
1263         li              $idx,15
1264         vxor            $rndkey0,$rndkey0,$rndkey0      # all-zero vector
1265         le?vspltisb     $tmp,0x0f               # little-endian permute fix-up
1266
1267         lvx             $ivec,0,$ivp            # load [unaligned] iv
1268         lvsl            $inpperm,0,$ivp
1269         lvx             $inptail,$idx,$ivp
1270          vspltisb       $one,1
1271         le?vxor         $inpperm,$inpperm,$tmp
1272         vperm           $ivec,$ivec,$inptail,$inpperm   # align the iv
1273          vsldoi         $one,$rndkey0,$one,1    # {0,..,0,1}: +1 in the last 32-bit word
1274
1275         neg             r11,$inp
1276         ?lvsl           $keyperm,0,$key         # prepare for unaligned key
1277         lwz             $rounds,240($key)       # AES_KEY->rounds
1278
1279         lvsr            $inpperm,0,r11          # prepare for unaligned load
1280         lvx             $inptail,0,$inp
1281         addi            $inp,$inp,15            # 15 is not typo
1282         le?vxor         $inpperm,$inpperm,$tmp
1283
1284         srwi            $rounds,$rounds,1       # loop body does 2 rounds per iteration
1285         li              $idx,16
1286         subi            $rounds,$rounds,1       # last round pair is peeled off after the loop
1287
1288         ${UCMP}i        $len,8
1289         bge             _aesp8_ctr32_encrypt8x  # 8 or more blocks: take the 8x path
1290
1291         ?lvsr           $outperm,0,$out         # prepare for unaligned store
1292         vspltisb        $outmask,-1
1293         lvx             $outhead,0,$out
1294         ?vperm          $outmask,$rndkey0,$outmask,$outperm
1295         le?vxor         $outperm,$outperm,$tmp
1296
1297         lvx             $rndkey0,0,$key
1298         mtctr           $rounds
1299         lvx             $rndkey1,$idx,$key
1300         addi            $idx,$idx,16
1301         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1302         vxor            $inout,$ivec,$rndkey0   # counter ^ round[0]
1303         lvx             $rndkey0,$idx,$key
1304         addi            $idx,$idx,16
1305         b               Loop_ctr32_enc
1306
1307 .align  5
1308 Loop_ctr32_enc:
1309         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1310         vcipher         $inout,$inout,$rndkey1
1311         lvx             $rndkey1,$idx,$key
1312         addi            $idx,$idx,16
1313         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1314         vcipher         $inout,$inout,$rndkey0
1315         lvx             $rndkey0,$idx,$key
1316         addi            $idx,$idx,16
1317         bdnz            Loop_ctr32_enc
1318
1319         vadduwm         $ivec,$ivec,$one        # bump the 32-bit counter word
1320          vmr            $dat,$inptail
1321          lvx            $inptail,0,$inp
1322          addi           $inp,$inp,16
1323          subic.         $len,$len,1             # blocks--
1324
1325         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1326         vcipher         $inout,$inout,$rndkey1
1327         lvx             $rndkey1,$idx,$key
1328          vperm          $dat,$dat,$inptail,$inpperm     # align the input block
1329          li             $idx,16
1330         ?vperm          $rndkey1,$rndkey0,$rndkey1,$keyperm
1331          lvx            $rndkey0,0,$key
1332         vxor            $dat,$dat,$rndkey1      # last round key
1333         vcipherlast     $inout,$inout,$dat      # final round and xor with input in one instruction
1334
1335          lvx            $rndkey1,$idx,$key
1336          addi           $idx,$idx,16
1337         vperm           $inout,$inout,$inout,$outperm
1338         vsel            $dat,$outhead,$inout,$outmask   # splice with pending output bytes
1339          mtctr          $rounds
1340          ?vperm         $rndkey0,$rndkey0,$rndkey1,$keyperm
1341         vmr             $outhead,$inout
1342          vxor           $inout,$ivec,$rndkey0   # start next block: counter ^ round[0]
1343          lvx            $rndkey0,$idx,$key
1344          addi           $idx,$idx,16
1345         stvx            $dat,0,$out
1346         addi            $out,$out,16
1347         bne             Loop_ctr32_enc          # loop while blocks remain
1348
1349         addi            $out,$out,-1            # flush last partial output vector
1350         lvx             $inout,0,$out           # redundant in aligned case
1351         vsel            $inout,$outhead,$inout,$outmask
1352         stvx            $inout,0,$out
1353
1354         mtspr           256,$vrsave             # restore caller's VRSAVE
1355         blr
1356         .long           0
1357         .byte           0,12,0x14,0,0,0,6,0     # traceback table
1358         .long           0
1359 ___
1360 #########################################################################
1361 {{      # Optimized CTR procedure                                       #
1362 my $key_="r11";                 # points at the round-key copies on the stack frame
1363 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));   # 0x00-0x70 displacement registers
1364     $x00=0 if ($flavour =~ /osx/);      # Apple's assembler objects to r0 here; literal 0 gives the same effective address
1365 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));     # 8 input blocks
1366 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));     # 8 output/state blocks
1367 my $rndkey0="v23";      # v24-v25 rotating buffer for first few round keys
1368                         # v26-v31 last 6 round keys
1369 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1370 my ($two,$three,$four)=($outhead,$outperm,$outmask);    # counter increments, reusing "caller" vectors
1371
1372 $code.=<<___;
1373 .align  5
1374 _aesp8_ctr32_encrypt8x:
1375         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1376         li              r10,`$FRAME+8*16+15`
1377         li              r11,`$FRAME+8*16+31`
1378         stvx            v20,r10,$sp             # ABI says so
1379         addi            r10,r10,32
1380         stvx            v21,r11,$sp
1381         addi            r11,r11,32
1382         stvx            v22,r10,$sp
1383         addi            r10,r10,32
1384         stvx            v23,r11,$sp
1385         addi            r11,r11,32
1386         stvx            v24,r10,$sp
1387         addi            r10,r10,32
1388         stvx            v25,r11,$sp
1389         addi            r11,r11,32
1390         stvx            v26,r10,$sp
1391         addi            r10,r10,32
1392         stvx            v27,r11,$sp
1393         addi            r11,r11,32
1394         stvx            v28,r10,$sp
1395         addi            r10,r10,32
1396         stvx            v29,r11,$sp
1397         addi            r11,r11,32
1398         stvx            v30,r10,$sp
1399         stvx            v31,r11,$sp
1400         li              r0,-1
1401         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
1402         li              $x10,0x10
1403         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1404         li              $x20,0x20
1405         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1406         li              $x30,0x30
1407         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1408         li              $x40,0x40
1409         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1410         li              $x50,0x50
1411         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1412         li              $x60,0x60
1413         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1414         li              $x70,0x70
1415         mtspr           256,r0
1416
1417         subi            $rounds,$rounds,3       # -4 in total
1418
1419         lvx             $rndkey0,$x00,$key      # load key schedule
1420         lvx             v30,$x10,$key
1421         addi            $key,$key,0x20
1422         lvx             v31,$x00,$key
1423         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
1424         addi            $key_,$sp,$FRAME+15
1425         mtctr           $rounds
1426
1427 Load_ctr32_enc_key:
1428         ?vperm          v24,v30,v31,$keyperm
1429         lvx             v30,$x10,$key
1430         addi            $key,$key,0x20
1431         stvx            v24,$x00,$key_          # off-load round[1]
1432         ?vperm          v25,v31,v30,$keyperm
1433         lvx             v31,$x00,$key
1434         stvx            v25,$x10,$key_          # off-load round[2]
1435         addi            $key_,$key_,0x20
1436         bdnz            Load_ctr32_enc_key
1437
1438         lvx             v26,$x10,$key
1439         ?vperm          v24,v30,v31,$keyperm
1440         lvx             v27,$x20,$key
1441         stvx            v24,$x00,$key_          # off-load round[3]
1442         ?vperm          v25,v31,v26,$keyperm
1443         lvx             v28,$x30,$key
1444         stvx            v25,$x10,$key_          # off-load round[4]
1445         addi            $key_,$sp,$FRAME+15     # rewind $key_
1446         ?vperm          v26,v26,v27,$keyperm
1447         lvx             v29,$x40,$key
1448         ?vperm          v27,v27,v28,$keyperm
1449         lvx             v30,$x50,$key
1450         ?vperm          v28,v28,v29,$keyperm
1451         lvx             v31,$x60,$key
1452         ?vperm          v29,v29,v30,$keyperm
1453         lvx             $out0,$x70,$key         # borrow $out0
1454         ?vperm          v30,v30,v31,$keyperm
1455         lvx             v24,$x00,$key_          # pre-load round[1]
1456         ?vperm          v31,v31,$out0,$keyperm
1457         lvx             v25,$x10,$key_          # pre-load round[2]
1458
1459         vadduwm         $two,$one,$one
1460         subi            $inp,$inp,15            # undo "caller"
1461         $SHL            $len,$len,4
1462
1463         vadduwm         $out1,$ivec,$one        # counter values ...
1464         vadduwm         $out2,$ivec,$two
1465         vxor            $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1466          le?li          $idx,8
1467         vadduwm         $out3,$out1,$two
1468         vxor            $out1,$out1,$rndkey0
1469          le?lvsl        $inpperm,0,$idx
1470         vadduwm         $out4,$out2,$two
1471         vxor            $out2,$out2,$rndkey0
1472          le?vspltisb    $tmp,0x0f
1473         vadduwm         $out5,$out3,$two
1474         vxor            $out3,$out3,$rndkey0
1475          le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
1476         vadduwm         $out6,$out4,$two
1477         vxor            $out4,$out4,$rndkey0
1478         vadduwm         $out7,$out5,$two
1479         vxor            $out5,$out5,$rndkey0
1480         vadduwm         $ivec,$out6,$two        # next counter value
1481         vxor            $out6,$out6,$rndkey0
1482         vxor            $out7,$out7,$rndkey0
1483
1484         mtctr           $rounds
1485         b               Loop_ctr32_enc8x
1486 .align  5
1487 Loop_ctr32_enc8x:
1488         vcipher         $out0,$out0,v24
1489         vcipher         $out1,$out1,v24
1490         vcipher         $out2,$out2,v24
1491         vcipher         $out3,$out3,v24
1492         vcipher         $out4,$out4,v24
1493         vcipher         $out5,$out5,v24
1494         vcipher         $out6,$out6,v24
1495         vcipher         $out7,$out7,v24
1496 Loop_ctr32_enc8x_middle:
1497         lvx             v24,$x20,$key_          # round[3]
1498         addi            $key_,$key_,0x20
1499
1500         vcipher         $out0,$out0,v25
1501         vcipher         $out1,$out1,v25
1502         vcipher         $out2,$out2,v25
1503         vcipher         $out3,$out3,v25
1504         vcipher         $out4,$out4,v25
1505         vcipher         $out5,$out5,v25
1506         vcipher         $out6,$out6,v25
1507         vcipher         $out7,$out7,v25
1508         lvx             v25,$x10,$key_          # round[4]
1509         bdnz            Loop_ctr32_enc8x
1510
1511         subic           r11,$len,256            # $len-256, borrow $key_
1512         vcipher         $out0,$out0,v24
1513         vcipher         $out1,$out1,v24
1514         vcipher         $out2,$out2,v24
1515         vcipher         $out3,$out3,v24
1516         vcipher         $out4,$out4,v24
1517         vcipher         $out5,$out5,v24
1518         vcipher         $out6,$out6,v24
1519         vcipher         $out7,$out7,v24
1520
1521         subfe           r0,r0,r0                # borrow?-1:0
1522         vcipher         $out0,$out0,v25
1523         vcipher         $out1,$out1,v25
1524         vcipher         $out2,$out2,v25
1525         vcipher         $out3,$out3,v25
1526         vcipher         $out4,$out4,v25
1527         vcipher         $out5,$out5,v25
1528         vcipher         $out6,$out6,v25
1529         vcipher         $out7,$out7,v25
1530
1531         and             r0,r0,r11
1532         addi            $key_,$sp,$FRAME+15     # rewind $key_
1533         vcipher         $out0,$out0,v26
1534         vcipher         $out1,$out1,v26
1535         vcipher         $out2,$out2,v26
1536         vcipher         $out3,$out3,v26
1537         vcipher         $out4,$out4,v26
1538         vcipher         $out5,$out5,v26
1539         vcipher         $out6,$out6,v26
1540         vcipher         $out7,$out7,v26
1541         lvx             v24,$x00,$key_          # re-pre-load round[1]
1542
1543         subic           $len,$len,129           # $len-=129
1544         vcipher         $out0,$out0,v27
1545         addi            $len,$len,1             # $len-=128 really
1546         vcipher         $out1,$out1,v27
1547         vcipher         $out2,$out2,v27
1548         vcipher         $out3,$out3,v27
1549         vcipher         $out4,$out4,v27
1550         vcipher         $out5,$out5,v27
1551         vcipher         $out6,$out6,v27
1552         vcipher         $out7,$out7,v27
1553         lvx             v25,$x10,$key_          # re-pre-load round[2]
1554
1555         vcipher         $out0,$out0,v28
1556          lvx_u          $in0,$x00,$inp          # load input
1557         vcipher         $out1,$out1,v28
1558          lvx_u          $in1,$x10,$inp
1559         vcipher         $out2,$out2,v28
1560          lvx_u          $in2,$x20,$inp
1561         vcipher         $out3,$out3,v28
1562          lvx_u          $in3,$x30,$inp
1563         vcipher         $out4,$out4,v28
1564          lvx_u          $in4,$x40,$inp
1565         vcipher         $out5,$out5,v28
1566          lvx_u          $in5,$x50,$inp
1567         vcipher         $out6,$out6,v28
1568          lvx_u          $in6,$x60,$inp
1569         vcipher         $out7,$out7,v28
1570          lvx_u          $in7,$x70,$inp
1571          addi           $inp,$inp,0x80
1572
1573         vcipher         $out0,$out0,v29
1574          le?vperm       $in0,$in0,$in0,$inpperm
1575         vcipher         $out1,$out1,v29
1576          le?vperm       $in1,$in1,$in1,$inpperm
1577         vcipher         $out2,$out2,v29
1578          le?vperm       $in2,$in2,$in2,$inpperm
1579         vcipher         $out3,$out3,v29
1580          le?vperm       $in3,$in3,$in3,$inpperm
1581         vcipher         $out4,$out4,v29
1582          le?vperm       $in4,$in4,$in4,$inpperm
1583         vcipher         $out5,$out5,v29
1584          le?vperm       $in5,$in5,$in5,$inpperm
1585         vcipher         $out6,$out6,v29
1586          le?vperm       $in6,$in6,$in6,$inpperm
1587         vcipher         $out7,$out7,v29
1588          le?vperm       $in7,$in7,$in7,$inpperm
1589
1590         add             $inp,$inp,r0            # $inp is adjusted in such
1591                                                 # way that at exit from the
1592                                                 # loop inX-in7 are loaded
1593                                                 # with last "words"
1594         subfe.          r0,r0,r0                # borrow?-1:0
1595         vcipher         $out0,$out0,v30
1596          vxor           $in0,$in0,v31           # xor with last round key
1597         vcipher         $out1,$out1,v30
1598          vxor           $in1,$in1,v31
1599         vcipher         $out2,$out2,v30
1600          vxor           $in2,$in2,v31
1601         vcipher         $out3,$out3,v30
1602          vxor           $in3,$in3,v31
1603         vcipher         $out4,$out4,v30
1604          vxor           $in4,$in4,v31
1605         vcipher         $out5,$out5,v30
1606          vxor           $in5,$in5,v31
1607         vcipher         $out6,$out6,v30
1608          vxor           $in6,$in6,v31
1609         vcipher         $out7,$out7,v30
1610          vxor           $in7,$in7,v31
1611
1612         bne             Lctr32_enc8x_break      # did $len-129 borrow?
1613
1614         vcipherlast     $in0,$out0,$in0
1615         vcipherlast     $in1,$out1,$in1
1616          vadduwm        $out1,$ivec,$one        # counter values ...
1617         vcipherlast     $in2,$out2,$in2
1618          vadduwm        $out2,$ivec,$two
1619          vxor           $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1620         vcipherlast     $in3,$out3,$in3
1621          vadduwm        $out3,$out1,$two
1622          vxor           $out1,$out1,$rndkey0
1623         vcipherlast     $in4,$out4,$in4
1624          vadduwm        $out4,$out2,$two
1625          vxor           $out2,$out2,$rndkey0
1626         vcipherlast     $in5,$out5,$in5
1627          vadduwm        $out5,$out3,$two
1628          vxor           $out3,$out3,$rndkey0
1629         vcipherlast     $in6,$out6,$in6
1630          vadduwm        $out6,$out4,$two
1631          vxor           $out4,$out4,$rndkey0
1632         vcipherlast     $in7,$out7,$in7
1633          vadduwm        $out7,$out5,$two
1634          vxor           $out5,$out5,$rndkey0
1635         le?vperm        $in0,$in0,$in0,$inpperm
1636          vadduwm        $ivec,$out6,$two        # next counter value
1637          vxor           $out6,$out6,$rndkey0
1638         le?vperm        $in1,$in1,$in1,$inpperm
1639          vxor           $out7,$out7,$rndkey0
1640         mtctr           $rounds
1641
1642          vcipher        $out0,$out0,v24
1643         stvx_u          $in0,$x00,$out
1644         le?vperm        $in2,$in2,$in2,$inpperm
1645          vcipher        $out1,$out1,v24
1646         stvx_u          $in1,$x10,$out
1647         le?vperm        $in3,$in3,$in3,$inpperm
1648          vcipher        $out2,$out2,v24
1649         stvx_u          $in2,$x20,$out
1650         le?vperm        $in4,$in4,$in4,$inpperm
1651          vcipher        $out3,$out3,v24
1652         stvx_u          $in3,$x30,$out
1653         le?vperm        $in5,$in5,$in5,$inpperm
1654          vcipher        $out4,$out4,v24
1655         stvx_u          $in4,$x40,$out
1656         le?vperm        $in6,$in6,$in6,$inpperm
1657          vcipher        $out5,$out5,v24
1658         stvx_u          $in5,$x50,$out
1659         le?vperm        $in7,$in7,$in7,$inpperm
1660          vcipher        $out6,$out6,v24
1661         stvx_u          $in6,$x60,$out
1662          vcipher        $out7,$out7,v24
1663         stvx_u          $in7,$x70,$out
1664         addi            $out,$out,0x80
1665
1666         b               Loop_ctr32_enc8x_middle
1667
1668 .align  5
1669 Lctr32_enc8x_break:
1670         cmpwi           $len,-0x60
1671         blt             Lctr32_enc8x_one
1672         nop
1673         beq             Lctr32_enc8x_two
1674         cmpwi           $len,-0x40
1675         blt             Lctr32_enc8x_three
1676         nop
1677         beq             Lctr32_enc8x_four
1678         cmpwi           $len,-0x20
1679         blt             Lctr32_enc8x_five
1680         nop
1681         beq             Lctr32_enc8x_six
1682         cmpwi           $len,0x00
1683         blt             Lctr32_enc8x_seven
1684
1685 Lctr32_enc8x_eight:
1686         vcipherlast     $out0,$out0,$in0
1687         vcipherlast     $out1,$out1,$in1
1688         vcipherlast     $out2,$out2,$in2
1689         vcipherlast     $out3,$out3,$in3
1690         vcipherlast     $out4,$out4,$in4
1691         vcipherlast     $out5,$out5,$in5
1692         vcipherlast     $out6,$out6,$in6
1693         vcipherlast     $out7,$out7,$in7
1694
1695         le?vperm        $out0,$out0,$out0,$inpperm
1696         le?vperm        $out1,$out1,$out1,$inpperm
1697         stvx_u          $out0,$x00,$out
1698         le?vperm        $out2,$out2,$out2,$inpperm
1699         stvx_u          $out1,$x10,$out
1700         le?vperm        $out3,$out3,$out3,$inpperm
1701         stvx_u          $out2,$x20,$out
1702         le?vperm        $out4,$out4,$out4,$inpperm
1703         stvx_u          $out3,$x30,$out
1704         le?vperm        $out5,$out5,$out5,$inpperm
1705         stvx_u          $out4,$x40,$out
1706         le?vperm        $out6,$out6,$out6,$inpperm
1707         stvx_u          $out5,$x50,$out
1708         le?vperm        $out7,$out7,$out7,$inpperm
1709         stvx_u          $out6,$x60,$out
1710         stvx_u          $out7,$x70,$out
1711         addi            $out,$out,0x80
1712         b               Lctr32_enc8x_done
1713
1714 .align  5
1715 Lctr32_enc8x_seven:
1716         vcipherlast     $out0,$out0,$in1
1717         vcipherlast     $out1,$out1,$in2
1718         vcipherlast     $out2,$out2,$in3
1719         vcipherlast     $out3,$out3,$in4
1720         vcipherlast     $out4,$out4,$in5
1721         vcipherlast     $out5,$out5,$in6
1722         vcipherlast     $out6,$out6,$in7
1723
1724         le?vperm        $out0,$out0,$out0,$inpperm
1725         le?vperm        $out1,$out1,$out1,$inpperm
1726         stvx_u          $out0,$x00,$out
1727         le?vperm        $out2,$out2,$out2,$inpperm
1728         stvx_u          $out1,$x10,$out
1729         le?vperm        $out3,$out3,$out3,$inpperm
1730         stvx_u          $out2,$x20,$out
1731         le?vperm        $out4,$out4,$out4,$inpperm
1732         stvx_u          $out3,$x30,$out
1733         le?vperm        $out5,$out5,$out5,$inpperm
1734         stvx_u          $out4,$x40,$out
1735         le?vperm        $out6,$out6,$out6,$inpperm
1736         stvx_u          $out5,$x50,$out
1737         stvx_u          $out6,$x60,$out
1738         addi            $out,$out,0x70
1739         b               Lctr32_enc8x_done
1740
1741 .align  5
1742 Lctr32_enc8x_six:
1743         vcipherlast     $out0,$out0,$in2
1744         vcipherlast     $out1,$out1,$in3
1745         vcipherlast     $out2,$out2,$in4
1746         vcipherlast     $out3,$out3,$in5
1747         vcipherlast     $out4,$out4,$in6
1748         vcipherlast     $out5,$out5,$in7
1749
1750         le?vperm        $out0,$out0,$out0,$inpperm
1751         le?vperm        $out1,$out1,$out1,$inpperm
1752         stvx_u          $out0,$x00,$out
1753         le?vperm        $out2,$out2,$out2,$inpperm
1754         stvx_u          $out1,$x10,$out
1755         le?vperm        $out3,$out3,$out3,$inpperm
1756         stvx_u          $out2,$x20,$out
1757         le?vperm        $out4,$out4,$out4,$inpperm
1758         stvx_u          $out3,$x30,$out
1759         le?vperm        $out5,$out5,$out5,$inpperm
1760         stvx_u          $out4,$x40,$out
1761         stvx_u          $out5,$x50,$out
1762         addi            $out,$out,0x60
1763         b               Lctr32_enc8x_done
1764
1765 .align  5
1766 Lctr32_enc8x_five:
1767         vcipherlast     $out0,$out0,$in3
1768         vcipherlast     $out1,$out1,$in4
1769         vcipherlast     $out2,$out2,$in5
1770         vcipherlast     $out3,$out3,$in6
1771         vcipherlast     $out4,$out4,$in7
1772
1773         le?vperm        $out0,$out0,$out0,$inpperm
1774         le?vperm        $out1,$out1,$out1,$inpperm
1775         stvx_u          $out0,$x00,$out
1776         le?vperm        $out2,$out2,$out2,$inpperm
1777         stvx_u          $out1,$x10,$out
1778         le?vperm        $out3,$out3,$out3,$inpperm
1779         stvx_u          $out2,$x20,$out
1780         le?vperm        $out4,$out4,$out4,$inpperm
1781         stvx_u          $out3,$x30,$out
1782         stvx_u          $out4,$x40,$out
1783         addi            $out,$out,0x50
1784         b               Lctr32_enc8x_done
1785
1786 .align  5
1787 Lctr32_enc8x_four:
1788         vcipherlast     $out0,$out0,$in4
1789         vcipherlast     $out1,$out1,$in5
1790         vcipherlast     $out2,$out2,$in6
1791         vcipherlast     $out3,$out3,$in7
1792
1793         le?vperm        $out0,$out0,$out0,$inpperm
1794         le?vperm        $out1,$out1,$out1,$inpperm
1795         stvx_u          $out0,$x00,$out
1796         le?vperm        $out2,$out2,$out2,$inpperm
1797         stvx_u          $out1,$x10,$out
1798         le?vperm        $out3,$out3,$out3,$inpperm
1799         stvx_u          $out2,$x20,$out
1800         stvx_u          $out3,$x30,$out
1801         addi            $out,$out,0x40
1802         b               Lctr32_enc8x_done
1803
.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	b		Lctr32_enc8x_done
1818
.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	b		Lctr32_enc8x_done
1830
1831 .align  5
1832 Lctr32_enc8x_one:
1833         vcipherlast     $out0,$out0,$in7
1834
1835         le?vperm        $out0,$out0,$out0,$inpperm
1836         stvx_u          $out0,0,$out
1837         addi            $out,$out,0x10
1838
1839 Lctr32_enc8x_done:
1840         li              r10,`$FRAME+15`
1841         li              r11,`$FRAME+31`
1842         stvx            $inpperm,r10,$sp        # wipe copies of round keys
1843         addi            r10,r10,32
1844         stvx            $inpperm,r11,$sp
1845         addi            r11,r11,32
1846         stvx            $inpperm,r10,$sp
1847         addi            r10,r10,32
1848         stvx            $inpperm,r11,$sp
1849         addi            r11,r11,32
1850         stvx            $inpperm,r10,$sp
1851         addi            r10,r10,32
1852         stvx            $inpperm,r11,$sp
1853         addi            r11,r11,32
1854         stvx            $inpperm,r10,$sp
1855         addi            r10,r10,32
1856         stvx            $inpperm,r11,$sp
1857         addi            r11,r11,32
1858
1859         mtspr           256,$vrsave
1860         lvx             v20,r10,$sp             # ABI says so
1861         addi            r10,r10,32
1862         lvx             v21,r11,$sp
1863         addi            r11,r11,32
1864         lvx             v22,r10,$sp
1865         addi            r10,r10,32
1866         lvx             v23,r11,$sp
1867         addi            r11,r11,32
1868         lvx             v24,r10,$sp
1869         addi            r10,r10,32
1870         lvx             v25,r11,$sp
1871         addi            r11,r11,32
1872         lvx             v26,r10,$sp
1873         addi            r10,r10,32
1874         lvx             v27,r11,$sp
1875         addi            r11,r11,32
1876         lvx             v28,r10,$sp
1877         addi            r10,r10,32
1878         lvx             v29,r11,$sp
1879         addi            r11,r11,32
1880         lvx             v30,r10,$sp
1881         lvx             v31,r11,$sp
1882         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1883         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1884         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1885         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1886         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1887         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1888         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1889         blr
1890         .long           0
1891         .byte           0,12,0x04,0,0x80,6,6,0
1892         .long           0
1893 .size   .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1894 ___
1895 }}      }}}
1896
# Post-process the generated assembly in $code line by line:
#   1. expand `...` arithmetic (frame offsets etc.) via eval,
#   2. emit the constants table in endian-agnostic .byte form,
#   3. resolve '?'-prefixed endian-specific instructions for the
#      target flavour (big- vs little-endian).
my $consts=1;	# true while still inside the leading constants table
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion: a ".long"/".byte"
	# directive carrying a trailing "?inv"/"?rev"/"?..." marker is
	# rewritten as raw bytes so the table is correct on both endians
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;	# conversion marker, e.g. "?inv" or "?rev"
	    my @bytes=();

	    # convert to endian-agnostic format: split the operand list
	    # into individual byte values (hex/octal via oct, else int)
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion: "?inv" xors each nibble index
	    # (vperm index remap), "?rev" reverses the byte order;
	    # big-endian flavours keep the bytes as written
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; }; 
		}
	    }

	    # emit as plain .byte list
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly... note the or-chain: at most one
	# substitution applies per line, tried in this exact order
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or	# keep le-only instruction
	    s/be\?/#be#/o	or	# comment out be-only instruction
	    s/\?lvsr/lvsl/o	or	# swap lvsr/lvsl permute loads
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or	# comment out le-only instruction
	    s/be\?//o		or	# keep be-only instruction
	    s/\?([a-z]+)/$1/o;		# strip marker, keep instruction as-is
	}

	print $_,"\n";
}
1948
# Flush and close the output stream; buffered write errors (e.g. disk
# full while emitting the assembly) only surface at close, so a failed
# close must be fatal rather than silently producing truncated output.
close STDOUT or die "error closing STDOUT: $!";