1 #!/usr/bin/env perl
2 #
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9 #
10 # This module implements support for AES instructions as per PowerISA
11 # specification version 2.07, first implemented by the POWER8 processor.
12 # The module is endian-agnostic in the sense that it supports both big-
13 # and little-endian cases. It is also alignment-agnostic, and it is
14 # guaranteed not to cause alignment exceptions. [One of the options was
15 # to use VSX loads and stores, which tolerate unaligned references,
16 # but even then the specification doesn't prohibit exceptions on page
17 # boundaries.]
18
19 $flavour = shift;
20
21 if ($flavour =~ /64/) {
22         $SIZE_T =8;
23         $LRSAVE =2*$SIZE_T;
24         $STU    ="stdu";
25         $POP    ="ld";
26         $PUSH   ="std";
27         $UCMP   ="cmpld";
28 } elsif ($flavour =~ /32/) {
29         $SIZE_T =4;
30         $LRSAVE =$SIZE_T;
31         $STU    ="stwu";
32         $POP    ="lwz";
33         $PUSH   ="stw";
34         $UCMP   ="cmplw";
35 } else { die "nonsense $flavour"; }
36
37 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
38
39 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
40 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
41 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
42 die "can't locate ppc-xlate.pl";
43
44 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
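
# Typical invocation (a minimal sketch; the exact flavour strings are
# whatever the build system passes, "linux64le" below is only an example):
#
#   perl aesp8-ppc.pl linux64le aesp8-ppc.s
#
# The flavour selects the 32-/64-bit and endianness handling above, and is
# also passed to ppc-xlate.pl, which this script pipes its output through;
# the second argument is forwarded to ppc-xlate.pl as the output file.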
45
46 $FRAME=8*$SIZE_T;
47 $prefix="AES";
48
49 $sp="r1";
50 $vrsave="r12";
51
52 {{{
53 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
54 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
55 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
56
57 $code.=<<___;
58 .machine        "any"
59
60 .text
61
62 .align  7
63 rcon:
64 .long   0x01000000, 0x01000000, 0x01000000, 0x01000000  ?rev
65 .long   0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000  ?rev
66 .long   0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c  ?rev
67 .long   0,0,0,0                                         ?asis
68 Lconsts:
69         mflr    r0
70         bcl     20,31,\$+4
71         mflr    $ptr     #vvvvv "distance between . and rcon"
72         addi    $ptr,$ptr,-0x48
73         mtlr    r0
74         blr
75         .long   0
76         .byte   0,12,0x14,0,0,0,0,0
77 .asciz  "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
78
79 .globl  .${prefix}_set_encrypt_key
80 .align  5
81 .${prefix}_set_encrypt_key:
82 Lset_encrypt_key:
83         mflr            r11
84         lis             r0,0xfff0
85         $PUSH           r11,$LRSAVE($sp)
86         mfspr           $vrsave,256
87         mtspr           256,r0
88
89         bl              Lconsts
90         mtlr            r11
91
92         neg             r9,$inp
93         lvx             $in0,0,$inp
94         addi            $inp,$inp,15            # 15 is not typo
95         lvsr            $key,0,r9               # borrow $key
96         li              r8,0x20
97         cmpwi           $bits,192
98         lvx             $in1,0,$inp
99 ___
100 $code.=<<___            if ($LITTLE_ENDIAN);
101         vspltisb        $mask,0x0f              # borrow $mask
102         vxor            $key,$key,$mask         # adjust for byte swap
103 ___
104 $code.=<<___;
105         lvx             $rcon,0,$ptr
106         lvx             $mask,r8,$ptr
107         addi            $ptr,$ptr,0x10
108         vperm           $in0,$in0,$in1,$key     # align [and byte swap in LE]
109         li              $cnt,8
110         vxor            $zero,$zero,$zero
111         mtctr           $cnt
112
113         ?lvsr           $outperm,0,$out
114         vspltisb        $outmask,-1
115         lvx             $outhead,0,$out
116         ?vperm          $outmask,$zero,$outmask,$outperm
117
118         blt             Loop128
119         addi            $inp,$inp,8
120         beq             L192
121         addi            $inp,$inp,8
122         b               L256
123
124 .align  4
125 Loop128:
126         vperm           $key,$in0,$in0,$mask    # rotate-n-splat
127         vsldoi          $tmp,$zero,$in0,12      # >>32
128          vperm          $outtail,$in0,$in0,$outperm     # rotate
129          vsel           $stage,$outhead,$outtail,$outmask
130          vmr            $outhead,$outtail
131         vcipherlast     $key,$key,$rcon
132          stvx           $stage,0,$out
133          addi           $out,$out,16
134
135         vxor            $in0,$in0,$tmp
136         vsldoi          $tmp,$zero,$tmp,12      # >>32
137         vxor            $in0,$in0,$tmp
138         vsldoi          $tmp,$zero,$tmp,12      # >>32
139         vxor            $in0,$in0,$tmp
140          vadduwm        $rcon,$rcon,$rcon
141         vxor            $in0,$in0,$key
142         bdnz            Loop128
143
144         lvx             $rcon,0,$ptr            # last two round keys
145
146         vperm           $key,$in0,$in0,$mask    # rotate-n-splat
147         vsldoi          $tmp,$zero,$in0,12      # >>32
148          vperm          $outtail,$in0,$in0,$outperm     # rotate
149          vsel           $stage,$outhead,$outtail,$outmask
150          vmr            $outhead,$outtail
151         vcipherlast     $key,$key,$rcon
152          stvx           $stage,0,$out
153          addi           $out,$out,16
154
155         vxor            $in0,$in0,$tmp
156         vsldoi          $tmp,$zero,$tmp,12      # >>32
157         vxor            $in0,$in0,$tmp
158         vsldoi          $tmp,$zero,$tmp,12      # >>32
159         vxor            $in0,$in0,$tmp
160          vadduwm        $rcon,$rcon,$rcon
161         vxor            $in0,$in0,$key
162
163         vperm           $key,$in0,$in0,$mask    # rotate-n-splat
164         vsldoi          $tmp,$zero,$in0,12      # >>32
165          vperm          $outtail,$in0,$in0,$outperm     # rotate
166          vsel           $stage,$outhead,$outtail,$outmask
167          vmr            $outhead,$outtail
168         vcipherlast     $key,$key,$rcon
169          stvx           $stage,0,$out
170          addi           $out,$out,16
171
172         vxor            $in0,$in0,$tmp
173         vsldoi          $tmp,$zero,$tmp,12      # >>32
174         vxor            $in0,$in0,$tmp
175         vsldoi          $tmp,$zero,$tmp,12      # >>32
176         vxor            $in0,$in0,$tmp
177         vxor            $in0,$in0,$key
178          vperm          $outtail,$in0,$in0,$outperm     # rotate
179          vsel           $stage,$outhead,$outtail,$outmask
180          vmr            $outhead,$outtail
181          stvx           $stage,0,$out
182
183         addi            $inp,$out,15            # 15 is not typo
184         addi            $out,$out,0x50
185
186         li              $rounds,10
187         b               Ldone
188
189 .align  4
190 L192:
191         lvx             $tmp,0,$inp
192         li              $cnt,4
193          vperm          $outtail,$in0,$in0,$outperm     # rotate
194          vsel           $stage,$outhead,$outtail,$outmask
195          vmr            $outhead,$outtail
196          stvx           $stage,0,$out
197          addi           $out,$out,16
198         vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
199         vspltisb        $key,8                  # borrow $key
200         mtctr           $cnt
201         vsububm         $mask,$mask,$key        # adjust the mask
202
203 Loop192:
204         vperm           $key,$in1,$in1,$mask    # rotate-n-splat
205         vsldoi          $tmp,$zero,$in0,12      # >>32
206         vcipherlast     $key,$key,$rcon
207
208         vxor            $in0,$in0,$tmp
209         vsldoi          $tmp,$zero,$tmp,12      # >>32
210         vxor            $in0,$in0,$tmp
211         vsldoi          $tmp,$zero,$tmp,12      # >>32
212         vxor            $in0,$in0,$tmp
213
214          vsldoi         $stage,$zero,$in1,8
215         vspltw          $tmp,$in0,3
216         vxor            $tmp,$tmp,$in1
217         vsldoi          $in1,$zero,$in1,12      # >>32
218          vadduwm        $rcon,$rcon,$rcon
219         vxor            $in1,$in1,$tmp
220         vxor            $in0,$in0,$key
221         vxor            $in1,$in1,$key
222          vsldoi         $stage,$stage,$in0,8
223
224         vperm           $key,$in1,$in1,$mask    # rotate-n-splat
225         vsldoi          $tmp,$zero,$in0,12      # >>32
226          vperm          $outtail,$stage,$stage,$outperm # rotate
227          vsel           $stage,$outhead,$outtail,$outmask
228          vmr            $outhead,$outtail
229         vcipherlast     $key,$key,$rcon
230          stvx           $stage,0,$out
231          addi           $out,$out,16
232
233          vsldoi         $stage,$in0,$in1,8
234         vxor            $in0,$in0,$tmp
235         vsldoi          $tmp,$zero,$tmp,12      # >>32
236          vperm          $outtail,$stage,$stage,$outperm # rotate
237          vsel           $stage,$outhead,$outtail,$outmask
238          vmr            $outhead,$outtail
239         vxor            $in0,$in0,$tmp
240         vsldoi          $tmp,$zero,$tmp,12      # >>32
241         vxor            $in0,$in0,$tmp
242          stvx           $stage,0,$out
243          addi           $out,$out,16
244
245         vspltw          $tmp,$in0,3
246         vxor            $tmp,$tmp,$in1
247         vsldoi          $in1,$zero,$in1,12      # >>32
248          vadduwm        $rcon,$rcon,$rcon
249         vxor            $in1,$in1,$tmp
250         vxor            $in0,$in0,$key
251         vxor            $in1,$in1,$key
252          vperm          $outtail,$in0,$in0,$outperm     # rotate
253          vsel           $stage,$outhead,$outtail,$outmask
254          vmr            $outhead,$outtail
255          stvx           $stage,0,$out
256          addi           $inp,$out,15            # 15 is not typo
257          addi           $out,$out,16
258         bdnz            Loop192
259
260         li              $rounds,12
261         addi            $out,$out,0x20
262         b               Ldone
263
264 .align  4
265 L256:
266         lvx             $tmp,0,$inp
267         li              $cnt,7
268         li              $rounds,14
269          vperm          $outtail,$in0,$in0,$outperm     # rotate
270          vsel           $stage,$outhead,$outtail,$outmask
271          vmr            $outhead,$outtail
272          stvx           $stage,0,$out
273          addi           $out,$out,16
274         vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
275         mtctr           $cnt
276
277 Loop256:
278         vperm           $key,$in1,$in1,$mask    # rotate-n-splat
279         vsldoi          $tmp,$zero,$in0,12      # >>32
280          vperm          $outtail,$in1,$in1,$outperm     # rotate
281          vsel           $stage,$outhead,$outtail,$outmask
282          vmr            $outhead,$outtail
283         vcipherlast     $key,$key,$rcon
284          stvx           $stage,0,$out
285          addi           $out,$out,16
286
287         vxor            $in0,$in0,$tmp
288         vsldoi          $tmp,$zero,$tmp,12      # >>32
289         vxor            $in0,$in0,$tmp
290         vsldoi          $tmp,$zero,$tmp,12      # >>32
291         vxor            $in0,$in0,$tmp
292          vadduwm        $rcon,$rcon,$rcon
293         vxor            $in0,$in0,$key
294          vperm          $outtail,$in0,$in0,$outperm     # rotate
295          vsel           $stage,$outhead,$outtail,$outmask
296          vmr            $outhead,$outtail
297          stvx           $stage,0,$out
298          addi           $inp,$out,15            # 15 is not typo
299          addi           $out,$out,16
300         bdz             Ldone
301
302         vspltw          $key,$in0,3             # just splat
303         vsldoi          $tmp,$zero,$in1,12      # >>32
304         vsbox           $key,$key
305
306         vxor            $in1,$in1,$tmp
307         vsldoi          $tmp,$zero,$tmp,12      # >>32
308         vxor            $in1,$in1,$tmp
309         vsldoi          $tmp,$zero,$tmp,12      # >>32
310         vxor            $in1,$in1,$tmp
311
312         vxor            $in1,$in1,$key
313         b               Loop256
314
315 .align  4
316 Ldone:
317         lvx             $in1,0,$inp             # redundant in aligned case
318         vsel            $in1,$outhead,$in1,$outmask
319         stvx            $in1,0,$inp
320         xor             r3,r3,r3                # return value
321         mtspr           256,$vrsave
322         stw             $rounds,0($out)
323
324         blr
325         .long           0
326         .byte           0,12,0x14,1,0,0,3,0
327 .size   .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
328
329 .globl  .${prefix}_set_decrypt_key
330 .align  5
331 .${prefix}_set_decrypt_key:
332         $STU            $sp,-$FRAME($sp)
333         mflr            r10
334         $PUSH           r10,$FRAME+$LRSAVE($sp)
335         bl              Lset_encrypt_key
336         mtlr            r10
337
338         slwi            $cnt,$rounds,4
339         subi            $inp,$out,240           # first round key
340         srwi            $rounds,$rounds,1
341         add             $out,$inp,$cnt          # last round key
342         mtctr           $rounds
343
344 Ldeckey:
345         lwz             r0, 0($inp)
346         lwz             r6, 4($inp)
347         lwz             r7, 8($inp)
348         lwz             r8, 12($inp)
349         addi            $inp,$inp,16
350         lwz             r9, 0($out)
351         lwz             r10,4($out)
352         lwz             r11,8($out)
353         lwz             r12,12($out)
354         stw             r0, 0($out)
355         stw             r6, 4($out)
356         stw             r7, 8($out)
357         stw             r8, 12($out)
358         subi            $out,$out,16
359         stw             r9, -16($inp)
360         stw             r10,-12($inp)
361         stw             r11,-8($inp)
362         stw             r12,-4($inp)
363         bdnz            Ldeckey
364
365         xor             r3,r3,r3                # return value
366         addi            $sp,$sp,$FRAME
367         blr
368         .long           0
369         .byte           0,12,4,1,0x80,0,3,0
370 .size   .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
371 ___
372 }}}
373 {{{
374 sub gen_block () {
375 my $dir = shift;
376 my $n   = $dir eq "de" ? "n" : "";
377 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
378
379 $code.=<<___;
380 .globl  .${prefix}_${dir}crypt
381 .align  5
382 .${prefix}_${dir}crypt:
383         lwz             $rounds,240($key)
384         lis             r0,0xfc00
385         mfspr           $vrsave,256
386         li              $idx,15                 # 15 is not typo
387         mtspr           256,r0
388
389         lvx             v0,0,$inp
390         neg             r11,$out
391         lvx             v1,$idx,$inp
392         lvsl            v2,0,$inp               # inpperm
393         `"vspltisb      v4,0x0f"                if ($LITTLE_ENDIAN)`
394         ?lvsl           v3,0,r11                # outperm
395         `"vxor          v2,v2,v4"               if ($LITTLE_ENDIAN)`
396         li              $idx,16
397         vperm           v0,v0,v1,v2             # align [and byte swap in LE]
398         lvx             v1,0,$key
399         ?lvsl           v5,0,$key               # keyperm
400         srwi            $rounds,$rounds,1
401         lvx             v2,$idx,$key
402         addi            $idx,$idx,16
403         subi            $rounds,$rounds,1
404         ?vperm          v1,v1,v2,v5             # align round key
405
406         vxor            v0,v0,v1
407         lvx             v1,$idx,$key
408         addi            $idx,$idx,16
409         mtctr           $rounds
410
411 Loop_${dir}c:
412         ?vperm          v2,v2,v1,v5
413         v${n}cipher     v0,v0,v2
414         lvx             v2,$idx,$key
415         addi            $idx,$idx,16
416         ?vperm          v1,v1,v2,v5
417         v${n}cipher     v0,v0,v1
418         lvx             v1,$idx,$key
419         addi            $idx,$idx,16
420         bdnz            Loop_${dir}c
421
422         ?vperm          v2,v2,v1,v5
423         v${n}cipher     v0,v0,v2
424         lvx             v2,$idx,$key
425         ?vperm          v1,v1,v2,v5
426         v${n}cipherlast v0,v0,v1
427
428         vspltisb        v2,-1
429         vxor            v1,v1,v1
430         li              $idx,15                 # 15 is not typo
431         ?vperm          v2,v1,v2,v3             # outmask
432         `"vxor          v3,v3,v4"               if ($LITTLE_ENDIAN)`
433         lvx             v1,0,$out               # outhead
434         vperm           v0,v0,v0,v3             # rotate [and byte swap in LE]
435         vsel            v1,v1,v0,v2
436         lvx             v4,$idx,$out
437         stvx            v1,0,$out
438         vsel            v0,v0,v4,v2
439         stvx            v0,$idx,$out
440
441         mtspr           256,$vrsave
442         blr
443         .long           0
444         .byte           0,12,0x14,0,0,0,3,0
445 .size   .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
446 ___
447 }
448 &gen_block("en");
449 &gen_block("de");
450 }}}
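
# The two block routines above follow the usual single-block convention:
# input pointer in r3, output pointer in r4, key schedule in r5, with the
# round count read from offset 240 of the key schedule.  In C terms they
# correspond to the standard prototypes (a sketch, assuming the usual
# OpenSSL AES_KEY layout):
#
#   void AES_encrypt(const unsigned char *in, unsigned char *out,
#                    const AES_KEY *key);
#   void AES_decrypt(const unsigned char *in, unsigned char *out,
#                    const AES_KEY *key);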
451 {{{
452 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
453 my ($rndkey0,$rndkey1,$inout,$tmp)=map("v$_",(0..3));
454 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=map("v$_",(4..10));
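
# The r3..r8 mapping above mirrors the standard OpenSSL CBC entry point
# (a sketch, assuming the usual prototype):
#
#   void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
#                        size_t length, const AES_KEY *key,
#                        unsigned char *ivec, const int enc);
#
# Inputs shorter than 16 bytes return immediately ("bltlr-" below), and the
# eightfold decrypt path is entered only for 128 bytes or more.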
455
456 $code.=<<___;
457 .globl  .${prefix}_cbc_encrypt
458 .align  5
459 .${prefix}_cbc_encrypt:
460         ${UCMP}i        $len,16
461         bltlr-
462
463         cmpwi           $enc,0                  # test direction
464         lis             r0,0xffe0
465         mfspr           $vrsave,256
466         mtspr           256,r0
467
468         li              $idx,15
469         vxor            $rndkey0,$rndkey0,$rndkey0
470         `"vspltisb      $tmp,0x0f"              if ($LITTLE_ENDIAN)`
471
472         lvx             $ivec,0,$ivp            # load [unaligned] iv
473         lvsl            $inpperm,0,$ivp
474         lvx             $inptail,$idx,$ivp
475         `"vxor          $inpperm,$inpperm,$tmp" if ($LITTLE_ENDIAN)`
476         vperm           $ivec,$ivec,$inptail,$inpperm
477
478         neg             r11,$inp
479         ?lvsl           $keyperm,0,$key         # prepare for unaligned key
480         lwz             $rounds,240($key)
481
482         lvsr            $inpperm,0,r11          # prepare for unaligned load
483         lvx             $inptail,0,$inp
484         addi            $inp,$inp,15            # 15 is not typo
485         `"vxor          $inpperm,$inpperm,$tmp" if ($LITTLE_ENDIAN)`
486
487         ?lvsr           $outperm,0,$out         # prepare for unaligned store
488         vspltisb        $outmask,-1
489         lvx             $outhead,0,$out
490         ?vperm          $outmask,$rndkey0,$outmask,$outperm
491         `"vxor          $outperm,$outperm,$tmp" if ($LITTLE_ENDIAN)`
492
493         srwi            $rounds,$rounds,1
494         li              $idx,16
495         subi            $rounds,$rounds,1
496         beq             Lcbc_dec
497
498 Lcbc_enc:
499         vmr             $inout,$inptail
500         lvx             $inptail,0,$inp
501         addi            $inp,$inp,16
502         mtctr           $rounds
503         subi            $len,$len,16            # len-=16
504
505         lvx             $rndkey0,0,$key
506          vperm          $inout,$inout,$inptail,$inpperm
507         lvx             $rndkey1,$idx,$key
508         addi            $idx,$idx,16
509         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
510         vxor            $inout,$inout,$rndkey0
511         lvx             $rndkey0,$idx,$key
512         addi            $idx,$idx,16
513         vxor            $inout,$inout,$ivec
514
515 Loop_cbc_enc:
516         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
517         vcipher         $inout,$inout,$rndkey1
518         lvx             $rndkey1,$idx,$key
519         addi            $idx,$idx,16
520         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
521         vcipher         $inout,$inout,$rndkey0
522         lvx             $rndkey0,$idx,$key
523         addi            $idx,$idx,16
524         bdnz            Loop_cbc_enc
525
526         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
527         vcipher         $inout,$inout,$rndkey1
528         lvx             $rndkey1,$idx,$key
529         li              $idx,16
530         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
531         vcipherlast     $ivec,$inout,$rndkey0
532         ${UCMP}i        $len,16
533
534         vperm           $tmp,$ivec,$ivec,$outperm
535         vsel            $inout,$outhead,$tmp,$outmask
536         vmr             $outhead,$tmp
537         stvx            $inout,0,$out
538         addi            $out,$out,16
539         bge             Lcbc_enc
540
541         b               Lcbc_done
542
543 .align  4
544 Lcbc_dec:
545         ${UCMP}i        $len,128
546         bge             _aesp8_cbc_decrypt8x
547         vmr             $tmp,$inptail
548         lvx             $inptail,0,$inp
549         addi            $inp,$inp,16
550         mtctr           $rounds
551         subi            $len,$len,16            # len-=16
552
553         lvx             $rndkey0,0,$key
554          vperm          $tmp,$tmp,$inptail,$inpperm
555         lvx             $rndkey1,$idx,$key
556         addi            $idx,$idx,16
557         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
558         vxor            $inout,$tmp,$rndkey0
559         lvx             $rndkey0,$idx,$key
560         addi            $idx,$idx,16
561
562 Loop_cbc_dec:
563         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
564         vncipher        $inout,$inout,$rndkey1
565         lvx             $rndkey1,$idx,$key
566         addi            $idx,$idx,16
567         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
568         vncipher        $inout,$inout,$rndkey0
569         lvx             $rndkey0,$idx,$key
570         addi            $idx,$idx,16
571         bdnz            Loop_cbc_dec
572
573         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
574         vncipher        $inout,$inout,$rndkey1
575         lvx             $rndkey1,$idx,$key
576         li              $idx,16
577         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
578         vncipherlast    $inout,$inout,$rndkey0
579         ${UCMP}i        $len,16
580
581         vxor            $inout,$inout,$ivec
582         vmr             $ivec,$tmp
583         vperm           $tmp,$inout,$inout,$outperm
584         vsel            $inout,$outhead,$tmp,$outmask
585         vmr             $outhead,$tmp
586         stvx            $inout,0,$out
587         addi            $out,$out,16
588         bge             Lcbc_dec
589
590 Lcbc_done:
591         addi            $out,$out,-1
592         lvx             $inout,0,$out           # redundant in aligned case
593         vsel            $inout,$outhead,$inout,$outmask
594         stvx            $inout,0,$out
595
596         neg             $enc,$ivp               # write [unaligned] iv
597         li              $idx,15                 # 15 is not typo
598         vxor            $rndkey0,$rndkey0,$rndkey0
599         vspltisb        $outmask,-1
600         `"vspltisb      $tmp,0x0f"              if ($LITTLE_ENDIAN)`
601         ?lvsl           $outperm,0,$enc
602         ?vperm          $outmask,$rndkey0,$outmask,$outperm
603         `"vxor          $outperm,$outperm,$tmp" if ($LITTLE_ENDIAN)`
604         lvx             $outhead,0,$ivp
605         vperm           $ivec,$ivec,$ivec,$outperm
606         vsel            $inout,$outhead,$ivec,$outmask
607         lvx             $inptail,$idx,$ivp
608         stvx            $inout,0,$ivp
609         vsel            $inout,$ivec,$inptail,$outmask
610         stvx            $inout,$idx,$ivp
611
612         mtspr           256,$vrsave
613         blr
614         .long           0
615         .byte           0,12,0x14,0,0,0,6,0
616 ___
617 {{
618 my $key_="r11";
619 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
620 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
621 my $rndkey0="v23";      # v24-v25 rotating buffer for first round keys
622                         # v26-v31 last 6 round keys
623 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
624
625 $code.=<<___;
626 .align  5
627 _aesp8_cbc_decrypt8x:
628         $STU            $sp,-`($FRAME+21*16)`($sp)
629         li              r10,`$FRAME+8*16+15`
630         li              r11,`$FRAME+8*16+31`
631         stvx            v20,r10,$sp             # ABI says so
632         addi            r10,r10,32
633         stvx            v21,r11,$sp
634         addi            r11,r11,32
635         stvx            v22,r10,$sp
636         addi            r10,r10,32
637         stvx            v23,r11,$sp
638         addi            r11,r11,32
639         stvx            v24,r10,$sp
640         addi            r10,r10,32
641         stvx            v25,r11,$sp
642         addi            r11,r11,32
643         stvx            v26,r10,$sp
644         addi            r10,r10,32
645         stvx            v27,r11,$sp
646         addi            r11,r11,32
647         stvx            v28,r10,$sp
648         addi            r10,r10,32
649         stvx            v29,r11,$sp
650         addi            r11,r11,32
651         stvx            v30,r10,$sp
652         stvx            v31,r11,$sp
653         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
654         li              r0,-1
655         mtspr           256,r0
656
657         subi            $rounds,$rounds,3       # -4 in total
658         subi            $len,$len,128           # bias
659
660         addi            $idx,$key,16            # load key schedule
661         lvx             $rndkey0,0,$key
662         addi            $key,$key,32
663         lvx             v30,0,$idx
664         addi            $idx,$idx,32
665         lvx             v31,0,$key
666         addi            $key,$key,32
667         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
668         addi            $key_,$sp,$FRAME+15
669         mtctr           $rounds
670
671 Load_cbc_dec_key:
672         ?vperm          v24,v30,v31,$keyperm
673         lvx             v30,0,$idx
674         addi            $idx,$idx,32
675         stvx            v24,0,$key_             # off-load round[1]
676         addi            $key_,$key_,16
677         ?vperm          v25,v31,v30,$keyperm
678         lvx             v31,0,$key
679         addi            $key,$key,32
680         stvx            v25,0,$key_             # off-load round[2]
681         addi            $key_,$key_,16
682         bdnz            Load_cbc_dec_key
683
684         lvx             v26,0,$idx
685         addi            $idx,$idx,32
686         ?vperm          v24,v30,v31,$keyperm
687         lvx             v27,0,$key
688         addi            $key,$key,32
689         stvx            v24,0,$key_             # off-load round[3]
690         addi            $key_,$key_,16
691         ?vperm          v25,v31,v26,$keyperm
692         lvx             v28,0,$idx
693         addi            $idx,$idx,32
694         stvx            v25,0,$key_             # off-load round[4]
695         addi            $key_,$sp,$FRAME+15     # rewind $key_
696         ?vperm          v26,v26,v27,$keyperm
697         lvx             v29,0,$key
698         addi            $key,$key,32
699         ?vperm          v27,v27,v28,$keyperm
700         lvx             v30,0,$idx
701         addi            $idx,$idx,32
702         ?vperm          v28,v28,v29,$keyperm
703         lvx             v31,0,$key
704         ?vperm          v29,v29,v30,$keyperm
705         lvx             $out0,0,$idx            # borrow $out0
706         ?vperm          v30,v30,v31,$keyperm
707         lvx             v24,0,$key_             # pre-load round[1]
708         addi            $key_,$key_,16
709         ?vperm          v31,v31,$out0,$keyperm
710         lvx             v25,0,$key_             # pre-load round[2]
711         addi            $key_,$key_,16
712
713
714         #lvx            $inptail,0,$inp         # "caller" already did this
715         #addi           $inp,$inp,15            # 15 is not typo
716
717         lvx             $in1,0,$inp             # load first 8 "words"
718         addi            $inp,$inp,16
719         lvx             $in2,0,$inp
720         addi            $inp,$inp,16
721         lvx             $in3,0,$inp
722         addi            $inp,$inp,16
723         vperm           $in0,$inptail,$in1,$inpperm
724         lvx             $in4,0,$inp
725         addi            $inp,$inp,16
726         vperm           $in1,$in1,$in2,$inpperm
727         lvx             $in5,0,$inp
728         addi            $inp,$inp,16
729         vperm           $in2,$in2,$in3,$inpperm
730         vxor            $out0,$in0,$rndkey0
731         lvx             $in6,0,$inp
732         addi            $inp,$inp,16
733         vperm           $in3,$in3,$in4,$inpperm
734         vxor            $out1,$in1,$rndkey0
735         lvx             $in7,0,$inp
736         addi            $inp,$inp,16
737         vperm           $in4,$in4,$in5,$inpperm
738         vxor            $out2,$in2,$rndkey0
739         lvx             $inptail,0,$inp
740         addi            $inp,$inp,16
741         vperm           $in5,$in5,$in6,$inpperm
742         vxor            $out3,$in3,$rndkey0
743         vperm           $in6,$in6,$in7,$inpperm
744         vxor            $out4,$in4,$rndkey0
745         vperm           $in7,$in7,$inptail,$inpperm
746         vxor            $out5,$in5,$rndkey0
747         vxor            $out6,$in6,$rndkey0
748         vxor            $out7,$in7,$rndkey0
749
750         mtctr           $rounds
751 Loop_cbc_dec8x:
752         vncipher        $out0,$out0,v24
753         vncipher        $out1,$out1,v24
754         vncipher        $out2,$out2,v24
755         vncipher        $out3,$out3,v24
756         vncipher        $out4,$out4,v24
757         vncipher        $out5,$out5,v24
758         vncipher        $out6,$out6,v24
759         vncipher        $out7,$out7,v24
760         lvx             v24,0,$key_             # round[3]
761         addi            $key_,$key_,16
762
763         vncipher        $out0,$out0,v25
764         vncipher        $out1,$out1,v25
765         vncipher        $out2,$out2,v25
766         vncipher        $out3,$out3,v25
767         vncipher        $out4,$out4,v25
768         vncipher        $out5,$out5,v25
769         vncipher        $out6,$out6,v25
770         vncipher        $out7,$out7,v25
771         lvx             v25,0,$key_             # round[4]
772         addi            $key_,$key_,16
773         bdnz            Loop_cbc_dec8x
774
775         subic           $len,$len,128           # $len-=128
776         vncipher        $out0,$out0,v24
777         vncipher        $out1,$out1,v24
778         vncipher        $out2,$out2,v24
779         vncipher        $out3,$out3,v24
780         vncipher        $out4,$out4,v24
781         vncipher        $out5,$out5,v24
782         vncipher        $out6,$out6,v24
783         vncipher        $out7,$out7,v24
784
785         subfe.          r0,r0,r0                # borrow?-1:0
786         vncipher        $out0,$out0,v25
787         vncipher        $out1,$out1,v25
788         vncipher        $out2,$out2,v25
789         vncipher        $out3,$out3,v25
790         vncipher        $out4,$out4,v25
791         vncipher        $out5,$out5,v25
792         vncipher        $out6,$out6,v25
793         vncipher        $out7,$out7,v25
794
795         and             r0,r0,$len
796         vncipher        $out0,$out0,v26
797         vncipher        $out1,$out1,v26
798         vncipher        $out2,$out2,v26
799         vncipher        $out3,$out3,v26
800         vncipher        $out4,$out4,v26
801         vncipher        $out5,$out5,v26
802         vncipher        $out6,$out6,v26
803         vncipher        $out7,$out7,v26
804
805         add             $inp,$inp,r0            # $inp is adjusted in such
806                                                 # a way that at exit from
807                                                 # the loop inX-in7 are
808                                                 # loaded with the last "words"
809         vncipher        $out0,$out0,v27
810         vncipher        $out1,$out1,v27
811         vncipher        $out2,$out2,v27
812         vncipher        $out3,$out3,v27
813         vncipher        $out4,$out4,v27
814         vncipher        $out5,$out5,v27
815         vncipher        $out6,$out6,v27
816         vncipher        $out7,$out7,v27
817
818         addi            $key_,$sp,$FRAME+15     # rewind $key_
819         vncipher        $out0,$out0,v28
820         vncipher        $out1,$out1,v28
821         vncipher        $out2,$out2,v28
822         vncipher        $out3,$out3,v28
823         vncipher        $out4,$out4,v28
824         vncipher        $out5,$out5,v28
825         vncipher        $out6,$out6,v28
826         vncipher        $out7,$out7,v28
827         lvx             v24,0,$key_             # re-pre-load round[1]
828         addi            $key_,$key_,16
829
830         vncipher        $out0,$out0,v29
831         vncipher        $out1,$out1,v29
832         vncipher        $out2,$out2,v29
833         vncipher        $out3,$out3,v29
834         vncipher        $out4,$out4,v29
835         vncipher        $out5,$out5,v29
836         vncipher        $out6,$out6,v29
837         vncipher        $out7,$out7,v29
838         lvx             v25,0,$key_             # re-pre-load round[2]
839         addi            $key_,$key_,16
840
841         vncipher        $out0,$out0,v30
842          vxor           $ivec,$ivec,v31         # xor with last round key
843         vncipher        $out1,$out1,v30
844          vxor           $in0,$in0,v31
845         vncipher        $out2,$out2,v30
846          vxor           $in1,$in1,v31
847         vncipher        $out3,$out3,v30
848          vxor           $in2,$in2,v31
849         vncipher        $out4,$out4,v30
850          vxor           $in3,$in3,v31
851         vncipher        $out5,$out5,v30
852          vxor           $in4,$in4,v31
853         vncipher        $out6,$out6,v30
854          vxor           $in5,$in5,v31
855         vncipher        $out7,$out7,v30
856          vxor           $in6,$in6,v31
857
858         vncipherlast    $out0,$out0,$ivec
859         vncipherlast    $out1,$out1,$in0
860         vncipherlast    $out2,$out2,$in1
861          lvx            $in1,0,$inp             # load next input block
862          addi           $inp,$inp,16
863         vncipherlast    $out3,$out3,$in2
864          lvx            $in2,0,$inp
865          addi           $inp,$inp,16
866         vncipherlast    $out4,$out4,$in3
867          lvx            $in3,0,$inp
868          addi           $inp,$inp,16
869          vperm          $in0,$inptail,$in1,$inpperm
870         vncipherlast    $out5,$out5,$in4
871          lvx            $in4,0,$inp
872          addi           $inp,$inp,16
873          vperm          $in1,$in1,$in2,$inpperm
874         vncipherlast    $out6,$out6,$in5
875          lvx            $in5,0,$inp
876          addi           $inp,$inp,16
877          vperm          $in2,$in2,$in3,$inpperm
878         vncipherlast    $out7,$out7,$in6
879          lvx            $in6,0,$inp
880          addi           $inp,$inp,16
881          vperm          $in3,$in3,$in4,$inpperm
882         vmr             $ivec,$in7
883
884
885         vperm           $out0,$out0,$out0,$outperm
886          lvx            $in7,0,$inp
887          addi           $inp,$inp,16
888         vperm           $out1,$out1,$out1,$outperm
889         vsel            $outhead,$outhead,$out0,$outmask
890          vperm          $in4,$in4,$in5,$inpperm
891          lvx            $inptail,0,$inp
892          addi           $inp,$inp,16
893         vsel            $out0,$out0,$out1,$outmask
894         stvx            $outhead,0,$out
895         addi            $out,$out,16
896
897         vperm           $out2,$out2,$out2,$outperm
898          vperm          $in5,$in5,$in6,$inpperm
899         vsel            $out1,$out1,$out2,$outmask
900         stvx            $out0,0,$out
901         addi            $out,$out,16
902
903         vperm           $out3,$out3,$out3,$outperm
904          vxor           $out0,$in0,$rndkey0
905          vperm          $in6,$in6,$in7,$inpperm
906         vsel            $out2,$out2,$out3,$outmask
907         stvx            $out1,0,$out
908         addi            $out,$out,16
909
910         vperm           $out4,$out4,$out4,$outperm
911          vxor           $out1,$in1,$rndkey0
912          vperm          $in7,$in7,$inptail,$inpperm
913         vsel            $out3,$out3,$out4,$outmask
914         stvx            $out2,0,$out
915         addi            $out,$out,16
916
917         vperm           $out5,$out5,$out5,$outperm
918          vxor           $out2,$in2,$rndkey0
919         vsel            $out4,$out4,$out5,$outmask
920         stvx            $out3,0,$out
921         addi            $out,$out,16
922
923         vperm           $out6,$out6,$out6,$outperm
924          vxor           $out3,$in3,$rndkey0
925         vsel            $out5,$out5,$out6,$outmask
926         stvx            $out4,0,$out
927         addi            $out,$out,16
928
929         vperm           $outhead,$out7,$out7,$outperm
930          vxor           $out4,$in4,$rndkey0
931         vsel            $out6,$out6,$outhead,$outmask
932         stvx            $out5,0,$out
933         addi            $out,$out,16
934
935          vxor           $out5,$in5,$rndkey0
936         stvx            $out6,0,$out
937         addi            $out,$out,16
938
939          vxor           $out6,$in6,$rndkey0
940          vxor           $out7,$in7,$rndkey0
941
942         mtctr           $rounds
943         beq             Loop_cbc_dec8x          # did $len-=128 borrow?
944
945         addic.          $len,$len,128
946         beq             Lcbc_dec8x_done
947         nop
948
949 Loop_cbc_dec8x_tail:                            # up to 7 "words" tail...
950         vncipher        $out1,$out1,v24
951         vncipher        $out2,$out2,v24
952         vncipher        $out3,$out3,v24
953         vncipher        $out4,$out4,v24
954         vncipher        $out5,$out5,v24
955         vncipher        $out6,$out6,v24
956         vncipher        $out7,$out7,v24
957         lvx             v24,0,$key_             # round[3]
958         addi            $key_,$key_,16
959
960         vncipher        $out1,$out1,v25
961         vncipher        $out2,$out2,v25
962         vncipher        $out3,$out3,v25
963         vncipher        $out4,$out4,v25
964         vncipher        $out5,$out5,v25
965         vncipher        $out6,$out6,v25
966         vncipher        $out7,$out7,v25
967         lvx             v25,0,$key_             # round[4]
968         addi            $key_,$key_,16
969         bdnz            Loop_cbc_dec8x_tail
970
971         vncipher        $out1,$out1,v24
972         vncipher        $out2,$out2,v24
973         vncipher        $out3,$out3,v24
974         vncipher        $out4,$out4,v24
975         vncipher        $out5,$out5,v24
976         vncipher        $out6,$out6,v24
977         vncipher        $out7,$out7,v24
978
979         vncipher        $out1,$out1,v25
980         vncipher        $out2,$out2,v25
981         vncipher        $out3,$out3,v25
982         vncipher        $out4,$out4,v25
983         vncipher        $out5,$out5,v25
984         vncipher        $out6,$out6,v25
985         vncipher        $out7,$out7,v25
986
987         vncipher        $out1,$out1,v26
988         vncipher        $out2,$out2,v26
989         vncipher        $out3,$out3,v26
990         vncipher        $out4,$out4,v26
991         vncipher        $out5,$out5,v26
992         vncipher        $out6,$out6,v26
993         vncipher        $out7,$out7,v26
994
995         vncipher        $out1,$out1,v27
996         vncipher        $out2,$out2,v27
997         vncipher        $out3,$out3,v27
998         vncipher        $out4,$out4,v27
999         vncipher        $out5,$out5,v27
1000         vncipher        $out6,$out6,v27
1001         vncipher        $out7,$out7,v27
1002
1003         vncipher        $out1,$out1,v28
1004         vncipher        $out2,$out2,v28
1005         vncipher        $out3,$out3,v28
1006         vncipher        $out4,$out4,v28
1007         vncipher        $out5,$out5,v28
1008         vncipher        $out6,$out6,v28
1009         vncipher        $out7,$out7,v28
1010
1011         vncipher        $out1,$out1,v29
1012         vncipher        $out2,$out2,v29
1013         vncipher        $out3,$out3,v29
1014         vncipher        $out4,$out4,v29
1015         vncipher        $out5,$out5,v29
1016         vncipher        $out6,$out6,v29
1017         vncipher        $out7,$out7,v29
1018
1019         vncipher        $out1,$out1,v30
1020          vxor           $ivec,$ivec,v31         # last round key
1021         vncipher        $out2,$out2,v30
1022          vxor           $in1,$in1,v31
1023         vncipher        $out3,$out3,v30
1024          vxor           $in2,$in2,v31
1025         vncipher        $out4,$out4,v30
1026          vxor           $in3,$in3,v31
1027         vncipher        $out5,$out5,v30
1028          vxor           $in4,$in4,v31
1029         vncipher        $out6,$out6,v30
1030          vxor           $in5,$in5,v31
1031         vncipher        $out7,$out7,v30
1032          vxor           $in6,$in6,v31
1033
1034         cmplwi          $len,32                 # switch($len)
1035         blt             Lcbc_dec8x_one
1036         nop
1037         beq             Lcbc_dec8x_two
1038         cmplwi          $len,64
1039         blt             Lcbc_dec8x_three
1040         nop
1041         beq             Lcbc_dec8x_four
1042         cmplwi          $len,96
1043         blt             Lcbc_dec8x_five
1044         nop
1045         beq             Lcbc_dec8x_six
1046
1047         vncipherlast    $out1,$out1,$ivec
1048         vncipherlast    $out2,$out2,$in1
1049         vncipherlast    $out3,$out3,$in2
1050         vncipherlast    $out4,$out4,$in3
1051         vncipherlast    $out5,$out5,$in4
1052         vncipherlast    $out6,$out6,$in5
1053         vncipherlast    $out7,$out7,$in6
1054         vmr             $ivec,$in7
1055
1056         vperm           $out1,$out1,$out1,$outperm
1057         vsel            $outhead,$outhead,$out1,$outmask
1058         stvx            $outhead,0,$out
1059         addi            $out,$out,16
1060
1061         vperm           $out2,$out2,$out2,$outperm
1062         vsel            $out1,$out1,$out2,$outmask
1063         stvx            $out1,0,$out
1064         addi            $out,$out,16
1065
1066         vperm           $out3,$out3,$out3,$outperm
1067         vsel            $out2,$out2,$out3,$outmask
1068         stvx            $out2,0,$out
1069         addi            $out,$out,16
1070
1071         vperm           $out4,$out4,$out4,$outperm
1072         vsel            $out3,$out3,$out4,$outmask
1073         stvx            $out3,0,$out
1074         addi            $out,$out,16
1075
1076         vperm           $out5,$out5,$out5,$outperm
1077         vsel            $out4,$out4,$out5,$outmask
1078         stvx            $out4,0,$out
1079         addi            $out,$out,16
1080
1081         vperm           $out6,$out6,$out6,$outperm
1082         vsel            $out5,$out5,$out6,$outmask
1083         stvx            $out5,0,$out
1084         addi            $out,$out,16
1085
1086         vperm           $outhead,$out7,$out7,$outperm
1087         vsel            $out6,$out6,$outhead,$outmask
1088         stvx            $out6,0,$out
1089         addi            $out,$out,16
1090         b               Lcbc_dec8x_done
1091
1092 .align  5
1093 Lcbc_dec8x_six:
1094         vncipherlast    $out2,$out2,$ivec
1095         vncipherlast    $out3,$out3,$in2
1096         vncipherlast    $out4,$out4,$in3
1097         vncipherlast    $out5,$out5,$in4
1098         vncipherlast    $out6,$out6,$in5
1099         vncipherlast    $out7,$out7,$in6
1100         vmr             $ivec,$in7
1101
1102         vperm           $out2,$out2,$out2,$outperm
1103         vsel            $outhead,$outhead,$out2,$outmask
1104         stvx            $outhead,0,$out
1105         addi            $out,$out,16
1106
1107         vperm           $out3,$out3,$out3,$outperm
1108         vsel            $out2,$out2,$out3,$outmask
1109         stvx            $out2,0,$out
1110         addi            $out,$out,16
1111
1112         vperm           $out4,$out4,$out4,$outperm
1113         vsel            $out3,$out3,$out4,$outmask
1114         stvx            $out3,0,$out
1115         addi            $out,$out,16
1116
1117         vperm           $out5,$out5,$out5,$outperm
1118         vsel            $out4,$out4,$out5,$outmask
1119         stvx            $out4,0,$out
1120         addi            $out,$out,16
1121
1122         vperm           $out6,$out6,$out6,$outperm
1123         vsel            $out5,$out5,$out6,$outmask
1124         stvx            $out5,0,$out
1125         addi            $out,$out,16
1126
1127         vperm           $outhead,$out7,$out7,$outperm
1128         vsel            $out6,$out6,$outhead,$outmask
1129         stvx            $out6,0,$out
1130         addi            $out,$out,16
1131         b               Lcbc_dec8x_done
1132
1133 .align  5
1134 Lcbc_dec8x_five:
1135         vncipherlast    $out3,$out3,$ivec
1136         vncipherlast    $out4,$out4,$in3
1137         vncipherlast    $out5,$out5,$in4
1138         vncipherlast    $out6,$out6,$in5
1139         vncipherlast    $out7,$out7,$in6
1140         vmr             $ivec,$in7
1141
1142         vperm           $out3,$out3,$out3,$outperm
1143         vsel            $outhead,$outhead,$out3,$outmask
1144         stvx            $outhead,0,$out
1145         addi            $out,$out,16
1146
1147         vperm           $out4,$out4,$out4,$outperm
1148         vsel            $out3,$out3,$out4,$outmask
1149         stvx            $out3,0,$out
1150         addi            $out,$out,16
1151
1152         vperm           $out5,$out5,$out5,$outperm
1153         vsel            $out4,$out4,$out5,$outmask
1154         stvx            $out4,0,$out
1155         addi            $out,$out,16
1156
1157         vperm           $out6,$out6,$out6,$outperm
1158         vsel            $out5,$out5,$out6,$outmask
1159         stvx            $out5,0,$out
1160         addi            $out,$out,16
1161
1162         vperm           $outhead,$out7,$out7,$outperm
1163         vsel            $out6,$out6,$outhead,$outmask
1164         stvx            $out6,0,$out
1165         addi            $out,$out,16
1166         b               Lcbc_dec8x_done
1167
1168 .align  5
1169 Lcbc_dec8x_four:
1170         vncipherlast    $out4,$out4,$ivec
1171         vncipherlast    $out5,$out5,$in4
1172         vncipherlast    $out6,$out6,$in5
1173         vncipherlast    $out7,$out7,$in6
1174         vmr             $ivec,$in7
1175
1176         vperm           $out4,$out4,$out4,$outperm
1177         vsel            $outhead,$outhead,$out4,$outmask
1178         stvx            $outhead,0,$out
1179         addi            $out,$out,16
1180
1181         vperm           $out5,$out5,$out5,$outperm
1182         vsel            $out4,$out4,$out5,$outmask
1183         stvx            $out4,0,$out
1184         addi            $out,$out,16
1185
1186         vperm           $out6,$out6,$out6,$outperm
1187         vsel            $out5,$out5,$out6,$outmask
1188         stvx            $out5,0,$out
1189         addi            $out,$out,16
1190
1191         vperm           $outhead,$out7,$out7,$outperm
1192         vsel            $out6,$out6,$outhead,$outmask
1193         stvx            $out6,0,$out
1194         addi            $out,$out,16
1195         b               Lcbc_dec8x_done
1196
1197 .align  5
1198 Lcbc_dec8x_three:
1199         vncipherlast    $out5,$out5,$ivec
1200         vncipherlast    $out6,$out6,$in5
1201         vncipherlast    $out7,$out7,$in6
1202         vmr             $ivec,$in7
1203
1204         vperm           $out5,$out5,$out5,$outperm
1205         vsel            $outhead,$outhead,$out5,$outmask
1206         stvx            $outhead,0,$out
1207         addi            $out,$out,16
1208
1209         vperm           $out6,$out6,$out6,$outperm
1210         vsel            $out5,$out5,$out6,$outmask
1211         stvx            $out5,0,$out
1212         addi            $out,$out,16
1213
1214         vperm           $outhead,$out7,$out7,$outperm
1215         vsel            $out6,$out6,$outhead,$outmask
1216         stvx            $out6,0,$out
1217         addi            $out,$out,16
1218         b               Lcbc_dec8x_done
1219
1220 .align  5
1221 Lcbc_dec8x_two:
1222         vncipherlast    $out6,$out6,$ivec
1223         vncipherlast    $out7,$out7,$in6
1224         vmr             $ivec,$in7
1225
1226         vperm           $out6,$out6,$out6,$outperm
1227         vsel            $outhead,$outhead,$out6,$outmask
1228         stvx            $outhead,0,$out
1229         addi            $out,$out,16
1230
1231         vperm           $outhead,$out7,$out7,$outperm
1232         vsel            $out6,$out6,$outhead,$outmask
1233         stvx            $out6,0,$out
1234         addi            $out,$out,16
1235         b               Lcbc_dec8x_done
1236
1237 .align  5
1238 Lcbc_dec8x_one:
1239         vncipherlast    $out7,$out7,$ivec
1240         vmr             $ivec,$in7
1241
1242         vperm           $out7,$out7,$out7,$outperm
1243         vsel            $outhead,$outhead,$out7,$outmask
1244         stvx            $outhead,0,$out
1245         addi            $out,$out,16
1246         vmr             $outhead,$out7
1247         nop
1248
1249 Lcbc_dec8x_done:
1250         addi            $out,$out,-1
1251         lvx             $out7,0,$out            # redundant in aligned case
1252         vsel            $out7,$outhead,$out7,$outmask
1253         stvx            $out7,0,$out
1254
1255         neg             $enc,$ivp               # write [unaligned] iv
1256         li              $idx,15                 # 15 is not typo
1257         vxor            $rndkey0,$rndkey0,$rndkey0
1258         vspltisb        $outmask,-1
1259         `"vspltisb      $tmp,0x0f"              if ($LITTLE_ENDIAN)`
1260         ?lvsl           $outperm,0,$enc
1261         ?vperm          $outmask,$rndkey0,$outmask,$outperm
1262         `"vxor          $outperm,$outperm,$tmp" if ($LITTLE_ENDIAN)`
1263         lvx             $outhead,0,$ivp
1264         vperm           $ivec,$ivec,$ivec,$outperm
1265         vsel            $in0,$outhead,$ivec,$outmask
1266         lvx             $inptail,$idx,$ivp
1267         stvx            $in0,0,$ivp
1268         vsel            $in0,$ivec,$inptail,$outmask
1269         stvx            $in0,$idx,$ivp
1270
1271         li              r10,`$FRAME+15`
1272         li              r11,`$FRAME+31`
1273         stvx            $outmask,r10,$sp        # wipe copies of rounds keys
1274         addi            r10,r10,32
1275         stvx            $outmask,r11,$sp
1276         addi            r11,r11,32
1277         stvx            $outmask,r10,$sp
1278         addi            r10,r10,32
1279         stvx            $outmask,r11,$sp
1280         addi            r11,r11,32
1281         stvx            $outmask,r10,$sp
1282         addi            r10,r10,32
1283         stvx            $outmask,r11,$sp
1284         addi            r11,r11,32
1285         stvx            $outmask,r10,$sp
1286         addi            r10,r10,32
1287         stvx            $outmask,r11,$sp
1288         addi            r11,r11,32
1289
1290         mtspr           256,$vrsave
1291         lvx             v20,r10,$sp             # ABI says so
1292         addi            r10,r10,32
1293         lvx             v21,r11,$sp
1294         addi            r11,r11,32
1295         lvx             v22,r10,$sp
1296         addi            r10,r10,32
1297         lvx             v23,r11,$sp
1298         addi            r11,r11,32
1299         lvx             v24,r10,$sp
1300         addi            r10,r10,32
1301         lvx             v25,r11,$sp
1302         addi            r11,r11,32
1303         lvx             v26,r10,$sp
1304         addi            r10,r10,32
1305         lvx             v27,r11,$sp
1306         addi            r11,r11,32
1307         lvx             v28,r10,$sp
1308         addi            r10,r10,32
1309         lvx             v29,r11,$sp
1310         addi            r11,r11,32
1311         lvx             v30,r10,$sp
1312         lvx             v31,r11,$sp
1313         addi            $sp,$sp,`$FRAME+21*16`
1314         blr
1315         .long           0
1316         .byte           0,12,0x14,0,0x80,0,6,0
1317 .size   .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1318 ___
1319 }}      }}}
1320
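# The loop below resolves the '?'-prefixed endian-neutral mnemonics.  As a
# worked example for little-endian flavours: "?lvsl" and "?lvsr" swap into
# each other, and a line such as
#
#       ?vperm  v1,v1,v2,v5
#
# is rewritten as "vperm v1,v2,v1,v5", i.e. its two source operands are
# exchanged; on big-endian flavours the '?' prefix is simply stripped.
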
1321 my $consts=1;
1322 foreach(split("\n",$code)) {
1323         s/\`([^\`]*)\`/eval($1)/geo;
1324
1325         # constants table endian-specific conversion
1326         if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
1327             my $conv=$3;
1328             my @bytes=();
1329
1330             # convert to endian-agnostic format
1331             if ($1 eq "long") {
1332               foreach (split(/,\s*/,$2)) {
1333                 my $l = /^0/?oct:int;
1334                 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
1335               }
1336             } else {
1337                 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
1338             }
1339
1340             # little-endian conversion
1341             if ($flavour =~ /le$/o) {
1342                 SWITCH: for($conv)  {
1343                     /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
1344                     /\?rev/ && do   { @bytes=reverse(@bytes);    last; }; 
1345                 }
1346             }
1347
1348             #emit
1349             print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
1350             next;
1351         }
1352         $consts=0 if (m/Lconsts:/o);    # end of table
1353
1354         # instructions prefixed with '?' are endian-specific and need
1355         # to be adjusted accordingly...
1356         if ($flavour =~ /le$/o) {       # little-endian
1357             s/\?lvsr/lvsl/o or
1358             s/\?lvsl/lvsr/o or
1359             s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
1360             s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
1361             s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
1362         } else {                        # big-endian
1363             s/\?([a-z]+)/$1/o;
1364         }
1365
1366         print $_,"\n";
1367 }
1368
1369 close STDOUT;