Add "teaser" AES module for PowerISA 2.07.
[openssl.git] / crypto / aes / asm / aesp8-ppc.pl
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8
# processor. The module is endian-agnostic in the sense that it
# supports both big- and little-endian cases, as well as
# alignment-agnostic: it is guaranteed not to cause alignment
# exceptions. [One option was to use VSX loads and stores, which
# tolerate unaligned references, but even then the specification
# doesn't prohibit exceptions on page boundaries.]
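#
# The module exports the standard OpenSSL AES entry points (note
# $prefix="AES" below). For orientation, here is a sketch of the C
# prototypes the generated code is expected to satisfy; it mirrors
# <openssl/aes.h> and is not itself emitted by this script:
#
#	int AES_set_encrypt_key(const unsigned char *userKey,
#				const int bits, AES_KEY *key);
#	int AES_set_decrypt_key(const unsigned char *userKey,
#				const int bits, AES_KEY *key);
#	void AES_encrypt(const unsigned char *in, unsigned char *out,
#				const AES_KEY *key);
#	void AES_decrypt(const unsigned char *in, unsigned char *out,
#				const AES_KEY *key);
#	void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
#				size_t length, const AES_KEY *key,
#				unsigned char *ivec, const int enc);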

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
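
# A typical invocation, for reference (the flavour strings are those
# understood by ppc-xlate.pl; "linux64le" is an assumed example):
#
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s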

$FRAME=8*$SIZE_T;
$prefix="AES";

$sp="r1";
$vrsave="r12";

{{{
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
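# The constant rows above are tagged (the trailing "rev"/"asis"
# markers, written with a leading '?') for the post-processor at the
# bottom of this file: the table is re-emitted as plain bytes,
# reversed on little-endian where tagged for reversal, so a single
# lvx+vperm pair loads it correctly on either endianness.
#
# Lconsts returns the address of rcon position-independently: bcl/mflr
# yields the address of the instruction following the bcl, and rcon
# sits exactly 0x48 bytes earlier (0x40 of table plus two 4-byte
# instructions).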
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr		# address of next insn; rcon is 0x48 bytes back
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr		r11
	li		r0,0xfff
	$PUSH		r11,$LRSAVE($sp)
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not a typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
___
$code.=<<___		if ($LITTLE_ENDIAN);
	vspltisb	$mask,0x0f		# borrow $mask
	vxor		$key,$key,$mask		# adjust for byte swap
___
$code.=<<___;
	lvx		$rcon,0,$ptr
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt
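	# Unaligned-store machinery: every 16-byte store that follows
	# goes through vperm($outperm)+vsel($outmask), merging the tail
	# of the previously stored vector ($outhead) with the head of
	# the current one, so that stvx always writes a whole aligned
	# quadword; Ldone patches up the final partial quadword.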
	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

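# Each Loop128 iteration derives one round key. The "rotate-n-splat"
# vperm replicates RotWord of the last word of $in0 into all four
# lanes, and vcipherlast with $rcon as the round key then produces
# SubWord(RotWord(w)) xor rcon in every lane: on a splatted input
# ShiftRows degenerates to a no-op, leaving just SubBytes and the
# key xor. The vsldoi/vxor chain computes the running xor that turns
# w[i-4..i-1] into the next four key words.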
.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not a typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

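# The 192-bit schedule yields 24 bytes of key material per step, so
# round keys straddle the 16-byte vectors; $stage collects the
# pieces with vsldoi before each store.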
Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	 vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	 vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not a typo
	 addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

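# The 256-bit schedule alternates two derivations: vcipherlast covers
# the RotWord+SubWord+rcon step for the first four words of each
# 8-word block, while the remaining four need SubWord only, done
# below with a plain vspltw splat followed by vsbox.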
Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in1,$in1,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not a typo
	 addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	xor		r3,r3,r3		# return value
	mtspr		256,$vrsave
	stw		$rounds,0($out)

	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

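# Decryption round keys are simply the encryption ones taken in
# reverse order, so Ldeckey swaps the schedule end-for-end in place,
# 16 bytes per iteration. No InvMixColumns transformation of the
# keys is required, as vncipher is specified to add the round key
# ahead of its InvMixColumns step (unlike AES-NI, which wants
# equivalent-inverse-cipher keys).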
Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
{{{
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_encrypt
.align	5
.${prefix}_encrypt:
	lwz		$rounds,240($key)
	li		r0,0x3f
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not a typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	`"vspltisb	v4,0x0f"		if ($LITTLE_ENDIAN)`
	?lvsl		v3,0,r11		# outperm
	`"vxor		v2,v2,v4"		if ($LITTLE_ENDIAN)`
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

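	# Two rounds per iteration, hence the srwi/subi on $rounds
	# above; each vperm below merges a pair of consecutive lvx
	# loads through keyperm v5, so the key schedule need not be
	# 16-byte aligned.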
Loop_enc:
	?vperm		v2,v2,v1,v5
	vcipher		v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	vcipher		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_enc

	?vperm		v2,v2,v1,v5
	vcipher		v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	vcipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not a typo
	?vperm		v2,v1,v2,v3		# outmask
	`"vxor		v3,v3,v4"		if ($LITTLE_ENDIAN)`
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
.size	.${prefix}_encrypt,.-.${prefix}_encrypt

.globl	.${prefix}_decrypt
.align	5
.${prefix}_decrypt:
	lwz		$rounds,240($key)
	li		r0,0x3f
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not a typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	`"vspltisb	v4,0x0f"		if ($LITTLE_ENDIAN)`
	?lvsl		v3,0,r11		# outperm
	`"vxor		v2,v2,v4"		if ($LITTLE_ENDIAN)`
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_dec:
	?vperm		v2,v2,v1,v5
	vncipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	vncipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_dec

	?vperm		v2,v2,v1,v5
	vncipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	vncipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not a typo
	?vperm		v2,v1,v2,v3		# outmask
	`"vxor		v3,v3,v4"		if ($LITTLE_ENDIAN)`
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
.size	.${prefix}_decrypt,.-.${prefix}_decrypt
___
}}}
{{{
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$ivec,$tmp)=map("v$_",(0..4));
my ($inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=map("v$_",(5..10));

$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	subic.		$len,$len,16
	bltlr-

	cmpwi		$enc,0			# test direction
	li		r0,0x7ff
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	`"vspltisb	$tmp,0x0f"		if ($LITTLE_ENDIAN)`

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	`"vxor		$inpperm,$inpperm,$tmp"	if ($LITTLE_ENDIAN)`
	vperm		$ivec,$ivec,$inptail,$inpperm

	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsl		$inpperm,0,$inp		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not a typo
	`"vxor		$inpperm,$inpperm,$tmp"	if ($LITTLE_ENDIAN)`

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	`"vxor		$outperm,$outperm,$tmp"	if ($LITTLE_ENDIAN)`

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1
	beq		Lcbc_dec

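	# CBC encryption is inherently serial: each block is xor-ed
	# with round key 0 and the previous ciphertext ($ivec) before
	# the vcipher ladder, and the vcipherlast output becomes both
	# the ciphertext block and the next $ivec.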
Lcbc_enc:
	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds

	lvx		$rndkey0,0,$key
	 vperm		$inout,$inout,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	vxor		$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	sub.		$len,$len,$idx		# len -= 16

	vperm		$tmp,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_enc

	b		Lcbc_done

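	# CBC decryption parks the raw ciphertext block in $tmp so it
	# can serve as the next IV, then xors $ivec into the
	# vncipherlast output to undo the chaining.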
.align	4
Lcbc_dec:
	vmr		$tmp,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds

	lvx		$rndkey0,0,$key
	 vperm		$tmp,$tmp,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$tmp,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16

Loop_cbc_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	sub.		$len,$len,$idx		# len -= 16

	vxor		$inout,$inout,$ivec
	vmr		$ivec,$tmp
	vperm		$tmp,$inout,$inout,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_dec

Lcbc_done:
	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	neg		$enc,$ivp		# write [unaligned] iv
	li		$idx,15			# 15 is not a typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	`"vspltisb	$tmp,0x0f"		if ($LITTLE_ENDIAN)`
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	`"vxor		$outperm,$outperm,$tmp"	if ($LITTLE_ENDIAN)`
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}}

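# Post-process $code before printing: rows of the constants table are
# re-emitted as endian-corrected bytes, and instructions written with
# a '?' prefix are rewritten for little-endian flavours (lvsl and
# lvsr swap roles, vperm/vsldoi/vspltw operands are mirrored); on
# big-endian targets the '?' is simply dropped.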
my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/\?lvsr/lvsl/o or
	    s/\?lvsl/lvsr/o or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;