Also check for errors in x86_64-xlate.pl.
[openssl.git] / crypto / aes / asm / aes-ppc.pl
1 #! /usr/bin/env perl
2 # Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the Apache License 2.0 (the "License").  You may not use
5 # this file except in compliance with the License.  You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16
17 # Needs more work: key setup, CBC routine...
18 #
19 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
20 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
21 # 4.0. But these are not the ones currently used! Their "compact"
22 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
23 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
24 # at 1/3 of ppc_AES_decrypt.
25
26 # February 2010
27 #
28 # Rescheduling instructions to favour Power6 pipeline gave 10%
29 # performance improvement on the platform in question (and marginal
30 # improvement even on others). It should be noted that Power6 fails
31 # to process a byte in 18 cycles, only in 23, because it fails to issue
32 # 4 load instructions in two cycles, only in 3. As a result, non-compact
33 # block subroutines are 25% slower than one would expect. Compact
34 # functions scale better, because they have pure computational part,
35 # which scales perfectly with clock frequency. To be specific
36 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
37 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
38
# Command-line handling and code-generation parameters.
#
# The statements below consume @ARGV, pick the 32- or 64-bit instruction
# mnemonics, locate the ppc-xlate.pl translator and redirect STDOUT into it.
# The order matters: $output is popped before $flavour is shifted, and
# $SIZE_T must be set before $LITTLE_ENDIAN and $FRAME are derived from it.
39 # $output is the last argument if it looks like a file (it has an extension)
40 # $flavour is the first argument if it doesn't look like a file
41 $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
42 $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
43
# $SIZE_T scales every stack-frame offset used by the generated code;
# $LRSAVE is the extra offset (above $FRAME) at which r0, holding the saved
# link register, is stored; $STU/$POP/$PUSH are the store-with-update, load
# and store mnemonics of the matching width.  A flavour naming neither
# 64 nor 32 aborts the script.
44 if ($flavour =~ /64/) {
45         $SIZE_T =8;
46         $LRSAVE =2*$SIZE_T;
47         $STU    ="stdu";
48         $POP    ="ld";
49         $PUSH   ="std";
50 } elsif ($flavour =~ /32/) {
51         $SIZE_T =4;
52         $LRSAVE =$SIZE_T;
53         $STU    ="stwu";
54         $POP    ="lwz";
55         $PUSH   ="stw";
56 } else { die "nonsense $flavour"; }
57
# Non-zero (equal to $SIZE_T) when the flavour ends in "le"; the code below
# only tests it for truth to choose byte-swapping load/store sequences.
58 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
59
# $dir is the directory part of this script's own path (left undefined when
# $0 has no directory separator, which makes the first candidate below a
# bare relative file name).  ppc-xlate.pl is looked up next to the script
# first, then in ../../perlasm/ relative to it.
60 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
61 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
62 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
63 die "can't locate ppc-xlate.pl";
64
# Everything subsequently printed to STDOUT is piped through the translator,
# run with the current perl interpreter ($^X); failure to start the pipe is
# fatal.
65 open STDOUT,"| $^X $xlate $flavour \"$output\""
66     or die "can't call $xlate: $!";
67
# Size in bytes of the stack frame allocated by the generated entry points.
68 $FRAME=32*$SIZE_T;
69
# Append one assembler data line per argument to the global $code buffer.
#
# Each argument is formatted as an 8-digit hexadecimal 32-bit value and
# emitted twice on the same line:
#
#     "\t.long\t0xXXXXXXXX,0xXXXXXXXX\n"
#
# so every table entry occupies 8 bytes in the generated object.
#
# Arguments: a list of integers.  Processing stops at the first undefined
#            element (the same stop condition the original shift-loop had).
# Returns:   nothing useful; the sub is called for its side effect on $code.
#
# The sub is deliberately declared WITHOUT a prototype.  It used to carry an
# empty "()" prototype, which declares a zero-argument sub and only worked
# because every call site uses the prototype-bypassing &_data_word(...)
# form; a plain _data_word(...) call would have been rejected at compile
# time.
sub _data_word
{
    for my $word (@_) {
        last unless defined $word;
        $code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
    }
    return;
}
74
# Symbolic names for the PowerPC registers used by the generated assembly.
# Several names intentionally alias the same register; see the notes below.

# Stack pointer and TOC register.  $toc is named here but the code below
# deliberately avoids r2 (see the $Tbl3 comment).
75 $sp="r1";
76 $toc="r2";
# Pointers used by the entry points: input block, output block and key
# schedule.  NOTE(review): these are presumably the first three argument
# registers of the calling convention -- confirm against the ABI.
77 $inp="r3";
78 $out="r4";
79 $key="r5";
80
# Table base pointers.  $Tbl0 shares r3 with $inp: the entry point loads the
# whole input block into $s0-$s3 before calling LAES_Te, which overwrites r3
# with the table address.
81 $Tbl0="r3";
82 $Tbl1="r6";
83 $Tbl2="r7";
84 $Tbl3=$out;     # stay away from "r2"; $out is offloaded to stack
85
# The four 32-bit words of the cipher state.
86 $s0="r8";
87 $s1="r9";
88 $s2="r10";
89 $s3="r11";
90
# Temporaries; in the round code they receive the four round-key words
# loaded from $key.
91 $t0="r12";
92 $t1="r0";       # stay away from "r13";
93 $t2="r14";
94 $t3="r15";
95
# Sixteen scratch registers (r16-r31) that hold table indices and the
# values looked up with them.
96 $acc00="r16";
97 $acc01="r17";
98 $acc02="r18";
99 $acc03="r19";
100
101 $acc04="r20";
102 $acc05="r21";
103 $acc06="r22";
104 $acc07="r23";
105
106 $acc08="r24";
107 $acc09="r25";
108 $acc10="r26";
109 $acc11="r27";
110
111 $acc12="r28";
112 $acc13="r29";
113 $acc14="r30";
114 $acc15="r31";
115
# Constant masks for the compact routine, which loads them with 0x80808080
# and 0x1b1b1b1b.  They reuse the $Tbl2 and $Tbl3 registers, which the
# compact routine does not use as table pointers.
116 $mask80=$Tbl2;
117 $mask1b=$Tbl3;
118
# Emit the two position-independent address helpers, LAES_Te and LAES_Td.
#
# Each helper saves LR in r0, executes "bcl 20,31,$+4" so that LR receives
# the address of the following instruction (label+8), copies that into
# $Tbl0, adds a constant displacement, restores LR and returns.  The result
# in $Tbl0 is the address of the first data entry of the respective table.
#
# Layout that the displacements rely on: each helper is 9 words (six
# instructions, one .long, eight .byte) padded by .space to 64 bytes, so the
# pair occupies 128 bytes and the table data emitted right after this
# heredoc starts at LAES_Te+128.  Hence 128-8 for Te.  The Td data follows
# 2048 bytes of Te words and a 256-byte table, and LAES_Td itself is 64
# bytes past LAES_Te, hence 128-64-8+2048+256.
#
# The back-quoted arithmetic is left as text in $code here; it is
# presumably evaluated by a later pass outside this view -- confirm.
# No comment may be added inside the heredoc: its text is program output.
119 $code.=<<___;
120 .machine        "any"
121 .text
122
123 .align  7
124 LAES_Te:
125         mflr    r0
126         bcl     20,31,\$+4
127         mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
128         addi    $Tbl0,$Tbl0,`128-8`
129         mtlr    r0
130         blr
131         .long   0
132         .byte   0,12,0x14,0,0,0,0,0
133         .space  `64-9*4`
134 LAES_Td:
135         mflr    r0
136         bcl     20,31,\$+4
137         mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
138         addi    $Tbl0,$Tbl0,`128-64-8+2048+256`
139         mtlr    r0
140         blr
141         .long   0
142         .byte   0,12,0x14,0,0,0,0,0
143         .space  `128-64-9*4`
144 ___
# Encryption lookup table (the data LAES_Te points at): 256 32-bit entries.
# _data_word emits every entry twice, so each entry occupies 8 bytes and the
# whole table 2048 bytes; the round code accordingly scales its indices by 8
# and reaches the byte-rotated variants through $Tbl0+3, +2 and +1.
145 &_data_word(
146         0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
147         0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
148         0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
149         0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
150         0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
151         0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
152         0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
153         0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
154         0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
155         0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
156         0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
157         0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
158         0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
159         0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
160         0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
161         0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
162         0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
163         0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
164         0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
165         0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
166         0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
167         0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
168         0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
169         0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
170         0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
171         0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
172         0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
173         0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
174         0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
175         0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
176         0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
177         0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
178         0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
179         0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
180         0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
181         0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
182         0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
183         0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
184         0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
185         0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
186         0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
187         0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
188         0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
189         0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
190         0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
191         0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
192         0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
193         0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
194         0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
195         0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
196         0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
197         0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
198         0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
199         0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
200         0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
201         0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
202         0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
203         0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
204         0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
205         0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
206         0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
207         0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
208         0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
209         0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# 256-byte table placed directly after the 2048 bytes of Te words, i.e. at
# offset 2048 from the address LAES_Te returns.  The round code refers to it
# as "Te4" and indexes it with single-byte loads; its contents are the AES
# forward S-box (FIPS-197).
210 $code.=<<___;
211 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
212 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
213 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
214 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
215 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
216 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
217 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
218 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
219 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
220 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
221 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
222 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
223 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
224 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
225 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
226 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
227 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
228 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
229 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
230 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
231 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
232 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
233 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
234 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
235 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
236 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
237 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
238 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
239 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
240 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
241 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
242 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
243 ___
# Decryption lookup table (the data LAES_Td points at): 256 32-bit entries,
# each emitted twice by _data_word, i.e. 8 bytes per entry and 2048 bytes in
# total.  It starts 2048+256 bytes after the first Te entry, which is the
# displacement LAES_Td adds.
244 &_data_word(
245         0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
246         0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
247         0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
248         0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
249         0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
250         0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
251         0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
252         0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
253         0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
254         0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
255         0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
256         0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
257         0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
258         0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
259         0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
260         0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
261         0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
262         0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
263         0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
264         0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
265         0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
266         0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
267         0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
268         0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
269         0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
270         0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
271         0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
272         0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
273         0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
274         0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
275         0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
276         0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
277         0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
278         0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
279         0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
280         0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
281         0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
282         0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
283         0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
284         0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
285         0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
286         0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
287         0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
288         0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
289         0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
290         0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
291         0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
292         0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
293         0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
294         0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
295         0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
296         0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
297         0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
298         0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
299         0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
300         0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
301         0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
302         0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
303         0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
304         0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
305         0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
306         0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
307         0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
308         0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# This heredoc emits two things.
#
# 1. A 256-byte table that follows the 2048 bytes of Td words; its contents
#    are the AES inverse S-box (FIPS-197).
#
# 2. The entry sequence of the global routine .AES_encrypt:
#    - allocate a $FRAME-byte stack frame with the store-with-update
#      instruction and fetch LR into r0;
#    - save $out in frame slot $FRAME-$SIZE_T*19 (its register is reused as
#      $Tbl3/$mask1b), save r14-r31 in the slots above it, and save r0 (the
#      caller's LR) at $FRAME+$LRSAVE;
#    - OR together the two low bits of $inp and $out and branch to
#      Lenc_unaligned when any of them is set; otherwise fall through to
#      Lenc_unaligned_ok, whose body is emitted by the statements that
#      follow this heredoc.
#
# No comment may be added inside the heredoc: its text is program output.
309 $code.=<<___;
310 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
311 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
312 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
313 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
314 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
315 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
316 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
317 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
318 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
319 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
320 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
321 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
322 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
323 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
324 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
325 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
326 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
327 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
328 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
329 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
330 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
331 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
332 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
333 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
334 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
335 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
336 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
337 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
338 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
339 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
340 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
341 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
342
343
344 .globl  .AES_encrypt
345 .align  7
346 .AES_encrypt:
347         $STU    $sp,-$FRAME($sp)
348         mflr    r0
349
350         $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
351         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
352         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
353         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
354         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
355         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
356         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
357         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
358         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
359         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
360         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
361         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
362         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
363         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
364         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
365         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
366         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
367         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
368         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
369         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
370
371         andi.   $t0,$inp,3
372         andi.   $t1,$out,3
373         or.     $t0,$t0,$t1
374         bne     Lenc_unaligned
375
376 Lenc_unaligned_ok:
377 ___
# Word-wise load of the 16-byte input block into $s0-$s3 (the body of
# Lenc_unaligned_ok).  Exactly one of the two heredocs is appended,
# depending on $LITTLE_ENDIAN.

# Big-endian flavours: the four words are loaded straight into the state.
378 $code.=<<___ if (!$LITTLE_ENDIAN);
379         lwz     $s0,0($inp)
380         lwz     $s1,4($inp)
381         lwz     $s2,8($inp)
382         lwz     $s3,12($inp)
383 ___
# Little-endian flavours: each word is loaded into a temporary and
# byte-reversed into the state register -- rotate left by 8, then insert
# the 24-bit rotation of the source into bit fields 0-7 and 16-23 -- so
# that the state has the same byte order as in the big-endian case.
384 $code.=<<___ if ($LITTLE_ENDIAN);
385         lwz     $t0,0($inp)
386         lwz     $t1,4($inp)
387         lwz     $t2,8($inp)
388         lwz     $t3,12($inp)
389         rotlwi  $s0,$t0,8
390         rotlwi  $s1,$t1,8
391         rotlwi  $s2,$t2,8
392         rotlwi  $s3,$t3,8
393         rlwimi  $s0,$t0,24,0,7
394         rlwimi  $s1,$t1,24,0,7
395         rlwimi  $s2,$t2,24,0,7
396         rlwimi  $s3,$t3,24,0,7
397         rlwimi  $s0,$t0,24,16,23
398         rlwimi  $s1,$t1,24,16,23
399         rlwimi  $s2,$t2,24,16,23
400         rlwimi  $s3,$t3,24,16,23
401 ___
# Aligned path, continued: fetch the table address into $Tbl0 (this
# overwrites the register shared with $inp, which is no longer needed),
# run the compact encryption routine on $s0-$s3, then reload $out from the
# frame slot where the prologue saved it.
402 $code.=<<___;
403         bl      LAES_Te
404         bl      Lppc_AES_encrypt_compact
405         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
406 ___
# Word-wise store of the result.  Little-endian flavours byte-reverse each
# state word into a temporary first (same rotate/insert sequence as on
# load); big-endian flavours store the state words directly.
407 $code.=<<___ if ($LITTLE_ENDIAN);
408         rotlwi  $t0,$s0,8
409         rotlwi  $t1,$s1,8
410         rotlwi  $t2,$s2,8
411         rotlwi  $t3,$s3,8
412         rlwimi  $t0,$s0,24,0,7
413         rlwimi  $t1,$s1,24,0,7
414         rlwimi  $t2,$s2,24,0,7
415         rlwimi  $t3,$s3,24,0,7
416         rlwimi  $t0,$s0,24,16,23
417         rlwimi  $t1,$s1,24,16,23
418         rlwimi  $t2,$s2,24,16,23
419         rlwimi  $t3,$s3,24,16,23
420         stw     $t0,0($out)
421         stw     $t1,4($out)
422         stw     $t2,8($out)
423         stw     $t3,12($out)
424 ___
425 $code.=<<___ if (!$LITTLE_ENDIAN);
426         stw     $s0,0($out)
427         stw     $s1,4($out)
428         stw     $s2,8($out)
429         stw     $s3,12($out)
430 ___
431 $code.=<<___;
432         b       Lenc_done
433
434 Lenc_unaligned:
435         subfic  $t0,$inp,4096
436         subfic  $t1,$out,4096
437         andi.   $t0,$t0,4096-16
438         beq     Lenc_xpage
439         andi.   $t1,$t1,4096-16
440         bne     Lenc_unaligned_ok
441
442 Lenc_xpage:
443         lbz     $acc00,0($inp)
444         lbz     $acc01,1($inp)
445         lbz     $acc02,2($inp)
446         lbz     $s0,3($inp)
447         lbz     $acc04,4($inp)
448         lbz     $acc05,5($inp)
449         lbz     $acc06,6($inp)
450         lbz     $s1,7($inp)
451         lbz     $acc08,8($inp)
452         lbz     $acc09,9($inp)
453         lbz     $acc10,10($inp)
454         insrwi  $s0,$acc00,8,0
455         lbz     $s2,11($inp)
456         insrwi  $s1,$acc04,8,0
457         lbz     $acc12,12($inp)
458         insrwi  $s0,$acc01,8,8
459         lbz     $acc13,13($inp)
460         insrwi  $s1,$acc05,8,8
461         lbz     $acc14,14($inp)
462         insrwi  $s0,$acc02,8,16
463         lbz     $s3,15($inp)
464         insrwi  $s1,$acc06,8,16
465         insrwi  $s2,$acc08,8,0
466         insrwi  $s3,$acc12,8,0
467         insrwi  $s2,$acc09,8,8
468         insrwi  $s3,$acc13,8,8
469         insrwi  $s2,$acc10,8,16
470         insrwi  $s3,$acc14,8,16
471
472         bl      LAES_Te
473         bl      Lppc_AES_encrypt_compact
474         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
475
476         extrwi  $acc00,$s0,8,0
477         extrwi  $acc01,$s0,8,8
478         stb     $acc00,0($out)
479         extrwi  $acc02,$s0,8,16
480         stb     $acc01,1($out)
481         stb     $acc02,2($out)
482         extrwi  $acc04,$s1,8,0
483         stb     $s0,3($out)
484         extrwi  $acc05,$s1,8,8
485         stb     $acc04,4($out)
486         extrwi  $acc06,$s1,8,16
487         stb     $acc05,5($out)
488         stb     $acc06,6($out)
489         extrwi  $acc08,$s2,8,0
490         stb     $s1,7($out)
491         extrwi  $acc09,$s2,8,8
492         stb     $acc08,8($out)
493         extrwi  $acc10,$s2,8,16
494         stb     $acc09,9($out)
495         stb     $acc10,10($out)
496         extrwi  $acc12,$s3,8,0
497         stb     $s2,11($out)
498         extrwi  $acc13,$s3,8,8
499         stb     $acc12,12($out)
500         extrwi  $acc14,$s3,8,16
501         stb     $acc13,13($out)
502         stb     $acc14,14($out)
503         stb     $s3,15($out)
504
505 Lenc_done:
506         $POP    r0,`$FRAME+$LRSAVE`($sp)
507         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
508         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
509         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
510         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
511         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
512         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
513         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
514         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
515         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
516         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
517         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
518         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
519         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
520         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
521         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
522         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
523         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
524         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
525         mtlr    r0
526         addi    $sp,$sp,$FRAME
527         blr
528         .long   0
529         .byte   0,12,4,1,0x80,18,3,0
530         .long   0
531
532 .align  5
533 Lppc_AES_encrypt:
534         lwz     $acc00,240($key)
535         addi    $Tbl1,$Tbl0,3
536         lwz     $t0,0($key)
537         addi    $Tbl2,$Tbl0,2
538         lwz     $t1,4($key)
539         addi    $Tbl3,$Tbl0,1
540         lwz     $t2,8($key)
541         addi    $acc00,$acc00,-1
542         lwz     $t3,12($key)
543         addi    $key,$key,16
544         xor     $s0,$s0,$t0
545         xor     $s1,$s1,$t1
546         xor     $s2,$s2,$t2
547         xor     $s3,$s3,$t3
548         mtctr   $acc00
549 .align  4
550 Lenc_loop:
551         rlwinm  $acc00,$s0,`32-24+3`,21,28
552         rlwinm  $acc01,$s1,`32-24+3`,21,28
553         rlwinm  $acc02,$s2,`32-24+3`,21,28
554         rlwinm  $acc03,$s3,`32-24+3`,21,28
555         lwz     $t0,0($key)
556         rlwinm  $acc04,$s1,`32-16+3`,21,28
557         lwz     $t1,4($key)
558         rlwinm  $acc05,$s2,`32-16+3`,21,28
559         lwz     $t2,8($key)
560         rlwinm  $acc06,$s3,`32-16+3`,21,28
561         lwz     $t3,12($key)
562         rlwinm  $acc07,$s0,`32-16+3`,21,28
563         lwzx    $acc00,$Tbl0,$acc00
564         rlwinm  $acc08,$s2,`32-8+3`,21,28
565         lwzx    $acc01,$Tbl0,$acc01
566         rlwinm  $acc09,$s3,`32-8+3`,21,28
567         lwzx    $acc02,$Tbl0,$acc02
568         rlwinm  $acc10,$s0,`32-8+3`,21,28
569         lwzx    $acc03,$Tbl0,$acc03
570         rlwinm  $acc11,$s1,`32-8+3`,21,28
571         lwzx    $acc04,$Tbl1,$acc04
572         rlwinm  $acc12,$s3,`0+3`,21,28
573         lwzx    $acc05,$Tbl1,$acc05
574         rlwinm  $acc13,$s0,`0+3`,21,28
575         lwzx    $acc06,$Tbl1,$acc06
576         rlwinm  $acc14,$s1,`0+3`,21,28
577         lwzx    $acc07,$Tbl1,$acc07
578         rlwinm  $acc15,$s2,`0+3`,21,28
579         lwzx    $acc08,$Tbl2,$acc08
580         xor     $t0,$t0,$acc00
581         lwzx    $acc09,$Tbl2,$acc09
582         xor     $t1,$t1,$acc01
583         lwzx    $acc10,$Tbl2,$acc10
584         xor     $t2,$t2,$acc02
585         lwzx    $acc11,$Tbl2,$acc11
586         xor     $t3,$t3,$acc03
587         lwzx    $acc12,$Tbl3,$acc12
588         xor     $t0,$t0,$acc04
589         lwzx    $acc13,$Tbl3,$acc13
590         xor     $t1,$t1,$acc05
591         lwzx    $acc14,$Tbl3,$acc14
592         xor     $t2,$t2,$acc06
593         lwzx    $acc15,$Tbl3,$acc15
594         xor     $t3,$t3,$acc07
595         xor     $t0,$t0,$acc08
596         xor     $t1,$t1,$acc09
597         xor     $t2,$t2,$acc10
598         xor     $t3,$t3,$acc11
599         xor     $s0,$t0,$acc12
600         xor     $s1,$t1,$acc13
601         xor     $s2,$t2,$acc14
602         xor     $s3,$t3,$acc15
603         addi    $key,$key,16
604         bdnz    Lenc_loop
605
606         addi    $Tbl2,$Tbl0,2048
607         nop
608         lwz     $t0,0($key)
609         rlwinm  $acc00,$s0,`32-24`,24,31
610         lwz     $t1,4($key)
611         rlwinm  $acc01,$s1,`32-24`,24,31
612         lwz     $t2,8($key)
613         rlwinm  $acc02,$s2,`32-24`,24,31
614         lwz     $t3,12($key)
615         rlwinm  $acc03,$s3,`32-24`,24,31
616         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Te4
617         rlwinm  $acc04,$s1,`32-16`,24,31
618         lwz     $acc09,`2048+32`($Tbl0)
619         rlwinm  $acc05,$s2,`32-16`,24,31
620         lwz     $acc10,`2048+64`($Tbl0)
621         rlwinm  $acc06,$s3,`32-16`,24,31
622         lwz     $acc11,`2048+96`($Tbl0)
623         rlwinm  $acc07,$s0,`32-16`,24,31
624         lwz     $acc12,`2048+128`($Tbl0)
625         rlwinm  $acc08,$s2,`32-8`,24,31
626         lwz     $acc13,`2048+160`($Tbl0)
627         rlwinm  $acc09,$s3,`32-8`,24,31
628         lwz     $acc14,`2048+192`($Tbl0)
629         rlwinm  $acc10,$s0,`32-8`,24,31
630         lwz     $acc15,`2048+224`($Tbl0)
631         rlwinm  $acc11,$s1,`32-8`,24,31
632         lbzx    $acc00,$Tbl2,$acc00
633         rlwinm  $acc12,$s3,`0`,24,31
634         lbzx    $acc01,$Tbl2,$acc01
635         rlwinm  $acc13,$s0,`0`,24,31
636         lbzx    $acc02,$Tbl2,$acc02
637         rlwinm  $acc14,$s1,`0`,24,31
638         lbzx    $acc03,$Tbl2,$acc03
639         rlwinm  $acc15,$s2,`0`,24,31
640         lbzx    $acc04,$Tbl2,$acc04
641         rlwinm  $s0,$acc00,24,0,7
642         lbzx    $acc05,$Tbl2,$acc05
643         rlwinm  $s1,$acc01,24,0,7
644         lbzx    $acc06,$Tbl2,$acc06
645         rlwinm  $s2,$acc02,24,0,7
646         lbzx    $acc07,$Tbl2,$acc07
647         rlwinm  $s3,$acc03,24,0,7
648         lbzx    $acc08,$Tbl2,$acc08
649         rlwimi  $s0,$acc04,16,8,15
650         lbzx    $acc09,$Tbl2,$acc09
651         rlwimi  $s1,$acc05,16,8,15
652         lbzx    $acc10,$Tbl2,$acc10
653         rlwimi  $s2,$acc06,16,8,15
654         lbzx    $acc11,$Tbl2,$acc11
655         rlwimi  $s3,$acc07,16,8,15
656         lbzx    $acc12,$Tbl2,$acc12
657         rlwimi  $s0,$acc08,8,16,23
658         lbzx    $acc13,$Tbl2,$acc13
659         rlwimi  $s1,$acc09,8,16,23
660         lbzx    $acc14,$Tbl2,$acc14
661         rlwimi  $s2,$acc10,8,16,23
662         lbzx    $acc15,$Tbl2,$acc15
663         rlwimi  $s3,$acc11,8,16,23
664         or      $s0,$s0,$acc12
665         or      $s1,$s1,$acc13
666         or      $s2,$s2,$acc14
667         or      $s3,$s3,$acc15
668         xor     $s0,$s0,$t0
669         xor     $s1,$s1,$t1
670         xor     $s2,$s2,$t2
671         xor     $s3,$s3,$t3
672         blr
673         .long   0
674         .byte   0,12,0x14,0,0,0,0,0
675
676 .align  4
677 Lppc_AES_encrypt_compact:
678         lwz     $acc00,240($key)
679         addi    $Tbl1,$Tbl0,2048
680         lwz     $t0,0($key)
681         lis     $mask80,0x8080
682         lwz     $t1,4($key)
683         lis     $mask1b,0x1b1b
684         lwz     $t2,8($key)
685         ori     $mask80,$mask80,0x8080
686         lwz     $t3,12($key)
687         ori     $mask1b,$mask1b,0x1b1b
688         addi    $key,$key,16
689         mtctr   $acc00
690 .align  4
691 Lenc_compact_loop:
692         xor     $s0,$s0,$t0
693         xor     $s1,$s1,$t1
694         rlwinm  $acc00,$s0,`32-24`,24,31
695         xor     $s2,$s2,$t2
696         rlwinm  $acc01,$s1,`32-24`,24,31
697         xor     $s3,$s3,$t3
698         rlwinm  $acc02,$s2,`32-24`,24,31
699         rlwinm  $acc03,$s3,`32-24`,24,31
700         rlwinm  $acc04,$s1,`32-16`,24,31
701         rlwinm  $acc05,$s2,`32-16`,24,31
702         rlwinm  $acc06,$s3,`32-16`,24,31
703         rlwinm  $acc07,$s0,`32-16`,24,31
704         lbzx    $acc00,$Tbl1,$acc00
705         rlwinm  $acc08,$s2,`32-8`,24,31
706         lbzx    $acc01,$Tbl1,$acc01
707         rlwinm  $acc09,$s3,`32-8`,24,31
708         lbzx    $acc02,$Tbl1,$acc02
709         rlwinm  $acc10,$s0,`32-8`,24,31
710         lbzx    $acc03,$Tbl1,$acc03
711         rlwinm  $acc11,$s1,`32-8`,24,31
712         lbzx    $acc04,$Tbl1,$acc04
713         rlwinm  $acc12,$s3,`0`,24,31
714         lbzx    $acc05,$Tbl1,$acc05
715         rlwinm  $acc13,$s0,`0`,24,31
716         lbzx    $acc06,$Tbl1,$acc06
717         rlwinm  $acc14,$s1,`0`,24,31
718         lbzx    $acc07,$Tbl1,$acc07
719         rlwinm  $acc15,$s2,`0`,24,31
720         lbzx    $acc08,$Tbl1,$acc08
721         rlwinm  $s0,$acc00,24,0,7
722         lbzx    $acc09,$Tbl1,$acc09
723         rlwinm  $s1,$acc01,24,0,7
724         lbzx    $acc10,$Tbl1,$acc10
725         rlwinm  $s2,$acc02,24,0,7
726         lbzx    $acc11,$Tbl1,$acc11
727         rlwinm  $s3,$acc03,24,0,7
728         lbzx    $acc12,$Tbl1,$acc12
729         rlwimi  $s0,$acc04,16,8,15
730         lbzx    $acc13,$Tbl1,$acc13
731         rlwimi  $s1,$acc05,16,8,15
732         lbzx    $acc14,$Tbl1,$acc14
733         rlwimi  $s2,$acc06,16,8,15
734         lbzx    $acc15,$Tbl1,$acc15
735         rlwimi  $s3,$acc07,16,8,15
736         rlwimi  $s0,$acc08,8,16,23
737         rlwimi  $s1,$acc09,8,16,23
738         rlwimi  $s2,$acc10,8,16,23
739         rlwimi  $s3,$acc11,8,16,23
740         lwz     $t0,0($key)
741         or      $s0,$s0,$acc12
742         lwz     $t1,4($key)
743         or      $s1,$s1,$acc13
744         lwz     $t2,8($key)
745         or      $s2,$s2,$acc14
746         lwz     $t3,12($key)
747         or      $s3,$s3,$acc15
748
749         addi    $key,$key,16
750         bdz     Lenc_compact_done
751
752         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
753         and     $acc01,$s1,$mask80
754         and     $acc02,$s2,$mask80
755         and     $acc03,$s3,$mask80
756         srwi    $acc04,$acc00,7         # r1>>7
757         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
758         srwi    $acc05,$acc01,7
759         andc    $acc09,$s1,$mask80
760         srwi    $acc06,$acc02,7
761         andc    $acc10,$s2,$mask80
762         srwi    $acc07,$acc03,7
763         andc    $acc11,$s3,$mask80
764         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
765         sub     $acc01,$acc01,$acc05
766         sub     $acc02,$acc02,$acc06
767         sub     $acc03,$acc03,$acc07
768         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
769         add     $acc09,$acc09,$acc09
770         add     $acc10,$acc10,$acc10
771         add     $acc11,$acc11,$acc11
772         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
773         and     $acc01,$acc01,$mask1b
774         and     $acc02,$acc02,$mask1b
775         and     $acc03,$acc03,$mask1b
776         xor     $acc00,$acc00,$acc08    # r2
777         xor     $acc01,$acc01,$acc09
778          rotlwi $acc12,$s0,16           # ROTATE(r0,16)
779         xor     $acc02,$acc02,$acc10
780          rotlwi $acc13,$s1,16
781         xor     $acc03,$acc03,$acc11
782          rotlwi $acc14,$s2,16
783
784         xor     $s0,$s0,$acc00          # r0^r2
785         rotlwi  $acc15,$s3,16
786         xor     $s1,$s1,$acc01
787         rotrwi  $s0,$s0,24              # ROTATE(r2^r0,24)
788         xor     $s2,$s2,$acc02
789         rotrwi  $s1,$s1,24
790         xor     $s3,$s3,$acc03
791         rotrwi  $s2,$s2,24
792         xor     $s0,$s0,$acc00          # ROTATE(r2^r0,24)^r2
793         rotrwi  $s3,$s3,24
794         xor     $s1,$s1,$acc01
795         xor     $s2,$s2,$acc02
796         xor     $s3,$s3,$acc03
797         rotlwi  $acc08,$acc12,8         # ROTATE(r0,24)
798         xor     $s0,$s0,$acc12          #
799         rotlwi  $acc09,$acc13,8
800         xor     $s1,$s1,$acc13
801         rotlwi  $acc10,$acc14,8
802         xor     $s2,$s2,$acc14
803         rotlwi  $acc11,$acc15,8
804         xor     $s3,$s3,$acc15
805         xor     $s0,$s0,$acc08          #
806         xor     $s1,$s1,$acc09
807         xor     $s2,$s2,$acc10
808         xor     $s3,$s3,$acc11
809
810         b       Lenc_compact_loop
811 .align  4
812 Lenc_compact_done:
813         xor     $s0,$s0,$t0
814         xor     $s1,$s1,$t1
815         xor     $s2,$s2,$t2
816         xor     $s3,$s3,$t3
817         blr
818         .long   0
819         .byte   0,12,0x14,0,0,0,0,0
820 .size   .AES_encrypt,.-.AES_encrypt
821
822 .globl  .AES_decrypt
823 .align  7
824 .AES_decrypt:
825         $STU    $sp,-$FRAME($sp)
826         mflr    r0
827
828         $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
829         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
830         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
831         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
832         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
833         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
834         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
835         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
836         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
837         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
838         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
839         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
840         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
841         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
842         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
843         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
844         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
845         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
846         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
847         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
848
849         andi.   $t0,$inp,3
850         andi.   $t1,$out,3
851         or.     $t0,$t0,$t1
852         bne     Ldec_unaligned
853
854 Ldec_unaligned_ok:
855 ___
# Big-endian build only: fetch the 16-byte input block at $inp as four
# 32-bit words straight into the state registers $s0-$s3.  No byte
# reordering is performed on this path.
856 $code.=<<___ if (!$LITTLE_ENDIAN);
857         lwz     $s0,0($inp)
858         lwz     $s1,4($inp)
859         lwz     $s2,8($inp)
860         lwz     $s3,12($inp)
861 ___
# Little-endian build only: load the four input words into $t0-$t3 and
# write their byte-reversed forms into $s0-$s3.  Per word, with the loaded
# bytes written ABCD from most to least significant:
#   rotlwi d,s,8          -> BCDA
#   rlwimi d,s,24,0,7     -> most significant byte replaced by D  (DCDA)
#   rlwimi d,s,24,16,23   -> third byte replaced by B             (DCBA)
862 $code.=<<___ if ($LITTLE_ENDIAN);
863         lwz     $t0,0($inp)
864         lwz     $t1,4($inp)
865         lwz     $t2,8($inp)
866         lwz     $t3,12($inp)
867         rotlwi  $s0,$t0,8
868         rotlwi  $s1,$t1,8
869         rotlwi  $s2,$t2,8
870         rotlwi  $s3,$t3,8
871         rlwimi  $s0,$t0,24,0,7
872         rlwimi  $s1,$t1,24,0,7
873         rlwimi  $s2,$t2,24,0,7
874         rlwimi  $s3,$t3,24,0,7
875         rlwimi  $s0,$t0,24,16,23
876         rlwimi  $s1,$t1,24,16,23
877         rlwimi  $s2,$t2,24,16,23
878         rlwimi  $s3,$t3,24,16,23
879 ___
# Word-access path, both byte orders: call LAES_Td, then the compact
# decryption core, then reload $out from the stack slot at FRAME-SIZE_T*19
# where the .AES_decrypt prologue stored it.
# NOTE(review): LAES_Td is defined outside this part of the file; presumably
# it leaves the address of the Td tables in $Tbl0, which the core uses as
# its table base -- confirm.
# NOTE(review): $out is presumably reloaded because the core does not
# preserve that register -- confirm against the register assignments at the
# top of the file.
880 $code.=<<___;
881         bl      LAES_Td
882         bl      Lppc_AES_decrypt_compact
883         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
884 ___
# Little-endian build only: byte-reverse each result word $s0-$s3 into
# $t0-$t3 (rotlwi by 8, then two rlwimi with rotate 24 that overwrite the
# first and third bytes) and store the four words at $out.
885 $code.=<<___ if ($LITTLE_ENDIAN);
886         rotlwi  $t0,$s0,8
887         rotlwi  $t1,$s1,8
888         rotlwi  $t2,$s2,8
889         rotlwi  $t3,$s3,8
890         rlwimi  $t0,$s0,24,0,7
891         rlwimi  $t1,$s1,24,0,7
892         rlwimi  $t2,$s2,24,0,7
893         rlwimi  $t3,$s3,24,0,7
894         rlwimi  $t0,$s0,24,16,23
895         rlwimi  $t1,$s1,24,16,23
896         rlwimi  $t2,$s2,24,16,23
897         rlwimi  $t3,$s3,24,16,23
898         stw     $t0,0($out)
899         stw     $t1,4($out)
900         stw     $t2,8($out)
901         stw     $t3,12($out)
902 ___
# Big-endian build only: store the result words $s0-$s3 to the 16 bytes at
# $out unchanged.
903 $code.=<<___ if (!$LITTLE_ENDIAN);
904         stw     $s0,0($out)
905         stw     $s1,4($out)
906         stw     $s2,8($out)
907         stw     $s3,12($out)
908 ___
# This heredoc emits, in order:
#
# 1. "b Ldec_done": the word-access path skips the byte-wise code below.
#
# 2. Ldec_unaligned: entered from .AES_decrypt when $inp or $out has either
#    of its two low address bits set.  For each pointer it computes
#    (4096 - ptr) & (4096-16); that value is zero when ptr modulo 4096 is 0
#    or greater than 4080.  Only if both values are non-zero does control
#    return to the word-access path at Ldec_unaligned_ok; otherwise it
#    falls into Ldec_xpage.
#
# 3. Ldec_xpage: byte-wise path.  The 16 input bytes are fetched with lbz
#    and packed into $s0-$s3 with insrwi, the byte at the lowest address
#    going into the most significant position of each word.  LAES_Td and
#    Lppc_AES_decrypt_compact are called, $out is reloaded from the stack
#    slot at FRAME-SIZE_T*19, and the result is written back one byte at a
#    time with extrwi/stb in the same byte order.
#
# 4. Ldec_done: epilogue shared by both paths.  Reloads the saved link
#    register value into r0, restores r14-r31 from the frame, moves r0 to
#    LR, releases the frame with addi and returns.
#    NOTE(review): the .long/.byte words after blr are presumably a
#    traceback tag -- confirm.
#
# 5. Lppc_AES_decrypt: table-driven decryption core.  No call to it appears
#    in this part of the file; both call sites above use the compact core.
#      - The word at 240($key), minus one, becomes the CTR loop count.
#      - $Tbl1/$Tbl2/$Tbl3 are set to $Tbl0+3/+2/+1.
#      - The state is XORed with the first four key words; $key advances
#        by 16.
#      - Ldec_loop: every state byte is turned into an index scaled by 8
#        (rlwinm with 3 extra bits of rotation and mask 21..28) and used
#        for a 32-bit lwzx lookup: most significant byte via $Tbl0, next
#        via $Tbl1, next via $Tbl2, least significant via $Tbl3.  Output
#        word i is the next four key words' word i XORed with the lookups
#        for the top byte of s[i], the second byte of s[i-1], the third
#        byte of s[i-2] and the low byte of s[i-3] (indices modulo 4).
#      - After the loop $Tbl2 is repointed to $Tbl0+2048 and the last round
#        uses byte lookups (lbzx) from there, with the same byte selection.
#        The eight lwz from 2048+0 .. 2048+224 in steps of 32 load into
#        $acc08-$acc15, which are overwritten before being read; the
#        existing comment describes them as prefetching Td4.
#      - The substituted bytes are reassembled with rlwinm/rlwimi/or and
#        XORed with the final four key words.
#
# 6. The first instructions of Lppc_AES_decrypt_compact, which continues in
#    the heredocs that follow: load the word at 240($key) into $acc00, set
#    $Tbl1 = $Tbl0+2048, build 0x80808080 in $mask80 and 0x1b1b1b1b in
#    $mask1b with lis+ori, load the first four key words into $t0-$t3 and
#    advance $key by 16.
909 $code.=<<___;
910         b       Ldec_done
911
912 Ldec_unaligned:
913         subfic  $t0,$inp,4096
914         subfic  $t1,$out,4096
915         andi.   $t0,$t0,4096-16
916         beq     Ldec_xpage
917         andi.   $t1,$t1,4096-16
918         bne     Ldec_unaligned_ok
919
920 Ldec_xpage:
921         lbz     $acc00,0($inp)
922         lbz     $acc01,1($inp)
923         lbz     $acc02,2($inp)
924         lbz     $s0,3($inp)
925         lbz     $acc04,4($inp)
926         lbz     $acc05,5($inp)
927         lbz     $acc06,6($inp)
928         lbz     $s1,7($inp)
929         lbz     $acc08,8($inp)
930         lbz     $acc09,9($inp)
931         lbz     $acc10,10($inp)
932         insrwi  $s0,$acc00,8,0
933         lbz     $s2,11($inp)
934         insrwi  $s1,$acc04,8,0
935         lbz     $acc12,12($inp)
936         insrwi  $s0,$acc01,8,8
937         lbz     $acc13,13($inp)
938         insrwi  $s1,$acc05,8,8
939         lbz     $acc14,14($inp)
940         insrwi  $s0,$acc02,8,16
941         lbz     $s3,15($inp)
942         insrwi  $s1,$acc06,8,16
943         insrwi  $s2,$acc08,8,0
944         insrwi  $s3,$acc12,8,0
945         insrwi  $s2,$acc09,8,8
946         insrwi  $s3,$acc13,8,8
947         insrwi  $s2,$acc10,8,16
948         insrwi  $s3,$acc14,8,16
949
950         bl      LAES_Td
951         bl      Lppc_AES_decrypt_compact
952         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
953
954         extrwi  $acc00,$s0,8,0
955         extrwi  $acc01,$s0,8,8
956         stb     $acc00,0($out)
957         extrwi  $acc02,$s0,8,16
958         stb     $acc01,1($out)
959         stb     $acc02,2($out)
960         extrwi  $acc04,$s1,8,0
961         stb     $s0,3($out)
962         extrwi  $acc05,$s1,8,8
963         stb     $acc04,4($out)
964         extrwi  $acc06,$s1,8,16
965         stb     $acc05,5($out)
966         stb     $acc06,6($out)
967         extrwi  $acc08,$s2,8,0
968         stb     $s1,7($out)
969         extrwi  $acc09,$s2,8,8
970         stb     $acc08,8($out)
971         extrwi  $acc10,$s2,8,16
972         stb     $acc09,9($out)
973         stb     $acc10,10($out)
974         extrwi  $acc12,$s3,8,0
975         stb     $s2,11($out)
976         extrwi  $acc13,$s3,8,8
977         stb     $acc12,12($out)
978         extrwi  $acc14,$s3,8,16
979         stb     $acc13,13($out)
980         stb     $acc14,14($out)
981         stb     $s3,15($out)
982
983 Ldec_done:
984         $POP    r0,`$FRAME+$LRSAVE`($sp)
985         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
986         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
987         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
988         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
989         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
990         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
991         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
992         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
993         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
994         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
995         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
996         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
997         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
998         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
999         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
1000         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
1001         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
1002         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
1003         mtlr    r0
1004         addi    $sp,$sp,$FRAME
1005         blr
1006         .long   0
1007         .byte   0,12,4,1,0x80,18,3,0
1008         .long   0
1009
1010 .align  5
1011 Lppc_AES_decrypt:
1012         lwz     $acc00,240($key)
1013         addi    $Tbl1,$Tbl0,3
1014         lwz     $t0,0($key)
1015         addi    $Tbl2,$Tbl0,2
1016         lwz     $t1,4($key)
1017         addi    $Tbl3,$Tbl0,1
1018         lwz     $t2,8($key)
1019         addi    $acc00,$acc00,-1
1020         lwz     $t3,12($key)
1021         addi    $key,$key,16
1022         xor     $s0,$s0,$t0
1023         xor     $s1,$s1,$t1
1024         xor     $s2,$s2,$t2
1025         xor     $s3,$s3,$t3
1026         mtctr   $acc00
1027 .align  4
1028 Ldec_loop:
1029         rlwinm  $acc00,$s0,`32-24+3`,21,28
1030         rlwinm  $acc01,$s1,`32-24+3`,21,28
1031         rlwinm  $acc02,$s2,`32-24+3`,21,28
1032         rlwinm  $acc03,$s3,`32-24+3`,21,28
1033         lwz     $t0,0($key)
1034         rlwinm  $acc04,$s3,`32-16+3`,21,28
1035         lwz     $t1,4($key)
1036         rlwinm  $acc05,$s0,`32-16+3`,21,28
1037         lwz     $t2,8($key)
1038         rlwinm  $acc06,$s1,`32-16+3`,21,28
1039         lwz     $t3,12($key)
1040         rlwinm  $acc07,$s2,`32-16+3`,21,28
1041         lwzx    $acc00,$Tbl0,$acc00
1042         rlwinm  $acc08,$s2,`32-8+3`,21,28
1043         lwzx    $acc01,$Tbl0,$acc01
1044         rlwinm  $acc09,$s3,`32-8+3`,21,28
1045         lwzx    $acc02,$Tbl0,$acc02
1046         rlwinm  $acc10,$s0,`32-8+3`,21,28
1047         lwzx    $acc03,$Tbl0,$acc03
1048         rlwinm  $acc11,$s1,`32-8+3`,21,28
1049         lwzx    $acc04,$Tbl1,$acc04
1050         rlwinm  $acc12,$s1,`0+3`,21,28
1051         lwzx    $acc05,$Tbl1,$acc05
1052         rlwinm  $acc13,$s2,`0+3`,21,28
1053         lwzx    $acc06,$Tbl1,$acc06
1054         rlwinm  $acc14,$s3,`0+3`,21,28
1055         lwzx    $acc07,$Tbl1,$acc07
1056         rlwinm  $acc15,$s0,`0+3`,21,28
1057         lwzx    $acc08,$Tbl2,$acc08
1058         xor     $t0,$t0,$acc00
1059         lwzx    $acc09,$Tbl2,$acc09
1060         xor     $t1,$t1,$acc01
1061         lwzx    $acc10,$Tbl2,$acc10
1062         xor     $t2,$t2,$acc02
1063         lwzx    $acc11,$Tbl2,$acc11
1064         xor     $t3,$t3,$acc03
1065         lwzx    $acc12,$Tbl3,$acc12
1066         xor     $t0,$t0,$acc04
1067         lwzx    $acc13,$Tbl3,$acc13
1068         xor     $t1,$t1,$acc05
1069         lwzx    $acc14,$Tbl3,$acc14
1070         xor     $t2,$t2,$acc06
1071         lwzx    $acc15,$Tbl3,$acc15
1072         xor     $t3,$t3,$acc07
1073         xor     $t0,$t0,$acc08
1074         xor     $t1,$t1,$acc09
1075         xor     $t2,$t2,$acc10
1076         xor     $t3,$t3,$acc11
1077         xor     $s0,$t0,$acc12
1078         xor     $s1,$t1,$acc13
1079         xor     $s2,$t2,$acc14
1080         xor     $s3,$t3,$acc15
1081         addi    $key,$key,16
1082         bdnz    Ldec_loop
1083
1084         addi    $Tbl2,$Tbl0,2048
1085         nop
1086         lwz     $t0,0($key)
1087         rlwinm  $acc00,$s0,`32-24`,24,31
1088         lwz     $t1,4($key)
1089         rlwinm  $acc01,$s1,`32-24`,24,31
1090         lwz     $t2,8($key)
1091         rlwinm  $acc02,$s2,`32-24`,24,31
1092         lwz     $t3,12($key)
1093         rlwinm  $acc03,$s3,`32-24`,24,31
1094         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Td4
1095         rlwinm  $acc04,$s3,`32-16`,24,31
1096         lwz     $acc09,`2048+32`($Tbl0)
1097         rlwinm  $acc05,$s0,`32-16`,24,31
1098         lwz     $acc10,`2048+64`($Tbl0)
1099         lbzx    $acc00,$Tbl2,$acc00
1100         lwz     $acc11,`2048+96`($Tbl0)
1101         lbzx    $acc01,$Tbl2,$acc01
1102         lwz     $acc12,`2048+128`($Tbl0)
1103         rlwinm  $acc06,$s1,`32-16`,24,31
1104         lwz     $acc13,`2048+160`($Tbl0)
1105         rlwinm  $acc07,$s2,`32-16`,24,31
1106         lwz     $acc14,`2048+192`($Tbl0)
1107         rlwinm  $acc08,$s2,`32-8`,24,31
1108         lwz     $acc15,`2048+224`($Tbl0)
1109         rlwinm  $acc09,$s3,`32-8`,24,31
1110         lbzx    $acc02,$Tbl2,$acc02
1111         rlwinm  $acc10,$s0,`32-8`,24,31
1112         lbzx    $acc03,$Tbl2,$acc03
1113         rlwinm  $acc11,$s1,`32-8`,24,31
1114         lbzx    $acc04,$Tbl2,$acc04
1115         rlwinm  $acc12,$s1,`0`,24,31
1116         lbzx    $acc05,$Tbl2,$acc05
1117         rlwinm  $acc13,$s2,`0`,24,31
1118         lbzx    $acc06,$Tbl2,$acc06
1119         rlwinm  $acc14,$s3,`0`,24,31
1120         lbzx    $acc07,$Tbl2,$acc07
1121         rlwinm  $acc15,$s0,`0`,24,31
1122         lbzx    $acc08,$Tbl2,$acc08
1123         rlwinm  $s0,$acc00,24,0,7
1124         lbzx    $acc09,$Tbl2,$acc09
1125         rlwinm  $s1,$acc01,24,0,7
1126         lbzx    $acc10,$Tbl2,$acc10
1127         rlwinm  $s2,$acc02,24,0,7
1128         lbzx    $acc11,$Tbl2,$acc11
1129         rlwinm  $s3,$acc03,24,0,7
1130         lbzx    $acc12,$Tbl2,$acc12
1131         rlwimi  $s0,$acc04,16,8,15
1132         lbzx    $acc13,$Tbl2,$acc13
1133         rlwimi  $s1,$acc05,16,8,15
1134         lbzx    $acc14,$Tbl2,$acc14
1135         rlwimi  $s2,$acc06,16,8,15
1136         lbzx    $acc15,$Tbl2,$acc15
1137         rlwimi  $s3,$acc07,16,8,15
1138         rlwimi  $s0,$acc08,8,16,23
1139         rlwimi  $s1,$acc09,8,16,23
1140         rlwimi  $s2,$acc10,8,16,23
1141         rlwimi  $s3,$acc11,8,16,23
1142         or      $s0,$s0,$acc12
1143         or      $s1,$s1,$acc13
1144         or      $s2,$s2,$acc14
1145         or      $s3,$s3,$acc15
1146         xor     $s0,$s0,$t0
1147         xor     $s1,$s1,$t1
1148         xor     $s2,$s2,$t2
1149         xor     $s3,$s3,$t3
1150         blr
1151         .long   0
1152         .byte   0,12,0x14,0,0,0,0,0
1153
1154 .align  4
1155 Lppc_AES_decrypt_compact:
1156         lwz     $acc00,240($key)
1157         addi    $Tbl1,$Tbl0,2048
1158         lwz     $t0,0($key)
1159         lis     $mask80,0x8080
1160         lwz     $t1,4($key)
1161         lis     $mask1b,0x1b1b
1162         lwz     $t2,8($key)
1163         ori     $mask80,$mask80,0x8080
1164         lwz     $t3,12($key)
1165         ori     $mask1b,$mask1b,0x1b1b
1166         addi    $key,$key,16
1167 ___
# 64-bit build only ($SIZE_T==8): insrdi reg,reg,32,0 copies the low 32 bits
# of each mask into its own upper 32 bits, so $mask80 and $mask1b carry the
# same byte pattern in both halves of the 64-bit register.
1168 $code.=<<___ if ($SIZE_T==8);
1169         insrdi  $mask80,$mask80,32,0
1170         insrdi  $mask1b,$mask1b,32,0
1171 ___
# Top of the compact core's loop.  CTR is set from $acc00, which holds the
# word read from 240($key).  Each pass through Ldec_compact_loop:
#   - XORs the state $s0-$s3 with the key words currently in $t0-$t3;
#   - isolates every state byte with rlwinm and replaces it by a byte
#     lookup (lbzx) in the table at $Tbl1;
#   - rebuilds the words with rlwinm/rlwimi/or: output word i gets, from
#     most to least significant byte, the substituted top byte of s[i],
#     second byte of s[i-1], third byte of s[i-2] and low byte of s[i-3]
#     (indices modulo 4);
#   - loads the next four key words into $t0-$t3 and advances $key by 16;
#   - bdz decrements CTR and leaves for Ldec_compact_done once it is zero.
1172 $code.=<<___;
1173         mtctr   $acc00
1174 .align  4
1175 Ldec_compact_loop:
1176         xor     $s0,$s0,$t0
1177         xor     $s1,$s1,$t1
1178         rlwinm  $acc00,$s0,`32-24`,24,31
1179         xor     $s2,$s2,$t2
1180         rlwinm  $acc01,$s1,`32-24`,24,31
1181         xor     $s3,$s3,$t3
1182         rlwinm  $acc02,$s2,`32-24`,24,31
1183         rlwinm  $acc03,$s3,`32-24`,24,31
1184         rlwinm  $acc04,$s3,`32-16`,24,31
1185         rlwinm  $acc05,$s0,`32-16`,24,31
1186         rlwinm  $acc06,$s1,`32-16`,24,31
1187         rlwinm  $acc07,$s2,`32-16`,24,31
1188         lbzx    $acc00,$Tbl1,$acc00
1189         rlwinm  $acc08,$s2,`32-8`,24,31
1190         lbzx    $acc01,$Tbl1,$acc01
1191         rlwinm  $acc09,$s3,`32-8`,24,31
1192         lbzx    $acc02,$Tbl1,$acc02
1193         rlwinm  $acc10,$s0,`32-8`,24,31
1194         lbzx    $acc03,$Tbl1,$acc03
1195         rlwinm  $acc11,$s1,`32-8`,24,31
1196         lbzx    $acc04,$Tbl1,$acc04
1197         rlwinm  $acc12,$s1,`0`,24,31
1198         lbzx    $acc05,$Tbl1,$acc05
1199         rlwinm  $acc13,$s2,`0`,24,31
1200         lbzx    $acc06,$Tbl1,$acc06
1201         rlwinm  $acc14,$s3,`0`,24,31
1202         lbzx    $acc07,$Tbl1,$acc07
1203         rlwinm  $acc15,$s0,`0`,24,31
1204         lbzx    $acc08,$Tbl1,$acc08
1205         rlwinm  $s0,$acc00,24,0,7
1206         lbzx    $acc09,$Tbl1,$acc09
1207         rlwinm  $s1,$acc01,24,0,7
1208         lbzx    $acc10,$Tbl1,$acc10
1209         rlwinm  $s2,$acc02,24,0,7
1210         lbzx    $acc11,$Tbl1,$acc11
1211         rlwinm  $s3,$acc03,24,0,7
1212         lbzx    $acc12,$Tbl1,$acc12
1213         rlwimi  $s0,$acc04,16,8,15
1214         lbzx    $acc13,$Tbl1,$acc13
1215         rlwimi  $s1,$acc05,16,8,15
1216         lbzx    $acc14,$Tbl1,$acc14
1217         rlwimi  $s2,$acc06,16,8,15
1218         lbzx    $acc15,$Tbl1,$acc15
1219         rlwimi  $s3,$acc07,16,8,15
1220         rlwimi  $s0,$acc08,8,16,23
1221         rlwimi  $s1,$acc09,8,16,23
1222         rlwimi  $s2,$acc10,8,16,23
1223         rlwimi  $s3,$acc11,8,16,23
1224         lwz     $t0,0($key)
1225         or      $s0,$s0,$acc12
1226         lwz     $t1,4($key)
1227         or      $s1,$s1,$acc13
1228         lwz     $t2,8($key)
1229         or      $s2,$s2,$acc14
1230         lwz     $t3,12($key)
1231         or      $s3,$s3,$acc15
1232
1233         addi    $key,$key,16
1234         bdz     Ldec_compact_done
1235 ___
# 64-bit build only ($SIZE_T==8).  Two state words are packed per register:
# insrdi places $s1 in the upper half of $s0 and $s3 in the upper half of
# $s2.  With r0 denoting the packed state, three successive doublings are
# computed, each as
#     r1 = x & 0x80808080...            (top bit of every byte)
#     2x = ((x & 0x7f7f7f7f...) << 1) ^ ((r1 - (r1>>7)) & 0x1b1b1b1b...)
# i.e. every byte is shifted left by one and 0x1b is XORed into the bytes
# whose top bit was set.  This yields r2, r4 and r8 in $acc00/$acc02,
# $acc04/$acc06 and $acc08/$acc10.  r2 and r4 are then XORed with r0, and
# extrdi ...,32,0 copies the upper 32 bits of each even-numbered $acc
# register into the following odd-numbered one, giving one value per state
# word for the 32-bit instructions emitted after this heredoc.
1236 $code.=<<___ if ($SIZE_T==8);
1237         # vectorized permutation improves decrypt performance by 10%
1238         insrdi  $s0,$s1,32,0
1239         insrdi  $s2,$s3,32,0
1240
1241         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1242         and     $acc02,$s2,$mask80
1243         srdi    $acc04,$acc00,7         # r1>>7
1244         srdi    $acc06,$acc02,7
1245         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1246         andc    $acc10,$s2,$mask80
1247         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1248         sub     $acc02,$acc02,$acc06
1249         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1250         add     $acc10,$acc10,$acc10
1251         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1252         and     $acc02,$acc02,$mask1b
1253         xor     $acc00,$acc00,$acc08    # r2
1254         xor     $acc02,$acc02,$acc10
1255
1256         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1257         and     $acc06,$acc02,$mask80
1258         srdi    $acc08,$acc04,7         # r1>>7
1259         srdi    $acc10,$acc06,7
1260         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1261         andc    $acc14,$acc02,$mask80
1262         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1263         sub     $acc06,$acc06,$acc10
1264         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1265         add     $acc14,$acc14,$acc14
1266         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1267         and     $acc06,$acc06,$mask1b
1268         xor     $acc04,$acc04,$acc12    # r4
1269         xor     $acc06,$acc06,$acc14
1270
1271         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1272         and     $acc10,$acc06,$mask80
1273         srdi    $acc12,$acc08,7         # r1>>7
1274         srdi    $acc14,$acc10,7
1275         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1276         sub     $acc10,$acc10,$acc14
1277         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1278         andc    $acc14,$acc06,$mask80
1279         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1280         add     $acc14,$acc14,$acc14
1281         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1282         and     $acc10,$acc10,$mask1b
1283         xor     $acc08,$acc08,$acc12    # r8
1284         xor     $acc10,$acc10,$acc14
1285
1286         xor     $acc00,$acc00,$s0       # r2^r0
1287         xor     $acc02,$acc02,$s2
1288         xor     $acc04,$acc04,$s0       # r4^r0
1289         xor     $acc06,$acc06,$s2
1290
1291         extrdi  $acc01,$acc00,32,0
1292         extrdi  $acc03,$acc02,32,0
1293         extrdi  $acc05,$acc04,32,0
1294         extrdi  $acc07,$acc06,32,0
1295         extrdi  $acc09,$acc08,32,0
1296         extrdi  $acc11,$acc10,32,0
1297 ___
# 32-bit build only ($SIZE_T==4): the same computation as the 64-bit
# variant, with one state word per register.  With r0 denoting a state word,
# three successive doublings are computed, each as
#     r1 = x & 0x80808080               (top bit of every byte)
#     2x = ((x & 0x7f7f7f7f) << 1) ^ ((r1 - (r1>>7)) & 0x1b1b1b1b)
# i.e. every byte is shifted left by one and 0x1b is XORed into the bytes
# whose top bit was set.  On exit $acc00-$acc03 hold r2^r0, $acc04-$acc07
# hold r4^r0 and $acc08-$acc11 hold r8 for $s0-$s3 respectively;
# $acc12-$acc15 are scratch.
1298 $code.=<<___ if ($SIZE_T==4);
1299         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1300         and     $acc01,$s1,$mask80
1301         and     $acc02,$s2,$mask80
1302         and     $acc03,$s3,$mask80
1303         srwi    $acc04,$acc00,7         # r1>>7
1304         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1305         srwi    $acc05,$acc01,7
1306         andc    $acc09,$s1,$mask80
1307         srwi    $acc06,$acc02,7
1308         andc    $acc10,$s2,$mask80
1309         srwi    $acc07,$acc03,7
1310         andc    $acc11,$s3,$mask80
1311         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1312         sub     $acc01,$acc01,$acc05
1313         sub     $acc02,$acc02,$acc06
1314         sub     $acc03,$acc03,$acc07
1315         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1316         add     $acc09,$acc09,$acc09
1317         add     $acc10,$acc10,$acc10
1318         add     $acc11,$acc11,$acc11
1319         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1320         and     $acc01,$acc01,$mask1b
1321         and     $acc02,$acc02,$mask1b
1322         and     $acc03,$acc03,$mask1b
1323         xor     $acc00,$acc00,$acc08    # r2
1324         xor     $acc01,$acc01,$acc09
1325         xor     $acc02,$acc02,$acc10
1326         xor     $acc03,$acc03,$acc11
1327
1328         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1329         and     $acc05,$acc01,$mask80
1330         and     $acc06,$acc02,$mask80
1331         and     $acc07,$acc03,$mask80
1332         srwi    $acc08,$acc04,7         # r1>>7
1333         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1334         srwi    $acc09,$acc05,7
1335         andc    $acc13,$acc01,$mask80
1336         srwi    $acc10,$acc06,7
1337         andc    $acc14,$acc02,$mask80
1338         srwi    $acc11,$acc07,7
1339         andc    $acc15,$acc03,$mask80
1340         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1341         sub     $acc05,$acc05,$acc09
1342         sub     $acc06,$acc06,$acc10
1343         sub     $acc07,$acc07,$acc11
1344         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1345         add     $acc13,$acc13,$acc13
1346         add     $acc14,$acc14,$acc14
1347         add     $acc15,$acc15,$acc15
1348         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1349         and     $acc05,$acc05,$mask1b
1350         and     $acc06,$acc06,$mask1b
1351         and     $acc07,$acc07,$mask1b
1352         xor     $acc04,$acc04,$acc12    # r4
1353         xor     $acc05,$acc05,$acc13
1354         xor     $acc06,$acc06,$acc14
1355         xor     $acc07,$acc07,$acc15
1356
1357         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1358         and     $acc09,$acc05,$mask80
1359         srwi    $acc12,$acc08,7         # r1>>7
1360         and     $acc10,$acc06,$mask80
1361         srwi    $acc13,$acc09,7
1362         and     $acc11,$acc07,$mask80
1363         srwi    $acc14,$acc10,7
1364         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1365         srwi    $acc15,$acc11,7
1366         sub     $acc09,$acc09,$acc13
1367         sub     $acc10,$acc10,$acc14
1368         sub     $acc11,$acc11,$acc15
1369         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1370         andc    $acc13,$acc05,$mask80
1371         andc    $acc14,$acc06,$mask80
1372         andc    $acc15,$acc07,$mask80
1373         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1374         add     $acc13,$acc13,$acc13
1375         add     $acc14,$acc14,$acc14
1376         add     $acc15,$acc15,$acc15
1377         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1378         and     $acc09,$acc09,$mask1b
1379         and     $acc10,$acc10,$mask1b
1380         and     $acc11,$acc11,$mask1b
1381         xor     $acc08,$acc08,$acc12    # r8
1382         xor     $acc09,$acc09,$acc13
1383         xor     $acc10,$acc10,$acc14
1384         xor     $acc11,$acc11,$acc15
1385
1386         xor     $acc00,$acc00,$s0       # r2^r0
1387         xor     $acc01,$acc01,$s1
1388         xor     $acc02,$acc02,$s2
1389         xor     $acc03,$acc03,$s3
1390         xor     $acc04,$acc04,$s0       # r4^r0
1391         xor     $acc05,$acc05,$s1
1392         xor     $acc06,$acc06,$s2
1393         xor     $acc07,$acc07,$s3
1394 ___
# Shared by both word sizes.  On entry $acc00-$acc03 hold r2^r0,
# $acc04-$acc07 hold r4^r0 and $acc08-$acc11 hold r8 for the state words
# r0 in $s0-$s3.  Using 32-bit right rotates (rotrwi), each state word
# becomes
#     rotr(r0,8) ^ (r2^r0) ^ (r4^r0) ^ r8
#       ^ rotr(r8^r2^r0,24) ^ rotr(r8^r4^r0,16) ^ rotr(r8,8)
# and control branches back to Ldec_compact_loop.
#
# Ldec_compact_done is the loop exit: the state is XORed with the last key
# words loaded into $t0-$t3 and the routine returns with blr.  The heredoc
# ends with the .size directive for .AES_decrypt, an identification string
# and a final .align.
1395 $code.=<<___;
1396         rotrwi  $s0,$s0,8               # = ROTATE(r0,8)
1397         rotrwi  $s1,$s1,8
1398         xor     $s0,$s0,$acc00          # ^= r2^r0
1399         rotrwi  $s2,$s2,8
1400         xor     $s1,$s1,$acc01
1401         rotrwi  $s3,$s3,8
1402         xor     $s2,$s2,$acc02
1403         xor     $s3,$s3,$acc03
1404         xor     $acc00,$acc00,$acc08
1405         xor     $acc01,$acc01,$acc09
1406         xor     $acc02,$acc02,$acc10
1407         xor     $acc03,$acc03,$acc11
1408         xor     $s0,$s0,$acc04          # ^= r4^r0
1409         rotrwi  $acc00,$acc00,24
1410         xor     $s1,$s1,$acc05
1411         rotrwi  $acc01,$acc01,24
1412         xor     $s2,$s2,$acc06
1413         rotrwi  $acc02,$acc02,24
1414         xor     $s3,$s3,$acc07
1415         rotrwi  $acc03,$acc03,24
1416         xor     $acc04,$acc04,$acc08
1417         xor     $acc05,$acc05,$acc09
1418         xor     $acc06,$acc06,$acc10
1419         xor     $acc07,$acc07,$acc11
1420         xor     $s0,$s0,$acc08          # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1421         rotrwi  $acc04,$acc04,16
1422         xor     $s1,$s1,$acc09
1423         rotrwi  $acc05,$acc05,16
1424         xor     $s2,$s2,$acc10
1425         rotrwi  $acc06,$acc06,16
1426         xor     $s3,$s3,$acc11
1427         rotrwi  $acc07,$acc07,16
1428         xor     $s0,$s0,$acc00          # ^= ROTATE(r8^r2^r0,24)
1429         rotrwi  $acc08,$acc08,8
1430         xor     $s1,$s1,$acc01
1431         rotrwi  $acc09,$acc09,8
1432         xor     $s2,$s2,$acc02
1433         rotrwi  $acc10,$acc10,8
1434         xor     $s3,$s3,$acc03
1435         rotrwi  $acc11,$acc11,8
1436         xor     $s0,$s0,$acc04          # ^= ROTATE(r8^r4^r0,16)
1437         xor     $s1,$s1,$acc05
1438         xor     $s2,$s2,$acc06
1439         xor     $s3,$s3,$acc07
1440         xor     $s0,$s0,$acc08          # ^= ROTATE(r8,8)
1441         xor     $s1,$s1,$acc09
1442         xor     $s2,$s2,$acc10
1443         xor     $s3,$s3,$acc11
1444
1445         b       Ldec_compact_loop
1446 .align  4
1447 Ldec_compact_done:
1448         xor     $s0,$s0,$t0
1449         xor     $s1,$s1,$t1
1450         xor     $s2,$s2,$t2
1451         xor     $s3,$s3,$t3
1452         blr
1453         .long   0
1454         .byte   0,12,0x14,0,0,0,0,0
1455 .size   .AES_decrypt,.-.AES_decrypt
1456
1457 .asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1458 .align  7
1459 ___
1460
# Post-process and emit the accumulated assembly text:
#  - every `...` span is replaced by the value of eval'ing its contents
#    (the /e modifier), which resolves the arithmetic written between
#    backticks in the heredocs, e.g. the frame offsets and rotate counts;
#  - the result is written to STDOUT;
#  - close is checked so that a failed write is reported with die rather
#    than silently producing truncated output.
1461 $code =~ s/\`([^\`]*)\`/eval $1/gem;
1462 print $code;
1463 close STDOUT or die "error closing STDOUT: $!";