Lppc_AES_[en|de]crypt_compact: size optimization.
[openssl.git] / crypto / aes / asm / aes-ppc.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # Needs more work: key setup, page boundaries, CBC routine...
11 #
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # a 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17 # at 1/3 of ppc_AES_decrypt.
18
# Command-line handling and code-generation parameters.
# The first argument is the output name; it is only pattern-matched here
# and later handed to ppc-xlate.pl on its command line.
19 $output = shift;
20
# A name containing "64.s" selects 8-byte register slots and the
# doubleword store-with-update/load/store mnemonics; "32.s" selects
# 4-byte slots and the word forms.  Anything else is a fatal error.
# $STU/$POP/$PUSH are interpolated into the prologue/epilogue of the
# entry points below.
21 if ($output =~ /64\.s/) {
22         $SIZE_T =8;
23         $STU    ="stdu";
24         $POP    ="ld";
25         $PUSH   ="std";
26 } elsif ($output =~ /32\.s/) {
27         $SIZE_T =4;
28         $STU    ="stwu";
29         $POP    ="lwz";
30         $PUSH   ="stw";
31 } else { die "nonsense $output"; }
32
# Take the directory part of this script's own path (everything up to and
# including the last "/" or "\") and look for ppc-xlate.pl first next to
# the script, then in ../../perlasm/ relative to it.
# NOTE(review): if $0 contains no directory separator the match fails and
# $dir is presumably left undefined, so the lookup becomes relative to the
# current directory -- confirm that this is intended.
33 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
34 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
35 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
36 die "can't locate ppc-xlate.pl";
37
# Unless a second command-line argument is defined, reopen STDOUT as a
# pipe into "<this perl> <ppc-xlate.pl> <output>"; everything printed
# afterwards goes through that filter.
38 ( defined shift || open STDOUT,"| $^X $xlate $output" ) ||
39         die "can't call $xlate: $!";
40
# Stack frame size used by the entry points: 32 slots of $SIZE_T bytes.
# The top 21 slots hold the saved LR (via r0), the TOC register and r13-r31.
41 $FRAME=32*$SIZE_T;
42
# _data_word(LIST)
# Appends one ".long" directive to the global $code for every argument,
# writing each value twice as 8-digit hex ("0xV,0xV").  Each argument thus
# becomes an 8-byte table entry.  Consumes its arguments with shift and
# stops at the first undefined one.  Returns nothing useful.
# NOTE(review): the empty prototype "()" is not enforced at the call
# sites visible in this file because they use the &_data_word(...) form;
# presumably a plain _data_word(...) call placed after this definition
# would be rejected at compile time -- confirm before changing call style.
43 sub _data_word()
44 { my $i;
45     while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
46 }
47
# Symbolic register names interpolated into the assembly templates below.
#
# Stack pointer, TOC register, and the three arguments of the entry
# points: input block pointer, output block pointer, key pointer.
48 $sp="r1";
49 $toc="r2";
50 $inp="r3";
51 $out="r4";
52 $key="r5";
53
# Table base pointers.  $Tbl0 shares r3 with $inp, so the entry points
# load the input words before calling LAES_Te/LAES_Td, which overwrite it.
# $Tbl3 initially shares r2 with $toc (saved and restored by the entry
# points); see the adjustment further down for the 32-bit build.
54 $Tbl0="r3";
55 $Tbl1="r6";
56 $Tbl2="r7";
57 $Tbl3="r2";
58
# The four 32-bit words of the AES state.
59 $s0="r8";
60 $s1="r9";
61 $s2="r10";
62 $s3="r11";
63
# Temporaries; in the round code they hold the round-key words.
64 $t0="r12";
65 $t1="r13";
66 $t2="r14";
67 $t3="r15";
68
# Sixteen scratch registers for table indices and looked-up values.
69 $acc00="r16";
70 $acc01="r17";
71 $acc02="r18";
72 $acc03="r19";
73
74 $acc04="r20";
75 $acc05="r21";
76 $acc06="r22";
77 $acc07="r23";
78
79 $acc08="r24";
80 $acc09="r25";
81 $acc10="r26";
82 $acc11="r27";
83
84 $acc12="r28";
85 $acc13="r29";
86 $acc14="r30";
87 $acc15="r31";
88
# Register reshuffle per build flavour: the 64-bit build moves $t1 off r13
# onto r0; the 32-bit build moves $Tbl3 off r2 onto the register $t0 had
# (r12) and gives $t0 r0 instead.  The "die" guards make the script fail
# if the assignments above are edited without updating this fix-up.
89 # stay away from TLS pointer
90 if ($SIZE_T==8) { die if ($t1 ne "r13");  $t1="r0";             }
91 else            { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0";  }
# The compact routines do not use $Tbl2/$Tbl3 as table pointers, so the
# same registers double as the 0x80808080 and 0x1b1b1b1b byte masks.
# These aliases are taken after the fix-up above on purpose.
92 $mask80=$Tbl2;
93 $mask1b=$Tbl3;
94
# LAES_Te / LAES_Td: tiny position-independent helpers that return the
# address of the encryption / decryption tables in $Tbl0.
#
# Each helper parks the caller's LR in r0, executes "bcl 20,31,$+4" so
# that LR receives the address of the instruction that follows (8 bytes
# past the helper's label), copies it to $Tbl0 and adds the distance from
# that point to the table it serves, then restores LR and returns.
#
# Layout, assuming 4-byte instructions: each helper is 6 instructions
# (24 bytes).  ".space 32-24" pads LAES_Te to 32 bytes and
# ".space 128-32-24" pads the pair to 128 bytes, so the first table entry
# sits 128 bytes after LAES_Te.  Hence the distances 128-8 for LAES_Te and
# 128-8-32+2048+256 for LAES_Td, which additionally skips the 2048-byte
# word table and the 256-byte byte table emitted for encryption.
# Clobbers r0 and $Tbl0; LR is preserved.
95 $code.=<<___;
96 .text
97
98 .align  7
99 LAES_Te:
100         mflr    r0
101         bcl     20,31,\$+4
102         mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
103         addi    $Tbl0,$Tbl0,`128-8`
104         mtlr    r0
105         blr
106         .space  `32-24`
107 LAES_Td:
108         mflr    r0
109         bcl     20,31,\$+4
110         mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
111         addi    $Tbl0,$Tbl0,`128-8-32+2048+256`
112         mtlr    r0
113         blr
114         .space  `128-32-24`
115 ___
# Encryption word table, the first data emitted after the LAES_Te/LAES_Td
# helpers and therefore the table LAES_Te points $Tbl0 at.  256 values are
# listed; _data_word stores each one twice, giving 256 entries of 8 bytes
# (2048 bytes in total).
116 &_data_word(
117         0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
118         0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
119         0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
120         0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
121         0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
122         0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
123         0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
124         0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
125         0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
126         0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
127         0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
128         0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
129         0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
130         0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
131         0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
132         0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
133         0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
134         0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
135         0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
136         0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
137         0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
138         0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
139         0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
140         0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
141         0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
142         0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
143         0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
144         0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
145         0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
146         0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
147         0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
148         0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
149         0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
150         0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
151         0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
152         0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
153         0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
154         0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
155         0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
156         0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
157         0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
158         0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
159         0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
160         0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
161         0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
162         0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
163         0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
164         0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
165         0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
166         0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
167         0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
168         0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
169         0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
170         0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
171         0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
172         0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
173         0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
174         0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
175         0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
176         0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
177         0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
178         0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
179         0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
180         0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# 256-byte table emitted directly after the 2048-byte encryption word
# table.  The encryption routines address it as $Tbl0+2048 with byte
# loads, and their comments refer to it as Te4.
181 $code.=<<___;
182 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
183 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
184 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
185 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
186 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
187 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
188 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
189 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
190 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
191 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
192 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
193 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
194 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
195 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
196 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
197 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
198 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
199 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
200 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
201 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
202 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
203 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
204 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
205 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
206 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
207 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
208 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
209 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
210 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
211 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
212 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
213 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
214 ___
# Decryption word table: 256 values, each stored twice by _data_word
# (256 entries of 8 bytes).  It follows the 2048+256 bytes of encryption
# tables, which is the extra distance LAES_Td adds to reach it.
215 &_data_word(
216         0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
217         0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
218         0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
219         0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
220         0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
221         0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
222         0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
223         0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
224         0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
225         0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
226         0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
227         0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
228         0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
229         0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
230         0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
231         0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
232         0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
233         0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
234         0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
235         0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
236         0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
237         0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
238         0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
239         0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
240         0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
241         0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
242         0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
243         0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
244         0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
245         0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
246         0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
247         0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
248         0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
249         0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
250         0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
251         0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
252         0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
253         0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
254         0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
255         0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
256         0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
257         0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
258         0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
259         0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
260         0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
261         0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
262         0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
263         0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
264         0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
265         0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
266         0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
267         0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
268         0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
269         0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
270         0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
271         0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
272         0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
273         0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
274         0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
275         0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
276         0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
277         0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
278         0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
279         0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# This here-document starts with the 256-byte table that follows the
# decryption word table (addressed as $Tbl0+2048 after LAES_Td and called
# Td4 in the decryption code's comments) and then continues with the
# assembly routines themselves.
280 $code.=<<___;
281 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
282 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
283 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
284 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
285 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
286 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
287 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
288 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
289 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
290 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
291 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
292 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
293 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
294 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
295 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
296 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
297 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
298 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
299 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
300 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
301 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
302 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
303 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
304 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
305 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
306 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
307 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
308 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
309 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
310 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
311 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
312 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
313
314
# .AES_encrypt: exported entry point.
# Arguments, by the register names defined in the Perl header:
#   inp - pointer to the 16-byte input block
#   out - pointer to the 16-byte output block
#   key - pointer to the round keys; the round count is read from byte
#         offset 240 by the worker routine
# Encrypts one block with the compact worker routine.
315 .globl  .AES_encrypt
316 .align  7
317 .AES_encrypt:
# Prologue: keep the return address in r0 and allocate FRAME bytes.
318         mflr    r0
319         $STU    $sp,-$FRAME($sp)
320
# Save LR (held in r0), the TOC register and r13-r31 in the top 21 slots
# of the frame; the worker routine uses these registers as scratch.
321         $PUSH   r0,`$FRAME-$SIZE_T*21`($sp)
322         $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
323         $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
324         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
325         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
326         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
327         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
328         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
329         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
330         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
331         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
332         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
333         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
334         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
335         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
336         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
337         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
338         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
339         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
340         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
341         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
342
# Load the block as four 32-bit words.  This must happen before the call
# to LAES_Te, because the table pointer it returns shares its register
# with inp.
343         lwz     $s0,0($inp)
344         lwz     $s1,4($inp)
345         lwz     $s2,8($inp)
346         lwz     $s3,12($inp)
# Fetch the encryption table address, then transform s0-s3 in place.
347         bl      LAES_Te
348         bl      Lppc_AES_encrypt_compact
# Store the four result words to the output block.
349         stw     $s0,0($out)
350         stw     $s1,4($out)
351         stw     $s2,8($out)
352         stw     $s3,12($out)
353
# Epilogue: reload everything saved above, restore LR, release the frame.
354         $POP    r0,`$FRAME-$SIZE_T*21`($sp)
355         $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
356         $POP    r13,`$FRAME-$SIZE_T*19`($sp)
357         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
358         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
359         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
360         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
361         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
362         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
363         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
364         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
365         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
366         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
367         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
368         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
369         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
370         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
371         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
372         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
373         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
374         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
375         mtlr    r0
376         addi    $sp,$sp,$FRAME
377         blr
378
# Lppc_AES_encrypt: encryption using the 2048-byte word table.
# The entry points visible in this part of the file call the _compact
# variants instead of this routine.
# In:  s0-s3 = state words, key = round keys (round count at offset 240),
#      Tbl0 = table address as returned by LAES_Te.
# Out: s0-s3 = encrypted state.
# Clobbers t0-t3, acc00-acc15, Tbl1-Tbl3, key and CTR.
379 .align  4
380 Lppc_AES_encrypt:
# Fetch the round count and the first round key.
381         lwz     $acc00,240($key)
382         lwz     $t0,0($key)
383         lwz     $t1,4($key)
384         lwz     $t2,8($key)
385         lwz     $t3,12($key)
# Every table entry holds the same word twice, so a 4-byte load at
# entry+3, +2 or +1 returns that word rotated right by 8, 16 or 24 bits
# (byte order as on a big-endian machine).  One table thereby serves all
# four byte positions of a column.
386         addi    $Tbl1,$Tbl0,3
387         addi    $Tbl2,$Tbl0,2
388         addi    $Tbl3,$Tbl0,1
# The loop below runs rounds-1 times; the last round is handled after it.
389         addi    $acc00,$acc00,-1
390         addi    $key,$key,16
# Initial round-key addition.
391         xor     $s0,$s0,$t0
392         xor     $s1,$s1,$t1
393         xor     $s2,$s2,$t2
394         xor     $s3,$s3,$t3
395         mtctr   $acc00
396 .align  4
397 Lenc_loop:
# One full round.  Each rlwinm extracts one state byte already multiplied
# by 8 (the size of a table entry).  Output word i combines the top byte
# of word i, the second byte of word i+1, the third of word i+2 and the
# low byte of word i+3 (indices mod 4).  Index extraction, key loads and
# table loads are interleaved; statement order is deliberate.
398         rlwinm  $acc00,$s0,`32-24+3`,21,28
399         rlwinm  $acc01,$s1,`32-24+3`,21,28
400         rlwinm  $acc02,$s2,`32-24+3`,21,28
401         rlwinm  $acc03,$s3,`32-24+3`,21,28
# Next round key.
402         lwz     $t0,0($key)
403         lwz     $t1,4($key)
404         lwz     $t2,8($key)
405         lwz     $t3,12($key)
406         rlwinm  $acc04,$s1,`32-16+3`,21,28
407         rlwinm  $acc05,$s2,`32-16+3`,21,28
408         rlwinm  $acc06,$s3,`32-16+3`,21,28
409         rlwinm  $acc07,$s0,`32-16+3`,21,28
410         lwzx    $acc00,$Tbl0,$acc00
411         lwzx    $acc01,$Tbl0,$acc01
412         lwzx    $acc02,$Tbl0,$acc02
413         lwzx    $acc03,$Tbl0,$acc03
414         rlwinm  $acc08,$s2,`32-8+3`,21,28
415         rlwinm  $acc09,$s3,`32-8+3`,21,28
416         rlwinm  $acc10,$s0,`32-8+3`,21,28
417         rlwinm  $acc11,$s1,`32-8+3`,21,28
418         lwzx    $acc04,$Tbl1,$acc04
419         lwzx    $acc05,$Tbl1,$acc05
420         lwzx    $acc06,$Tbl1,$acc06
421         lwzx    $acc07,$Tbl1,$acc07
422         rlwinm  $acc12,$s3,`0+3`,21,28
423         rlwinm  $acc13,$s0,`0+3`,21,28
424         rlwinm  $acc14,$s1,`0+3`,21,28
425         rlwinm  $acc15,$s2,`0+3`,21,28
426         lwzx    $acc08,$Tbl2,$acc08
427         lwzx    $acc09,$Tbl2,$acc09
428         lwzx    $acc10,$Tbl2,$acc10
429         lwzx    $acc11,$Tbl2,$acc11
# Fold the four looked-up words of every column into the round key.
430         xor     $t0,$t0,$acc00
431         xor     $t1,$t1,$acc01
432         xor     $t2,$t2,$acc02
433         xor     $t3,$t3,$acc03
434         lwzx    $acc12,$Tbl3,$acc12
435         lwzx    $acc13,$Tbl3,$acc13
436         lwzx    $acc14,$Tbl3,$acc14
437         lwzx    $acc15,$Tbl3,$acc15
438         xor     $t0,$t0,$acc04
439         xor     $t1,$t1,$acc05
440         xor     $t2,$t2,$acc06
441         xor     $t3,$t3,$acc07
442         xor     $t0,$t0,$acc08
443         xor     $t1,$t1,$acc09
444         xor     $t2,$t2,$acc10
445         xor     $t3,$t3,$acc11
# The last xor writes the new state.
446         xor     $s0,$t0,$acc12
447         xor     $s1,$t1,$acc13
448         xor     $s2,$t2,$acc14
449         xor     $s3,$t3,$acc15
450         addi    $key,$key,16
451         bdnz-   Lenc_loop
452
# Last round: byte substitutions only, taken from the 256-byte table that
# follows the word table.  Tbl2 is repointed at it.
453         addi    $Tbl2,$Tbl0,2048
454         nop
# The next eight loads touch the byte table at 32-byte intervals; their
# results are overwritten below without being used.
455         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Te4
456         lwz     $acc09,`2048+32`($Tbl0)
457         lwz     $acc10,`2048+64`($Tbl0)
458         lwz     $acc11,`2048+96`($Tbl0)
459         lwz     $acc08,`2048+128`($Tbl0)
460         lwz     $acc09,`2048+160`($Tbl0)
461         lwz     $acc10,`2048+192`($Tbl0)
462         lwz     $acc11,`2048+224`($Tbl0)
# Plain byte indices this time (no scaling by 8), same byte selection
# pattern as in the loop.
463         rlwinm  $acc00,$s0,`32-24`,24,31
464         rlwinm  $acc01,$s1,`32-24`,24,31
465         rlwinm  $acc02,$s2,`32-24`,24,31
466         rlwinm  $acc03,$s3,`32-24`,24,31
# Final round key.
467         lwz     $t0,0($key)
468         lwz     $t1,4($key)
469         lwz     $t2,8($key)
470         lwz     $t3,12($key)
471         rlwinm  $acc04,$s1,`32-16`,24,31
472         rlwinm  $acc05,$s2,`32-16`,24,31
473         rlwinm  $acc06,$s3,`32-16`,24,31
474         rlwinm  $acc07,$s0,`32-16`,24,31
475         lbzx    $acc00,$Tbl2,$acc00
476         lbzx    $acc01,$Tbl2,$acc01
477         lbzx    $acc02,$Tbl2,$acc02
478         lbzx    $acc03,$Tbl2,$acc03
479         rlwinm  $acc08,$s2,`32-8`,24,31
480         rlwinm  $acc09,$s3,`32-8`,24,31
481         rlwinm  $acc10,$s0,`32-8`,24,31
482         rlwinm  $acc11,$s1,`32-8`,24,31
483         lbzx    $acc04,$Tbl2,$acc04
484         lbzx    $acc05,$Tbl2,$acc05
485         lbzx    $acc06,$Tbl2,$acc06
486         lbzx    $acc07,$Tbl2,$acc07
487         rlwinm  $acc12,$s3,`0`,24,31
488         rlwinm  $acc13,$s0,`0`,24,31
489         rlwinm  $acc14,$s1,`0`,24,31
490         rlwinm  $acc15,$s2,`0`,24,31
491         lbzx    $acc08,$Tbl2,$acc08
492         lbzx    $acc09,$Tbl2,$acc09
493         lbzx    $acc10,$Tbl2,$acc10
494         lbzx    $acc11,$Tbl2,$acc11
# Reassemble each word from its four substituted bytes: top byte first,
# then insert the second and third bytes, then OR in the low byte.
495         rlwinm  $s0,$acc00,24,0,7
496         rlwinm  $s1,$acc01,24,0,7
497         rlwinm  $s2,$acc02,24,0,7
498         rlwinm  $s3,$acc03,24,0,7
499         lbzx    $acc12,$Tbl2,$acc12
500         lbzx    $acc13,$Tbl2,$acc13
501         lbzx    $acc14,$Tbl2,$acc14
502         lbzx    $acc15,$Tbl2,$acc15
503         rlwimi  $s0,$acc04,16,8,15
504         rlwimi  $s1,$acc05,16,8,15
505         rlwimi  $s2,$acc06,16,8,15
506         rlwimi  $s3,$acc07,16,8,15
507         rlwimi  $s0,$acc08,8,16,23
508         rlwimi  $s1,$acc09,8,16,23
509         rlwimi  $s2,$acc10,8,16,23
510         rlwimi  $s3,$acc11,8,16,23
511         or      $s0,$s0,$acc12
512         or      $s1,$s1,$acc13
513         or      $s2,$s2,$acc14
514         or      $s3,$s3,$acc15
# Final round-key addition.
515         xor     $s0,$s0,$t0
516         xor     $s1,$s1,$t1
517         xor     $s2,$s2,$t2
518         xor     $s3,$s3,$t3
519         blr
520
# Lppc_AES_encrypt_compact: encryption that looks up only the 256-byte
# table and computes the column mixing arithmetically.  This is the
# routine .AES_encrypt calls.
# In:  s0-s3 = state words, key = round keys (round count at offset 240),
#      Tbl0 = table address as returned by LAES_Te.
# Out: s0-s3 = encrypted state.
# Clobbers t0-t3, acc00-acc15, Tbl1, mask80, mask1b, key and CTR.
521 .align  4
522 Lppc_AES_encrypt_compact:
# Round count and first round key.
523         lwz     $acc00,240($key)
524         lwz     $t0,0($key)
525         lwz     $t1,4($key)
526         lwz     $t2,8($key)
527         lwz     $t3,12($key)
# Tbl1 points at the byte table located 2048 bytes past the word table.
# mask80 and mask1b are built as 0x80808080 and 0x1b1b1b1b (lis sets the
# upper halfword, ori the lower one).
528         addi    $Tbl1,$Tbl0,2048
529         lis     $mask80,0x8080
530         lis     $mask1b,0x1b1b
531         addi    $key,$key,16
532         ori     $mask80,$mask80,0x8080
533         ori     $mask1b,$mask1b,0x1b1b
# CTR counts all rounds; the loop is left from its middle via bdz.
534         mtctr   $acc00
535 .align  4
536 Lenc_compact_loop:
# Add the round key loaded on the previous pass (or above).
537         xor     $s0,$s0,$t0
538         xor     $s1,$s1,$t1
539         xor     $s2,$s2,$t2
540         xor     $s3,$s3,$t3
# Byte substitution combined with the row shift: output word i takes the
# top byte of word i, the second byte of word i+1, the third of word i+2
# and the low byte of word i+3 (indices mod 4), each replaced through the
# byte table.  The next round key is loaded along the way.
541         rlwinm  $acc00,$s0,`32-24`,24,31
542         rlwinm  $acc01,$s1,`32-24`,24,31
543         rlwinm  $acc02,$s2,`32-24`,24,31
544         rlwinm  $acc03,$s3,`32-24`,24,31
545         lwz     $t0,0($key)
546         lwz     $t1,4($key)
547         lwz     $t2,8($key)
548         lwz     $t3,12($key)
549         rlwinm  $acc04,$s1,`32-16`,24,31
550         rlwinm  $acc05,$s2,`32-16`,24,31
551         rlwinm  $acc06,$s3,`32-16`,24,31
552         rlwinm  $acc07,$s0,`32-16`,24,31
553         lbzx    $acc00,$Tbl1,$acc00
554         lbzx    $acc01,$Tbl1,$acc01
555         lbzx    $acc02,$Tbl1,$acc02
556         lbzx    $acc03,$Tbl1,$acc03
557         rlwinm  $acc08,$s2,`32-8`,24,31
558         rlwinm  $acc09,$s3,`32-8`,24,31
559         rlwinm  $acc10,$s0,`32-8`,24,31
560         rlwinm  $acc11,$s1,`32-8`,24,31
561         lbzx    $acc04,$Tbl1,$acc04
562         lbzx    $acc05,$Tbl1,$acc05
563         lbzx    $acc06,$Tbl1,$acc06
564         lbzx    $acc07,$Tbl1,$acc07
565         rlwinm  $acc12,$s3,`0`,24,31
566         rlwinm  $acc13,$s0,`0`,24,31
567         rlwinm  $acc14,$s1,`0`,24,31
568         rlwinm  $acc15,$s2,`0`,24,31
569         lbzx    $acc08,$Tbl1,$acc08
570         lbzx    $acc09,$Tbl1,$acc09
571         lbzx    $acc10,$Tbl1,$acc10
572         lbzx    $acc11,$Tbl1,$acc11
# Reassemble the words: top byte, then the two middle bytes, then the
# low byte.
573         rlwinm  $s0,$acc00,24,0,7
574         rlwinm  $s1,$acc01,24,0,7
575         rlwinm  $s2,$acc02,24,0,7
576         rlwinm  $s3,$acc03,24,0,7
577         lbzx    $acc12,$Tbl1,$acc12
578         lbzx    $acc13,$Tbl1,$acc13
579         lbzx    $acc14,$Tbl1,$acc14
580         lbzx    $acc15,$Tbl1,$acc15
581         rlwimi  $s0,$acc04,16,8,15
582         rlwimi  $s1,$acc05,16,8,15
583         rlwimi  $s2,$acc06,16,8,15
584         rlwimi  $s3,$acc07,16,8,15
585         rlwimi  $s0,$acc08,8,16,23
586         rlwimi  $s1,$acc09,8,16,23
587         rlwimi  $s2,$acc10,8,16,23
588         rlwimi  $s3,$acc11,8,16,23
589         or      $s0,$s0,$acc12
590         or      $s1,$s1,$acc13
591         or      $s2,$s2,$acc14
592         or      $s3,$s3,$acc15
593
# Advance to the following round key and leave after the last round,
# which has no column mixing.
594         addi    $key,$key,16
595         bdz     Lenc_compact_done
596
# Column mixing, step 1: double every byte of each state word in GF(2^8).
# In the trailing comments r0 is the state word, r1 its per-byte top bits
# and r2 the doubled word; they are not machine registers.  The top bit of
# each byte is turned into a 0x1b correction: (r1-(r1>>7)) gives 0x7f in
# bytes whose top bit was set, which is then masked with 0x1b1b1b1b.
597         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
598         and     $acc01,$s1,$mask80
599         and     $acc02,$s2,$mask80
600         and     $acc03,$s3,$mask80
601         srwi    $acc04,$acc00,7         # r1>>7
602         srwi    $acc05,$acc01,7
603         srwi    $acc06,$acc02,7
604         srwi    $acc07,$acc03,7
605         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
606         andc    $acc09,$s1,$mask80
607         andc    $acc10,$s2,$mask80
608         andc    $acc11,$s3,$mask80
609         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
610         sub     $acc01,$acc01,$acc05
611         sub     $acc02,$acc02,$acc06
612         sub     $acc03,$acc03,$acc07
613         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
614         add     $acc09,$acc09,$acc09
615         add     $acc10,$acc10,$acc10
616         add     $acc11,$acc11,$acc11
617         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
618         and     $acc01,$acc01,$mask1b
619         and     $acc02,$acc02,$mask1b
620         and     $acc03,$acc03,$mask1b
621         xor     $acc00,$acc00,$acc08    # r2
622         xor     $acc01,$acc01,$acc09
623         xor     $acc02,$acc02,$acc10
624         xor     $acc03,$acc03,$acc11
625
# Column mixing, step 2: combine rotations.  In terms of left rotations
# the new word is  r2 ^ rotl(r0^r2,8) ^ rotl(r0,16) ^ rotl(r0,24):
# rotrwi by 24 equals a left rotation by 8, and rotating the 16-bit
# rotation left by another 8 yields the 24-bit one.
626         rotlwi  $acc12,$s0,16           # ROTATE(r0,16)
627         rotlwi  $acc13,$s1,16
628         rotlwi  $acc14,$s2,16
629         rotlwi  $acc15,$s3,16
630         xor     $s0,$s0,$acc00          # r0^r2
631         xor     $s1,$s1,$acc01
632         xor     $s2,$s2,$acc02
633         xor     $s3,$s3,$acc03
634         rotrwi  $s0,$s0,24              # ROTATE(r2^r0,24)
635         rotrwi  $s1,$s1,24
636         rotrwi  $s2,$s2,24
637         rotrwi  $s3,$s3,24
638         xor     $s0,$s0,$acc00          # ROTATE(r2^r0,24)^r2
639         xor     $s1,$s1,$acc01
640         xor     $s2,$s2,$acc02
641         xor     $s3,$s3,$acc03
642         rotlwi  $acc08,$acc12,8         # ROTATE(r0,24)
643         rotlwi  $acc09,$acc13,8
644         rotlwi  $acc10,$acc14,8
645         rotlwi  $acc11,$acc15,8
646         xor     $s0,$s0,$acc12          #
647         xor     $s1,$s1,$acc13
648         xor     $s2,$s2,$acc14
649         xor     $s3,$s3,$acc15
650         xor     $s0,$s0,$acc08          #
651         xor     $s1,$s1,$acc09
652         xor     $s2,$s2,$acc10
653         xor     $s3,$s3,$acc11
654
655         b       Lenc_compact_loop
656 .align  4
657 Lenc_compact_done:
# Final round-key addition with the key loaded during the last pass.
658         xor     $s0,$s0,$t0
659         xor     $s1,$s1,$t1
660         xor     $s2,$s2,$t2
661         xor     $s3,$s3,$t3
662         blr
663
# .AES_decrypt: exported entry point, the mirror image of .AES_encrypt.
# Arguments, by the register names defined in the Perl header:
#   inp - pointer to the 16-byte input block
#   out - pointer to the 16-byte output block
#   key - pointer to the round keys, passed through to the worker routine
# Decrypts one block with the compact worker routine.
664 .globl  .AES_decrypt
665 .align  7
666 .AES_decrypt:
# Prologue: keep the return address in r0 and allocate FRAME bytes.
667         mflr    r0
668         $STU    $sp,-$FRAME($sp)
669
# Save LR (held in r0), the TOC register and r13-r31 in the top 21 slots
# of the frame.
670         $PUSH   r0,`$FRAME-$SIZE_T*21`($sp)
671         $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
672         $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
673         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
674         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
675         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
676         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
677         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
678         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
679         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
680         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
681         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
682         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
683         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
684         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
685         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
686         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
687         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
688         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
689         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
690         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
691
# Load the block as four 32-bit words before LAES_Td overwrites the
# register that inp shares with the table pointer.
692         lwz     $s0,0($inp)
693         lwz     $s1,4($inp)
694         lwz     $s2,8($inp)
695         lwz     $s3,12($inp)
# Fetch the decryption table address, then transform s0-s3 in place.
696         bl      LAES_Td
697         bl      Lppc_AES_decrypt_compact
# Store the four result words to the output block.
698         stw     $s0,0($out)
699         stw     $s1,4($out)
700         stw     $s2,8($out)
701         stw     $s3,12($out)
702
# Epilogue: reload everything saved above, restore LR, release the frame.
703         $POP    r0,`$FRAME-$SIZE_T*21`($sp)
704         $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
705         $POP    r13,`$FRAME-$SIZE_T*19`($sp)
706         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
707         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
708         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
709         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
710         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
711         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
712         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
713         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
714         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
715         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
716         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
717         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
718         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
719         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
720         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
721         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
722         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
723         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
724         mtlr    r0
725         addi    $sp,$sp,$FRAME
726         blr
727
728 .align  4
# Lppc_AES_decrypt: AES block decryption driven by word-wide lookup tables.
# In:       $s0-$s3  four 32-bit state words
#           $key     key schedule pointer; the word at offset 240 gives the
#                    loop count (presumably the round count, confirm at caller)
#           $Tbl0    table base; word entries are 8 bytes apart and a byte
#                    table is read at offset 2048
# Out:      $s0-$s3  resulting state words
# Clobbers: $t0-$t3, $acc00-$acc15, $Tbl1-$Tbl3, $key, CTR.
# NOTE(review): per the register map at the top of the file $Tbl3 is r2, the
# same register as $toc, so the caller presumably preserves it; confirm.
729 Lppc_AES_decrypt:
730         lwz     $acc00,240($key)        # loop count source
731         lwz     $t0,0($key)             # first four key schedule words
732         lwz     $t1,4($key)
733         lwz     $t2,8($key)
734         lwz     $t3,12($key)
735         addi    $Tbl1,$Tbl0,3           # +3/+2/+1 byte views of the table at $Tbl0; presumably each
736         addi    $Tbl2,$Tbl0,2           # 8-byte entry holds its word twice (cf. _data_word), so these
737         addi    $Tbl3,$Tbl0,1           # loads return the entry rotated by whole bytes; confirm
738         addi    $acc00,$acc00,-1        # loop runs count-1 times, the last round follows the loop
739         addi    $key,$key,16            # step to the next 16-byte round key
740         xor     $s0,$s0,$t0             # fold the first round key into the state
741         xor     $s1,$s1,$t1
742         xor     $s2,$s2,$t2
743         xor     $s3,$s3,$t3
744         mtctr   $acc00                  # CTR = number of table rounds
745 .align  4
746 Ldec_loop:
747         rlwinm  $acc00,$s0,`32-24+3`,21,28     # ((s>>24)&0xff)*8, offset from the top byte
748         rlwinm  $acc01,$s1,`32-24+3`,21,28
749         rlwinm  $acc02,$s2,`32-24+3`,21,28
750         rlwinm  $acc03,$s3,`32-24+3`,21,28
751         lwz     $t0,0($key)             # round key for this round
752         lwz     $t1,4($key)
753         lwz     $t2,8($key)
754         lwz     $t3,12($key)
755         rlwinm  $acc04,$s3,`32-16+3`,21,28     # ((s>>16)&0xff)*8, source words taken in order s3,s0,s1,s2
756         rlwinm  $acc05,$s0,`32-16+3`,21,28
757         rlwinm  $acc06,$s1,`32-16+3`,21,28
758         rlwinm  $acc07,$s2,`32-16+3`,21,28
759         lwzx    $acc00,$Tbl0,$acc00     # top-byte lookups
760         lwzx    $acc01,$Tbl0,$acc01
761         lwzx    $acc02,$Tbl0,$acc02
762         lwzx    $acc03,$Tbl0,$acc03
763         rlwinm  $acc08,$s2,`32-8+3`,21,28      # ((s>>8)&0xff)*8, source words taken in order s2,s3,s0,s1
764         rlwinm  $acc09,$s3,`32-8+3`,21,28
765         rlwinm  $acc10,$s0,`32-8+3`,21,28
766         rlwinm  $acc11,$s1,`32-8+3`,21,28
767         lwzx    $acc04,$Tbl1,$acc04     # second-byte lookups through the +3 view
768         lwzx    $acc05,$Tbl1,$acc05
769         lwzx    $acc06,$Tbl1,$acc06
770         lwzx    $acc07,$Tbl1,$acc07
771         rlwinm  $acc12,$s1,`0+3`,21,28         # (s&0xff)*8, source words taken in order s1,s2,s3,s0
772         rlwinm  $acc13,$s2,`0+3`,21,28
773         rlwinm  $acc14,$s3,`0+3`,21,28
774         rlwinm  $acc15,$s0,`0+3`,21,28
775         lwzx    $acc08,$Tbl2,$acc08     # third-byte lookups through the +2 view
776         lwzx    $acc09,$Tbl2,$acc09
777         lwzx    $acc10,$Tbl2,$acc10
778         lwzx    $acc11,$Tbl2,$acc11
779         xor     $t0,$t0,$acc00          # accumulate: round key ^ the four lookups
780         xor     $t1,$t1,$acc01
781         xor     $t2,$t2,$acc02
782         xor     $t3,$t3,$acc03
783         lwzx    $acc12,$Tbl3,$acc12     # low-byte lookups through the +1 view
784         lwzx    $acc13,$Tbl3,$acc13
785         lwzx    $acc14,$Tbl3,$acc14
786         lwzx    $acc15,$Tbl3,$acc15
787         xor     $t0,$t0,$acc04
788         xor     $t1,$t1,$acc05
789         xor     $t2,$t2,$acc06
790         xor     $t3,$t3,$acc07
791         xor     $t0,$t0,$acc08
792         xor     $t1,$t1,$acc09
793         xor     $t2,$t2,$acc10
794         xor     $t3,$t3,$acc11
795         xor     $s0,$t0,$acc12          # last term lands directly in the new state
796         xor     $s1,$t1,$acc13
797         xor     $s2,$t2,$acc14
798         xor     $s3,$t3,$acc15
799         addi    $key,$key,16            # next round key
800         bdnz-   Ldec_loop               # repeat until CTR reaches zero
801 # last round: single-byte lookups in the table at $Tbl0+2048, bytes reassembled by hand
802         addi    $Tbl2,$Tbl0,2048        # $Tbl2 now addresses the byte table
803         nop
804         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Td4
805         lwz     $acc09,`2048+32`($Tbl0)
806         lwz     $acc10,`2048+64`($Tbl0)
807         lwz     $acc11,`2048+96`($Tbl0)
808         lwz     $acc08,`2048+128`($Tbl0)       # loaded values are never used, the registers are overwritten below
809         lwz     $acc09,`2048+160`($Tbl0)
810         lwz     $acc10,`2048+192`($Tbl0)
811         lwz     $acc11,`2048+224`($Tbl0)
812         rlwinm  $acc00,$s0,`32-24`,24,31       # (s>>24)&0xff, plain byte index this time
813         rlwinm  $acc01,$s1,`32-24`,24,31
814         rlwinm  $acc02,$s2,`32-24`,24,31
815         rlwinm  $acc03,$s3,`32-24`,24,31
816         lwz     $t0,0($key)             # final round key
817         lwz     $t1,4($key)
818         lwz     $t2,8($key)
819         lwz     $t3,12($key)
820         rlwinm  $acc04,$s3,`32-16`,24,31       # same word permutation as in the loop
821         rlwinm  $acc05,$s0,`32-16`,24,31
822         rlwinm  $acc06,$s1,`32-16`,24,31
823         rlwinm  $acc07,$s2,`32-16`,24,31
824         lbzx    $acc00,$Tbl2,$acc00     # byte lookups
825         lbzx    $acc01,$Tbl2,$acc01
826         lbzx    $acc02,$Tbl2,$acc02
827         lbzx    $acc03,$Tbl2,$acc03
828         rlwinm  $acc08,$s2,`32-8`,24,31
829         rlwinm  $acc09,$s3,`32-8`,24,31
830         rlwinm  $acc10,$s0,`32-8`,24,31
831         rlwinm  $acc11,$s1,`32-8`,24,31
832         lbzx    $acc04,$Tbl2,$acc04
833         lbzx    $acc05,$Tbl2,$acc05
834         lbzx    $acc06,$Tbl2,$acc06
835         lbzx    $acc07,$Tbl2,$acc07
836         rlwinm  $acc12,$s1,`0`,24,31
837         rlwinm  $acc13,$s2,`0`,24,31
838         rlwinm  $acc14,$s3,`0`,24,31
839         rlwinm  $acc15,$s0,`0`,24,31
840         lbzx    $acc08,$Tbl2,$acc08
841         lbzx    $acc09,$Tbl2,$acc09
842         lbzx    $acc10,$Tbl2,$acc10
843         lbzx    $acc11,$Tbl2,$acc11
844         rlwinm  $s0,$acc00,24,0,7       # first byte becomes bits 0-7 (most significant)
845         rlwinm  $s1,$acc01,24,0,7
846         rlwinm  $s2,$acc02,24,0,7
847         rlwinm  $s3,$acc03,24,0,7
848         lbzx    $acc12,$Tbl2,$acc12
849         lbzx    $acc13,$Tbl2,$acc13
850         lbzx    $acc14,$Tbl2,$acc14
851         lbzx    $acc15,$Tbl2,$acc15
852         rlwimi  $s0,$acc04,16,8,15      # insert second byte at bits 8-15
853         rlwimi  $s1,$acc05,16,8,15
854         rlwimi  $s2,$acc06,16,8,15
855         rlwimi  $s3,$acc07,16,8,15
856         rlwimi  $s0,$acc08,8,16,23      # insert third byte at bits 16-23
857         rlwimi  $s1,$acc09,8,16,23
858         rlwimi  $s2,$acc10,8,16,23
859         rlwimi  $s3,$acc11,8,16,23
860         or      $s0,$s0,$acc12          # low byte
861         or      $s1,$s1,$acc13
862         or      $s2,$s2,$acc14
863         or      $s3,$s3,$acc15
864         xor     $s0,$s0,$t0             # fold in the final round key
865         xor     $s1,$s1,$t1
866         xor     $s2,$s2,$t2
867         xor     $s3,$s3,$t3
868         blr                             # result is left in $s0-$s3
869
870 .align  4
# Lppc_AES_decrypt_compact: AES block decryption that uses only the 256-byte
# table at $Tbl0+2048; the column mixing is computed with packed byte
# arithmetic instead of word-table lookups.
# In:       $s0-$s3  four 32-bit state words
#           $key     key schedule pointer; the word at offset 240 is the loop
#                    count (presumably the round count, confirm at caller)
#           $Tbl0    table base, only the byte table at offset 2048 is read
# Out:      $s0-$s3  resulting state words
# Clobbers: $t0-$t3, $acc00-$acc15, $Tbl1, $mask80, $mask1b, $key, CTR.
871 Lppc_AES_decrypt_compact:
872         lwz     $acc00,240($key)        # loop count
873         lwz     $t0,0($key)             # first four key schedule words
874         lwz     $t1,4($key)
875         lwz     $t2,8($key)
876         lwz     $t3,12($key)
877         addi    $Tbl1,$Tbl0,2048        # $Tbl1 addresses the byte table
878         lis     $mask80,0x8080          # upper halfword of each mask
879         lis     $mask1b,0x1b1b
880         addi    $key,$key,16            # step to the next 16-byte round key
881         ori     $mask80,$mask80,0x8080  # low 32 bits now 0x80808080
882         ori     $mask1b,$mask1b,0x1b1b  # low 32 bits now 0x1b1b1b1b
883         mtctr   $acc00                  # CTR = number of loop passes
884 .align  4
885 Ldec_compact_loop:
886         xor     $s0,$s0,$t0             # fold the current round key into the state
887         xor     $s1,$s1,$t1
888         xor     $s2,$s2,$t2
889         xor     $s3,$s3,$t3
890         rlwinm  $acc00,$s0,`32-24`,24,31       # (s>>24)&0xff, byte index
891         rlwinm  $acc01,$s1,`32-24`,24,31
892         rlwinm  $acc02,$s2,`32-24`,24,31
893         rlwinm  $acc03,$s3,`32-24`,24,31
894         lwz     $t0,0($key)             # round key consumed on the next pass or at the exit
895         lwz     $t1,4($key)
896         lwz     $t2,8($key)
897         lwz     $t3,12($key)
898         rlwinm  $acc04,$s3,`32-16`,24,31       # (s>>16)&0xff, source words taken in order s3,s0,s1,s2
899         rlwinm  $acc05,$s0,`32-16`,24,31
900         rlwinm  $acc06,$s1,`32-16`,24,31
901         rlwinm  $acc07,$s2,`32-16`,24,31
902         lbzx    $acc00,$Tbl1,$acc00     # byte lookups
903         lbzx    $acc01,$Tbl1,$acc01
904         lbzx    $acc02,$Tbl1,$acc02
905         lbzx    $acc03,$Tbl1,$acc03
906         rlwinm  $acc08,$s2,`32-8`,24,31        # (s>>8)&0xff, source words taken in order s2,s3,s0,s1
907         rlwinm  $acc09,$s3,`32-8`,24,31
908         rlwinm  $acc10,$s0,`32-8`,24,31
909         rlwinm  $acc11,$s1,`32-8`,24,31
910         lbzx    $acc04,$Tbl1,$acc04
911         lbzx    $acc05,$Tbl1,$acc05
912         lbzx    $acc06,$Tbl1,$acc06
913         lbzx    $acc07,$Tbl1,$acc07
914         rlwinm  $acc12,$s1,`0`,24,31           # s&0xff, source words taken in order s1,s2,s3,s0
915         rlwinm  $acc13,$s2,`0`,24,31
916         rlwinm  $acc14,$s3,`0`,24,31
917         rlwinm  $acc15,$s0,`0`,24,31
918         lbzx    $acc08,$Tbl1,$acc08
919         lbzx    $acc09,$Tbl1,$acc09
920         lbzx    $acc10,$Tbl1,$acc10
921         lbzx    $acc11,$Tbl1,$acc11
922         rlwinm  $s0,$acc00,24,0,7       # first byte becomes bits 0-7 (most significant)
923         rlwinm  $s1,$acc01,24,0,7
924         rlwinm  $s2,$acc02,24,0,7
925         rlwinm  $s3,$acc03,24,0,7
926         lbzx    $acc12,$Tbl1,$acc12
927         lbzx    $acc13,$Tbl1,$acc13
928         lbzx    $acc14,$Tbl1,$acc14
929         lbzx    $acc15,$Tbl1,$acc15
930         rlwimi  $s0,$acc04,16,8,15      # insert second byte at bits 8-15
931         rlwimi  $s1,$acc05,16,8,15
932         rlwimi  $s2,$acc06,16,8,15
933         rlwimi  $s3,$acc07,16,8,15
934         rlwimi  $s0,$acc08,8,16,23      # insert third byte at bits 16-23
935         rlwimi  $s1,$acc09,8,16,23
936         rlwimi  $s2,$acc10,8,16,23
937         rlwimi  $s3,$acc11,8,16,23
938         or      $s0,$s0,$acc12          # low byte
939         or      $s1,$s1,$acc13
940         or      $s2,$s2,$acc14
941         or      $s3,$s3,$acc15
942
943         addi    $key,$key,16            # next round key
944         bdz     Ldec_compact_done       # CTR exhausted: the last pass skips the mixing below
945 # r2 = r0 doubled byte-wise: each byte is shifted left by one and 0x1b is xored in where bit 7 was set
946         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
947         and     $acc01,$s1,$mask80
948         and     $acc02,$s2,$mask80
949         and     $acc03,$s3,$mask80
950         srwi    $acc04,$acc00,7         # r1>>7
951         srwi    $acc05,$acc01,7
952         srwi    $acc06,$acc02,7
953         srwi    $acc07,$acc03,7
954         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
955         andc    $acc09,$s1,$mask80
956         andc    $acc10,$s2,$mask80
957         andc    $acc11,$s3,$mask80
958         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
959         sub     $acc01,$acc01,$acc05
960         sub     $acc02,$acc02,$acc06
961         sub     $acc03,$acc03,$acc07
962         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
963         add     $acc09,$acc09,$acc09
964         add     $acc10,$acc10,$acc10
965         add     $acc11,$acc11,$acc11
966         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
967         and     $acc01,$acc01,$mask1b
968         and     $acc02,$acc02,$mask1b
969         and     $acc03,$acc03,$mask1b
970         xor     $acc00,$acc00,$acc08    # r2
971         xor     $acc01,$acc01,$acc09
972         xor     $acc02,$acc02,$acc10
973         xor     $acc03,$acc03,$acc11
974 # r4 = r2 doubled the same way
975         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
976         and     $acc05,$acc01,$mask80
977         and     $acc06,$acc02,$mask80
978         and     $acc07,$acc03,$mask80
979         srwi    $acc08,$acc04,7         # r1>>7
980         srwi    $acc09,$acc05,7
981         srwi    $acc10,$acc06,7
982         srwi    $acc11,$acc07,7
983         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
984         andc    $acc13,$acc01,$mask80
985         andc    $acc14,$acc02,$mask80
986         andc    $acc15,$acc03,$mask80
987         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
988         sub     $acc05,$acc05,$acc09
989         sub     $acc06,$acc06,$acc10
990         sub     $acc07,$acc07,$acc11
991         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
992         add     $acc13,$acc13,$acc13
993         add     $acc14,$acc14,$acc14
994         add     $acc15,$acc15,$acc15
995         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
996         and     $acc05,$acc05,$mask1b
997         and     $acc06,$acc06,$mask1b
998         and     $acc07,$acc07,$mask1b
999         xor     $acc04,$acc04,$acc12    # r4
1000         xor     $acc05,$acc05,$acc13
1001         xor     $acc06,$acc06,$acc14
1002         xor     $acc07,$acc07,$acc15
1003 # r8 = r4 doubled the same way
1004         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1005         and     $acc09,$acc05,$mask80
1006         and     $acc10,$acc06,$mask80
1007         and     $acc11,$acc07,$mask80
1008         srwi    $acc12,$acc08,7         # r1>>7
1009         srwi    $acc13,$acc09,7
1010         srwi    $acc14,$acc10,7
1011         srwi    $acc15,$acc11,7
1012         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1013         sub     $acc09,$acc09,$acc13
1014         sub     $acc10,$acc10,$acc14
1015         sub     $acc11,$acc11,$acc15
1016         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1017         andc    $acc13,$acc05,$mask80
1018         andc    $acc14,$acc06,$mask80
1019         andc    $acc15,$acc07,$mask80
1020         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1021         add     $acc13,$acc13,$acc13
1022         add     $acc14,$acc14,$acc14
1023         add     $acc15,$acc15,$acc15
1024         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1025         and     $acc09,$acc09,$mask1b
1026         and     $acc10,$acc10,$mask1b
1027         and     $acc11,$acc11,$mask1b
1028         xor     $acc08,$acc08,$acc12    # r8
1029         xor     $acc09,$acc09,$acc13
1030         xor     $acc10,$acc10,$acc14
1031         xor     $acc11,$acc11,$acc15
1032 # combine, with ROTATE meaning rotate right (rotrwi): new state = ROTATE(r0,8) ^ (r2^r0) ^ (r4^r0) ^ r8 ^ ROTATE(r8^r2^r0,24) ^ ROTATE(r8^r4^r0,16) ^ ROTATE(r8,8)
1033         xor     $acc00,$acc00,$s0       # r2^r0
1034         xor     $acc01,$acc01,$s1
1035         xor     $acc02,$acc02,$s2
1036         xor     $acc03,$acc03,$s3
1037         xor     $acc04,$acc04,$s0       # r4^r0
1038         xor     $acc05,$acc05,$s1
1039         xor     $acc06,$acc06,$s2
1040         xor     $acc07,$acc07,$s3
1041         rotrwi  $s0,$s0,8               # = ROTATE(r0,8)
1042         rotrwi  $s1,$s1,8
1043         rotrwi  $s2,$s2,8
1044         rotrwi  $s3,$s3,8
1045         xor     $s0,$s0,$acc00          # ^= r2^r0
1046         xor     $s1,$s1,$acc01
1047         xor     $s2,$s2,$acc02
1048         xor     $s3,$s3,$acc03
1049         xor     $acc00,$acc00,$acc08    # r8^r2^r0
1050         xor     $acc01,$acc01,$acc09
1051         xor     $acc02,$acc02,$acc10
1052         xor     $acc03,$acc03,$acc11
1053         xor     $s0,$s0,$acc04          # ^= r4^r0
1054         xor     $s1,$s1,$acc05
1055         xor     $s2,$s2,$acc06
1056         xor     $s3,$s3,$acc07
1057         rotrwi  $acc00,$acc00,24        # ROTATE(r8^r2^r0,24)
1058         rotrwi  $acc01,$acc01,24
1059         rotrwi  $acc02,$acc02,24
1060         rotrwi  $acc03,$acc03,24
1061         xor     $acc04,$acc04,$acc08    # r8^r4^r0
1062         xor     $acc05,$acc05,$acc09
1063         xor     $acc06,$acc06,$acc10
1064         xor     $acc07,$acc07,$acc11
1065         xor     $s0,$s0,$acc08          # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1066         xor     $s1,$s1,$acc09
1067         xor     $s2,$s2,$acc10
1068         xor     $s3,$s3,$acc11
1069         rotrwi  $acc04,$acc04,16        # ROTATE(r8^r4^r0,16)
1070         rotrwi  $acc05,$acc05,16
1071         rotrwi  $acc06,$acc06,16
1072         rotrwi  $acc07,$acc07,16
1073         xor     $s0,$s0,$acc00          # ^= ROTATE(r8^r2^r0,24)
1074         xor     $s1,$s1,$acc01
1075         xor     $s2,$s2,$acc02
1076         xor     $s3,$s3,$acc03
1077         rotrwi  $acc08,$acc08,8         # ROTATE(r8,8)
1078         rotrwi  $acc09,$acc09,8
1079         rotrwi  $acc10,$acc10,8
1080         rotrwi  $acc11,$acc11,8
1081         xor     $s0,$s0,$acc04          # ^= ROTATE(r8^r4^r0,16)
1082         xor     $s1,$s1,$acc05
1083         xor     $s2,$s2,$acc06
1084         xor     $s3,$s3,$acc07
1085         xor     $s0,$s0,$acc08          # ^= ROTATE(r8,8)       
1086         xor     $s1,$s1,$acc09  
1087         xor     $s2,$s2,$acc10  
1088         xor     $s3,$s3,$acc11  
1089
1090         b       Ldec_compact_loop       # next pass starts by adding the key loaded above
1091 .align  4
1092 Ldec_compact_done:
1093         xor     $s0,$s0,$t0             # fold in the last round key loaded in the loop
1094         xor     $s1,$s1,$t1
1095         xor     $s2,$s2,$t2
1096         xor     $s3,$s3,$t3
1097         blr                             # result is left in $s0-$s3
1098 .long   0
1099 .asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1100 .align  7
1101 ___
1102
# Post-process the generated text: every back-quoted span in $code is
# evaluated as a Perl expression (eval $1 under /e) and replaced by its value,
# e.g. the frame offsets such as $FRAME-$SIZE_T*10 used by the register
# restores.  /g handles every occurrence, /m is kept from the original.
1103 $code =~ s/\`([^\`]*)\`/eval $1/gem;
1104 print $code;
# STDOUT may be a pipe into the translator opened near the top of the script.
# Buffered write errors and the child's exit status only surface at close, so
# fail loudly instead of leaving a truncated or missing output file behind.
1105 close STDOUT or die "error closing STDOUT: $! (child status $?)";