Initial draft of AES for PPC.
[openssl.git] / crypto / aes / asm / aes-ppc.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # Needs more work: key setup, page boundaries, CBC routine...
11 #
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact - at
17 # 1/3 of ppc_AES_decrypt.
18 # The first command-line argument names the output file; "64.s" or "32.s" in that name selects the flavour.
19 $output = shift;
20 # $SIZE_T is the save-slot size; $STU/$POP/$PUSH are the matching store-with-update, load and store mnemonics.
21 if ($output =~ /64\.s/) {
22         $SIZE_T =8;
23         $STU    ="stdu";
24         $POP    ="ld";
25         $PUSH   ="std";
26 } elsif ($output =~ /32\.s/) {
27         $SIZE_T =4;
28         $STU    ="stwu";
29         $POP    ="lwz";
30         $PUSH   ="stw";
31 } else { die "nonsense $output"; }
32 # Look for ppc-xlate.pl beside this script first, then under ../../perlasm/ relative to it.
33 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
34 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
35 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
36 die "can't locate ppc-xlate.pl";
37 # Unless a second argument is defined, pipe everything written to STDOUT through ppc-xlate.pl.
38 ( defined shift || open STDOUT,"| $^X $xlate $output" ) ||
39         die "can't call $xlate: $!";
40 # Frame size used by the generated prologues/epilogues: 32 slots of $SIZE_T bytes.
41 $FRAME=32*$SIZE_T;
42 # Append each argument to $code as one ".long" line holding the 32-bit value twice; stops at the first undef.
43 sub _data_word    # no prototype: the sub consumes a variable-length list of table words
44 { my $i;
45     while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
46 }
47 # Register map for the generated code; $inp/$out/$key are r3-r5 as used by AES_encrypt/AES_decrypt.
48 $sp="r1";
49 $toc="r2";
50 $inp="r3";
51 $out="r4";
52 $key="r5";
53 # Table base pointers. $Tbl0 shares r3 with $inp, so the input block is loaded before the table address is fetched.
54 $Tbl0="r3";
55 $Tbl1="r6";
56 $Tbl2="r7";
57 $Tbl3="r2";
58 # The four 32-bit words of the cipher state.
59 $s0="r8";
60 $s1="r9";
61 $s2="r10";
62 $s3="r11";
63 # Round-key words loaded from $key.
64 $t0="r12";
65 $t1="r13";
66 $t2="r14";
67 $t3="r15";
68 # Sixteen scratch registers for table indices and looked-up values.
69 $acc00="r16";
70 $acc01="r17";
71 $acc02="r18";
72 $acc03="r19";
73
74 $acc04="r20";
75 $acc05="r21";
76 $acc06="r22";
77 $acc07="r23";
78
79 $acc08="r24";
80 $acc09="r25";
81 $acc10="r26";
82 $acc11="r27";
83
84 $acc12="r28";
85 $acc13="r29";
86 $acc14="r30";
87 $acc15="r31";
88 # 64-bit: move $t1 off r13 onto r0.  32-bit: move $Tbl3 off r2 onto r12 and move $t0 to r0 so the two do not alias.
89 # stay away from TLS pointer
90 if ($SIZE_T==8) { $t1="r0";             }
91 else            { $Tbl3=$t0; $t0="r0";  }
92 $mask80=$Tbl2;    # loaded with 0x80808080 in Lppc_AES_encrypt_compact
93 $mask1b=$Tbl3;    # loaded with 0x1b1b1b1b in Lppc_AES_encrypt_compact
94 # LAES_Te / LAES_Td return in $Tbl0 the address of the Te / Td table: bcl+mflr yields the current address, addi adds the fixed distance to the data.
95 $code.=<<___;
96 .text
97
98 .align  7
99 LAES_Te:
100         mflr    r0
101         bcl     20,31,\$+4
102         mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
103         addi    $Tbl0,$Tbl0,`128-8`
104         mtlr    r0
105         blr
106         .space  `32-24`
107 LAES_Td:
108         mflr    r0
109         bcl     20,31,\$+4
110         mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
111         addi    $Tbl0,$Tbl0,`128-8-32+2048+256`
112         mtlr    r0
113         blr
114         .space  `128-32-24`
115 ___
116 &_data_word(    # Te: 256 words; each is emitted twice, giving 8-byte entries (2048 bytes in total)
117         0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
118         0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
119         0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
120         0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
121         0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
122         0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
123         0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
124         0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
125         0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
126         0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
127         0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
128         0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
129         0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
130         0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
131         0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
132         0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
133         0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
134         0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
135         0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
136         0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
137         0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
138         0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
139         0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
140         0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
141         0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
142         0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
143         0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
144         0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
145         0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
146         0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
147         0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
148         0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
149         0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
150         0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
151         0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
152         0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
153         0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
154         0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
155         0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
156         0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
157         0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
158         0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
159         0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
160         0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
161         0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
162         0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
163         0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
164         0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
165         0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
166         0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
167         0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
168         0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
169         0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
170         0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
171         0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
172         0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
173         0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
174         0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
175         0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
176         0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
177         0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
178         0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
179         0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
180         0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
181 $code.=<<___;    # 256 single-byte entries placed right after Te (offset 2048); the round code refers to them as Te4
182 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
183 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
184 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
185 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
186 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
187 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
188 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
189 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
190 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
191 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
192 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
193 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
194 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
195 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
196 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
197 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
198 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
199 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
200 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
201 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
202 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
203 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
204 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
205 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
206 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
207 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
208 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
209 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
210 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
211 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
212 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
213 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
214 ___
215 &_data_word(    # Td: 256 words for decryption, emitted twice each like Te; LAES_Td returns the address of this table
216         0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
217         0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
218         0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
219         0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
220         0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
221         0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
222         0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
223         0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
224         0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
225         0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
226         0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
227         0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
228         0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
229         0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
230         0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
231         0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
232         0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
233         0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
234         0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
235         0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
236         0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
237         0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
238         0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
239         0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
240         0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
241         0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
242         0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
243         0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
244         0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
245         0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
246         0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
247         0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
248         0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
249         0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
250         0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
251         0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
252         0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
253         0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
254         0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
255         0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
256         0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
257         0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
258         0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
259         0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
260         0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
261         0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
262         0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
263         0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
264         0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
265         0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
266         0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
267         0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
268         0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
269         0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
270         0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
271         0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
272         0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
273         0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
274         0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
275         0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
276         0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
277         0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
278         0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
279         0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
280 $code.=<<___;
281 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
282 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
283 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
284 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
285 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
286 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
287 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
288 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
289 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
290 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
291 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
292 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
293 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
294 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
295 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
296 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
297 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
298 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
299 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
300 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
301 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
302 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
303 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
304 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
305 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
306 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
307 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
308 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
309 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
310 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
311 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
312 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
313
314
315 .globl  .AES_encrypt
316 .align  7
317 .AES_encrypt:
318         mflr    r0
319         $STU    $sp,-$FRAME($sp)
320
321         $PUSH   r0,`$FRAME-$SIZE_T*21`($sp)
322         $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
323         $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
324         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
325         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
326         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
327         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
328         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
329         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
330         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
331         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
332         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
333         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
334         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
335         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
336         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
337         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
338         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
339         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
340         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
341         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
342
343         lwz     $s0,0($inp)
344         lwz     $s1,4($inp)
345         lwz     $s2,8($inp)
346         lwz     $s3,12($inp)
347         bl      LAES_Te
348         bl      Lppc_AES_encrypt_compact
349         stw     $s0,0($out)
350         stw     $s1,4($out)
351         stw     $s2,8($out)
352         stw     $s3,12($out)
353
354         $POP    r0,`$FRAME-$SIZE_T*21`($sp)
355         $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
356         $POP    r13,`$FRAME-$SIZE_T*19`($sp)
357         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
358         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
359         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
360         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
361         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
362         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
363         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
364         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
365         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
366         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
367         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
368         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
369         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
370         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
371         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
372         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
373         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
374         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
375         mtlr    r0
376         addi    $sp,$sp,$FRAME
377         blr
378
379 .align  4
380 Lppc_AES_encrypt:
381         lwz     $acc00,240($key)
382         lwz     $t0,0($key)
383         lwz     $t1,4($key)
384         lwz     $t2,8($key)
385         lwz     $t3,12($key)
386         addi    $Tbl1,$Tbl0,3
387         addi    $Tbl2,$Tbl0,2
388         addi    $Tbl3,$Tbl0,1
389         addi    $acc00,$acc00,-1
390         addi    $key,$key,16
391         xor     $s0,$s0,$t0
392         xor     $s1,$s1,$t1
393         xor     $s2,$s2,$t2
394         xor     $s3,$s3,$t3
395         mtctr   $acc00
396 .align  4
397 Lenc_loop:
398         rlwinm  $acc00,$s0,`32-24+3`,21,28
399         rlwinm  $acc01,$s1,`32-24+3`,21,28
400         rlwinm  $acc02,$s2,`32-24+3`,21,28
401         rlwinm  $acc03,$s3,`32-24+3`,21,28
402         lwz     $t0,0($key)
403         lwz     $t1,4($key)
404         lwz     $t2,8($key)
405         lwz     $t3,12($key)
406         rlwinm  $acc04,$s1,`32-16+3`,21,28
407         rlwinm  $acc05,$s2,`32-16+3`,21,28
408         rlwinm  $acc06,$s3,`32-16+3`,21,28
409         rlwinm  $acc07,$s0,`32-16+3`,21,28
410         lwzx    $acc00,$Tbl0,$acc00
411         lwzx    $acc01,$Tbl0,$acc01
412         lwzx    $acc02,$Tbl0,$acc02
413         lwzx    $acc03,$Tbl0,$acc03
414         rlwinm  $acc08,$s2,`32-8+3`,21,28
415         rlwinm  $acc09,$s3,`32-8+3`,21,28
416         rlwinm  $acc10,$s0,`32-8+3`,21,28
417         rlwinm  $acc11,$s1,`32-8+3`,21,28
418         lwzx    $acc04,$Tbl1,$acc04
419         lwzx    $acc05,$Tbl1,$acc05
420         lwzx    $acc06,$Tbl1,$acc06
421         lwzx    $acc07,$Tbl1,$acc07
422         rlwinm  $acc12,$s3,`0+3`,21,28
423         rlwinm  $acc13,$s0,`0+3`,21,28
424         rlwinm  $acc14,$s1,`0+3`,21,28
425         rlwinm  $acc15,$s2,`0+3`,21,28
426         lwzx    $acc08,$Tbl2,$acc08
427         lwzx    $acc09,$Tbl2,$acc09
428         lwzx    $acc10,$Tbl2,$acc10
429         lwzx    $acc11,$Tbl2,$acc11
430         xor     $t0,$t0,$acc00
431         xor     $t1,$t1,$acc01
432         xor     $t2,$t2,$acc02
433         xor     $t3,$t3,$acc03
434         lwzx    $acc12,$Tbl3,$acc12
435         lwzx    $acc13,$Tbl3,$acc13
436         lwzx    $acc14,$Tbl3,$acc14
437         lwzx    $acc15,$Tbl3,$acc15
438         xor     $t0,$t0,$acc04
439         xor     $t1,$t1,$acc05
440         xor     $t2,$t2,$acc06
441         xor     $t3,$t3,$acc07
442         xor     $t0,$t0,$acc08
443         xor     $t1,$t1,$acc09
444         xor     $t2,$t2,$acc10
445         xor     $t3,$t3,$acc11
446         xor     $s0,$t0,$acc12
447         xor     $s1,$t1,$acc13
448         xor     $s2,$t2,$acc14
449         xor     $s3,$t3,$acc15
450         addi    $key,$key,16
451         bdnz-   Lenc_loop
452
453         addi    $Tbl2,$Tbl0,2048
454         nop
455         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Te4
456         lwz     $acc09,`2048+32`($Tbl0)
457         lwz     $acc10,`2048+64`($Tbl0)
458         lwz     $acc11,`2048+96`($Tbl0)
459         lwz     $acc08,`2048+128`($Tbl0)
460         lwz     $acc09,`2048+160`($Tbl0)
461         lwz     $acc10,`2048+192`($Tbl0)
462         lwz     $acc11,`2048+224`($Tbl0)
463         rlwinm  $acc00,$s0,`32-24`,24,31
464         rlwinm  $acc01,$s1,`32-24`,24,31
465         rlwinm  $acc02,$s2,`32-24`,24,31
466         rlwinm  $acc03,$s3,`32-24`,24,31
467         lwz     $t0,0($key)
468         lwz     $t1,4($key)
469         lwz     $t2,8($key)
470         lwz     $t3,12($key)
471         rlwinm  $acc04,$s1,`32-16`,24,31
472         rlwinm  $acc05,$s2,`32-16`,24,31
473         rlwinm  $acc06,$s3,`32-16`,24,31
474         rlwinm  $acc07,$s0,`32-16`,24,31
475         lbzx    $acc00,$Tbl2,$acc00
476         lbzx    $acc01,$Tbl2,$acc01
477         lbzx    $acc02,$Tbl2,$acc02
478         lbzx    $acc03,$Tbl2,$acc03
479         rlwinm  $acc08,$s2,`32-8`,24,31
480         rlwinm  $acc09,$s3,`32-8`,24,31
481         rlwinm  $acc10,$s0,`32-8`,24,31
482         rlwinm  $acc11,$s1,`32-8`,24,31
483         lbzx    $acc04,$Tbl2,$acc04
484         lbzx    $acc05,$Tbl2,$acc05
485         lbzx    $acc06,$Tbl2,$acc06
486         lbzx    $acc07,$Tbl2,$acc07
487         rlwinm  $acc12,$s3,`0`,24,31
488         rlwinm  $acc13,$s0,`0`,24,31
489         rlwinm  $acc14,$s1,`0`,24,31
490         rlwinm  $acc15,$s2,`0`,24,31
491         lbzx    $acc08,$Tbl2,$acc08
492         lbzx    $acc09,$Tbl2,$acc09
493         lbzx    $acc10,$Tbl2,$acc10
494         lbzx    $acc11,$Tbl2,$acc11
495         rlwinm  $s0,$acc00,24,0,7
496         rlwinm  $s1,$acc01,24,0,7
497         rlwinm  $s2,$acc02,24,0,7
498         rlwinm  $s3,$acc03,24,0,7
499         lbzx    $acc12,$Tbl2,$acc12
500         lbzx    $acc13,$Tbl2,$acc13
501         lbzx    $acc14,$Tbl2,$acc14
502         lbzx    $acc15,$Tbl2,$acc15
503         rlwimi  $s0,$acc04,16,8,15
504         rlwimi  $s1,$acc05,16,8,15
505         rlwimi  $s2,$acc06,16,8,15
506         rlwimi  $s3,$acc07,16,8,15
507         rlwimi  $s0,$acc08,8,16,23
508         rlwimi  $s1,$acc09,8,16,23
509         rlwimi  $s2,$acc10,8,16,23
510         rlwimi  $s3,$acc11,8,16,23
511         or      $s0,$s0,$acc12
512         or      $s1,$s1,$acc13
513         or      $s2,$s2,$acc14
514         or      $s3,$s3,$acc15
515         xor     $s0,$s0,$t0
516         xor     $s1,$s1,$t1
517         xor     $s2,$s2,$t2
518         xor     $s3,$s3,$t3
519         blr
520
521 .align  4
522 Lppc_AES_encrypt_compact:
523         lwz     $acc00,240($key)
524         lwz     $t0,0($key)
525         lwz     $t1,4($key)
526         lwz     $t2,8($key)
527         lwz     $t3,12($key)
528         addi    $Tbl1,$Tbl0,2048
529         lis     $mask80,0x8080
530         lis     $mask1b,0x1b1b
531         addi    $acc00,$acc00,-1
532         addi    $key,$key,16
533         ori     $mask80,$mask80,0x8080
534         ori     $mask1b,$mask1b,0x1b1b
535         xor     $s0,$s0,$t0
536         xor     $s1,$s1,$t1
537         xor     $s2,$s2,$t2
538         xor     $s3,$s3,$t3
539         mtctr   $acc00
540 .align  4
541 Lenc_compact_loop:
542         rlwinm  $acc00,$s0,`32-24`,24,31
543         rlwinm  $acc01,$s1,`32-24`,24,31
544         rlwinm  $acc02,$s2,`32-24`,24,31
545         rlwinm  $acc03,$s3,`32-24`,24,31
546         lwz     $t0,0($key)
547         lwz     $t1,4($key)
548         lwz     $t2,8($key)
549         lwz     $t3,12($key)
550         rlwinm  $acc04,$s1,`32-16`,24,31
551         rlwinm  $acc05,$s2,`32-16`,24,31
552         rlwinm  $acc06,$s3,`32-16`,24,31
553         rlwinm  $acc07,$s0,`32-16`,24,31
554         lbzx    $acc00,$Tbl1,$acc00
555         lbzx    $acc01,$Tbl1,$acc01
556         lbzx    $acc02,$Tbl1,$acc02
557         lbzx    $acc03,$Tbl1,$acc03
558         rlwinm  $acc08,$s2,`32-8`,24,31
559         rlwinm  $acc09,$s3,`32-8`,24,31
560         rlwinm  $acc10,$s0,`32-8`,24,31
561         rlwinm  $acc11,$s1,`32-8`,24,31
562         lbzx    $acc04,$Tbl1,$acc04
563         lbzx    $acc05,$Tbl1,$acc05
564         lbzx    $acc06,$Tbl1,$acc06
565         lbzx    $acc07,$Tbl1,$acc07
566         rlwinm  $acc12,$s3,`0`,24,31
567         rlwinm  $acc13,$s0,`0`,24,31
568         rlwinm  $acc14,$s1,`0`,24,31
569         rlwinm  $acc15,$s2,`0`,24,31
570         lbzx    $acc08,$Tbl1,$acc08
571         lbzx    $acc09,$Tbl1,$acc09
572         lbzx    $acc10,$Tbl1,$acc10
573         lbzx    $acc11,$Tbl1,$acc11
574         rlwinm  $s0,$acc00,24,0,7
575         rlwinm  $s1,$acc01,24,0,7
576         rlwinm  $s2,$acc02,24,0,7
577         rlwinm  $s3,$acc03,24,0,7
578         lbzx    $acc12,$Tbl1,$acc12
579         lbzx    $acc13,$Tbl1,$acc13
580         lbzx    $acc14,$Tbl1,$acc14
581         lbzx    $acc15,$Tbl1,$acc15
582         rlwimi  $s0,$acc04,16,8,15
583         rlwimi  $s1,$acc05,16,8,15
584         rlwimi  $s2,$acc06,16,8,15
585         rlwimi  $s3,$acc07,16,8,15
586         rlwimi  $s0,$acc08,8,16,23
587         rlwimi  $s1,$acc09,8,16,23
588         rlwimi  $s2,$acc10,8,16,23
589         rlwimi  $s3,$acc11,8,16,23
590         or      $s0,$s0,$acc12
591         or      $s1,$s1,$acc13
592         or      $s2,$s2,$acc14
593         or      $s3,$s3,$acc15
594
595         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
596         and     $acc01,$s1,$mask80
597         and     $acc02,$s2,$mask80
598         and     $acc03,$s3,$mask80
599         srwi    $acc04,$acc00,7         # r1>>7
600         srwi    $acc05,$acc01,7
601         srwi    $acc06,$acc02,7
602         srwi    $acc07,$acc03,7
603         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
604         andc    $acc09,$s1,$mask80
605         andc    $acc10,$s2,$mask80
606         andc    $acc11,$s3,$mask80
607         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
608         sub     $acc01,$acc01,$acc05
609         sub     $acc02,$acc02,$acc06
610         sub     $acc03,$acc03,$acc07
611         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
612         add     $acc09,$acc09,$acc09
613         add     $acc10,$acc10,$acc10
614         add     $acc11,$acc11,$acc11
615         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
616         and     $acc01,$acc01,$mask1b
617         and     $acc02,$acc02,$mask1b
618         and     $acc03,$acc03,$mask1b
619         xor     $acc00,$acc00,$acc08    # r2
620         xor     $acc01,$acc01,$acc09
621         xor     $acc02,$acc02,$acc10
622         xor     $acc03,$acc03,$acc11
623
624         rotlwi  $acc12,$s0,16           # ROTATE(r0,16)
625         rotlwi  $acc13,$s1,16
626         rotlwi  $acc14,$s2,16
627         rotlwi  $acc15,$s3,16
628         xor     $s0,$s0,$acc00          # r0^r2
629         xor     $s1,$s1,$acc01
630         xor     $s2,$s2,$acc02
631         xor     $s3,$s3,$acc03
632         rotrwi  $s0,$s0,24              # ROTATE(r2^r0,24)
633         rotrwi  $s1,$s1,24
634         rotrwi  $s2,$s2,24
635         rotrwi  $s3,$s3,24
636         xor     $s0,$s0,$acc00          # ROTATE(r2^r0,24)^r2
637         xor     $s1,$s1,$acc01
638         xor     $s2,$s2,$acc02
639         xor     $s3,$s3,$acc03
640         rotlwi  $acc08,$acc12,8         # ROTATE(r0,24)
641         rotlwi  $acc09,$acc13,8
642         rotlwi  $acc10,$acc14,8
643         rotlwi  $acc11,$acc15,8
644         xor     $s0,$s0,$acc12          #
645         xor     $s1,$s1,$acc13
646         xor     $s2,$s2,$acc14
647         xor     $s3,$s3,$acc15
648         xor     $s0,$s0,$acc08          #
649         xor     $s1,$s1,$acc09
650         xor     $s2,$s2,$acc10
651         xor     $s3,$s3,$acc11
652
653         xor     $s0,$s0,$t0
654         xor     $s1,$s1,$t1
655         xor     $s2,$s2,$t2
656         xor     $s3,$s3,$t3
657
658         addi    $key,$key,16
659         bdnz-   Lenc_compact_loop
660
661         rlwinm  $acc00,$s0,`32-24`,24,31
662         rlwinm  $acc01,$s1,`32-24`,24,31
663         rlwinm  $acc02,$s2,`32-24`,24,31
664         rlwinm  $acc03,$s3,`32-24`,24,31
665         lwz     $t0,0($key)
666         lwz     $t1,4($key)
667         lwz     $t2,8($key)
668         lwz     $t3,12($key)
669         rlwinm  $acc04,$s1,`32-16`,24,31
670         rlwinm  $acc05,$s2,`32-16`,24,31
671         rlwinm  $acc06,$s3,`32-16`,24,31
672         rlwinm  $acc07,$s0,`32-16`,24,31
673         lbzx    $acc00,$Tbl1,$acc00
674         lbzx    $acc01,$Tbl1,$acc01
675         lbzx    $acc02,$Tbl1,$acc02
676         lbzx    $acc03,$Tbl1,$acc03
677         rlwinm  $acc08,$s2,`32-8`,24,31
678         rlwinm  $acc09,$s3,`32-8`,24,31
679         rlwinm  $acc10,$s0,`32-8`,24,31
680         rlwinm  $acc11,$s1,`32-8`,24,31
681         lbzx    $acc04,$Tbl1,$acc04
682         lbzx    $acc05,$Tbl1,$acc05
683         lbzx    $acc06,$Tbl1,$acc06
684         lbzx    $acc07,$Tbl1,$acc07
685         rlwinm  $acc12,$s3,`0`,24,31
686         rlwinm  $acc13,$s0,`0`,24,31
687         rlwinm  $acc14,$s1,`0`,24,31
688         rlwinm  $acc15,$s2,`0`,24,31
689         lbzx    $acc08,$Tbl1,$acc08
690         lbzx    $acc09,$Tbl1,$acc09
691         lbzx    $acc10,$Tbl1,$acc10
692         lbzx    $acc11,$Tbl1,$acc11
693         rlwinm  $s0,$acc00,24,0,7
694         rlwinm  $s1,$acc01,24,0,7
695         rlwinm  $s2,$acc02,24,0,7
696         rlwinm  $s3,$acc03,24,0,7
697         lbzx    $acc12,$Tbl1,$acc12
698         lbzx    $acc13,$Tbl1,$acc13
699         lbzx    $acc14,$Tbl1,$acc14
700         lbzx    $acc15,$Tbl1,$acc15
701         rlwimi  $s0,$acc04,16,8,15
702         rlwimi  $s1,$acc05,16,8,15
703         rlwimi  $s2,$acc06,16,8,15
704         rlwimi  $s3,$acc07,16,8,15
705         rlwimi  $s0,$acc08,8,16,23
706         rlwimi  $s1,$acc09,8,16,23
707         rlwimi  $s2,$acc10,8,16,23
708         rlwimi  $s3,$acc11,8,16,23
709         or      $s0,$s0,$acc12
710         or      $s1,$s1,$acc13
711         or      $s2,$s2,$acc14
712         or      $s3,$s3,$acc15
713         xor     $s0,$s0,$t0
714         xor     $s1,$s1,$t1
715         xor     $s2,$s2,$t2
716         xor     $s3,$s3,$t3
717         blr
718
# AES_decrypt(inp, out, key): public entry point. Builds a FRAME-byte
# stack frame, saves LR, r2 and r13-r31 in it, loads the 16-byte block at
# inp as four words into s0-s3, runs the compact decryption core on them
# and stores the four result words at out. Every register saved in the
# prologue is restored before returning.
719 .globl  .AES_decrypt
720 .align  7
721 .AES_decrypt:
722         mflr    r0                      # LR is held in r0 until it is spilled below
723         $STU    $sp,-$FRAME($sp)        # allocate the frame (store-with-update keeps the back chain)
724
725         $PUSH   r0,`$FRAME-$SIZE_T*21`($sp)     # return address is kept inside this frame
726         $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)   # r2 is aliased as Tbl3 in this file, so preserve it
727         $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)    # r13..r31 go to consecutive slots, r31 topmost
728         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
729         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
730         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
731         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
732         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
733         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
734         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
735         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
736         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
737         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
738         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
739         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
740         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
741         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
742         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
743         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
744         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
745         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
746
747         lwz     $s0,0($inp)             # read the whole input block first: inp and Tbl0 are the same register
748         lwz     $s1,4($inp)
749         lwz     $s2,8($inp)
750         lwz     $s3,12($inp)
751         bl      LAES_Td                 # defined outside this view; presumably points Tbl0 at the Td tables - confirm
752         bl      Lppc_AES_decrypt_compact        # transforms s0-s3 in place using the schedule at key
753         stw     $s0,0($out)             # write the four result words to the output block
754         stw     $s1,4($out)
755         stw     $s2,8($out)
756         stw     $s3,12($out)
757
758         $POP    r0,`$FRAME-$SIZE_T*21`($sp)     # reload everything from the slots used in the prologue
759         $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
760         $POP    r13,`$FRAME-$SIZE_T*19`($sp)
761         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
762         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
763         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
764         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
765         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
766         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
767         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
768         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
769         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
770         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
771         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
772         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
773         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
774         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
775         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
776         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
777         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
778         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
779         mtlr    r0                      # put the return address back in LR
780         addi    $sp,$sp,$FRAME          # release the frame
781         blr
782
# Table-driven decryption core (not the routine .AES_decrypt calls; that
# one is Lppc_AES_decrypt_compact).
#   In:  s0-s3 = state words, key = key schedule, Tbl0 = table base.
#   Out: s0-s3 = transformed state.
#   Clobbers: t0-t3, acc00-acc15, Tbl1-Tbl3, key, CTR.
# Table entries are addressed with an 8-byte stride (_data_word emits
# every word twice), which lets Tbl1-Tbl3 be plain byte offsets into the
# same table.
783 .align  4
784 Lppc_AES_decrypt:
785         lwz     $acc00,240($key)        # loop count source (presumably the round count - confirm)
786         lwz     $t0,0($key)             # first round key
787         lwz     $t1,4($key)
788         lwz     $t2,8($key)
789         lwz     $t3,12($key)
790         addi    $Tbl1,$Tbl0,3           # byte-offset views of the table: a word load at +3/+2/+1
791         addi    $Tbl2,$Tbl0,2           # straddles the doubled entry and so returns a
792         addi    $Tbl3,$Tbl0,1           # byte-rotated copy of it
793         addi    $acc00,$acc00,-1        # the last round is handled separately after the loop
794         addi    $key,$key,16            # advance to the next round key
795         xor     $s0,$s0,$t0             # initial round-key addition
796         xor     $s1,$s1,$t1
797         xor     $s2,$s2,$t2
798         xor     $s3,$s3,$t3
799         mtctr   $acc00                  # CTR drives bdnz below
800 .align  4
801 Ldec_loop:
802         rlwinm  $acc00,$s0,`32-24+3`,21,28      # top byte of each state word, scaled by 8
803         rlwinm  $acc01,$s1,`32-24+3`,21,28
804         rlwinm  $acc02,$s2,`32-24+3`,21,28
805         rlwinm  $acc03,$s3,`32-24+3`,21,28
806         lwz     $t0,0($key)             # this round's key, loaded early to overlap with the lookups
807         lwz     $t1,4($key)
808         lwz     $t2,8($key)
809         lwz     $t3,12($key)
810         rlwinm  $acc04,$s3,`32-16+3`,21,28      # second byte, taken from the previous word (s3,s0,s1,s2)
811         rlwinm  $acc05,$s0,`32-16+3`,21,28
812         rlwinm  $acc06,$s1,`32-16+3`,21,28
813         rlwinm  $acc07,$s2,`32-16+3`,21,28
814         lwzx    $acc00,$Tbl0,$acc00     # table lookup for the top bytes
815         lwzx    $acc01,$Tbl0,$acc01
816         lwzx    $acc02,$Tbl0,$acc02
817         lwzx    $acc03,$Tbl0,$acc03
818         rlwinm  $acc08,$s2,`32-8+3`,21,28       # third byte, taken two words away (s2,s3,s0,s1)
819         rlwinm  $acc09,$s3,`32-8+3`,21,28
820         rlwinm  $acc10,$s0,`32-8+3`,21,28
821         rlwinm  $acc11,$s1,`32-8+3`,21,28
822         lwzx    $acc04,$Tbl1,$acc04     # lookup through the +3 view
823         lwzx    $acc05,$Tbl1,$acc05
824         lwzx    $acc06,$Tbl1,$acc06
825         lwzx    $acc07,$Tbl1,$acc07
826         rlwinm  $acc12,$s1,`0+3`,21,28  # low byte, taken from the next word (s1,s2,s3,s0)
827         rlwinm  $acc13,$s2,`0+3`,21,28
828         rlwinm  $acc14,$s3,`0+3`,21,28
829         rlwinm  $acc15,$s0,`0+3`,21,28
830         lwzx    $acc08,$Tbl2,$acc08     # lookup through the +2 view
831         lwzx    $acc09,$Tbl2,$acc09
832         lwzx    $acc10,$Tbl2,$acc10
833         lwzx    $acc11,$Tbl2,$acc11
834         xor     $t0,$t0,$acc00          # fold the four lookups into the round key ...
835         xor     $t1,$t1,$acc01
836         xor     $t2,$t2,$acc02
837         xor     $t3,$t3,$acc03
838         lwzx    $acc12,$Tbl3,$acc12     # lookup through the +1 view
839         lwzx    $acc13,$Tbl3,$acc13
840         lwzx    $acc14,$Tbl3,$acc14
841         lwzx    $acc15,$Tbl3,$acc15
842         xor     $t0,$t0,$acc04
843         xor     $t1,$t1,$acc05
844         xor     $t2,$t2,$acc06
845         xor     $t3,$t3,$acc07
846         xor     $t0,$t0,$acc08
847         xor     $t1,$t1,$acc09
848         xor     $t2,$t2,$acc10
849         xor     $t3,$t3,$acc11
850         xor     $s0,$t0,$acc12          # ... and the last xor writes the new state
851         xor     $s1,$t1,$acc13
852         xor     $s2,$t2,$acc14
853         xor     $s3,$t3,$acc15
854         addi    $key,$key,16            # next round key
855         bdnz-   Ldec_loop
856
# Final round: byte lookups in the table at Tbl0+2048, no word tables.
857         addi    $Tbl2,$Tbl0,2048        # Tbl2 is repurposed as the byte-table base
858         nop
859         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Td4
860         lwz     $acc09,`2048+32`($Tbl0) # loaded values are discarded; the loads touch the table every 32 bytes
861         lwz     $acc10,`2048+64`($Tbl0)
862         lwz     $acc11,`2048+96`($Tbl0)
863         lwz     $acc08,`2048+128`($Tbl0)
864         lwz     $acc09,`2048+160`($Tbl0)
865         lwz     $acc10,`2048+192`($Tbl0)
866         lwz     $acc11,`2048+224`($Tbl0)
867         rlwinm  $acc00,$s0,`32-24`,24,31        # unscaled byte indices this time (1-byte entries)
868         rlwinm  $acc01,$s1,`32-24`,24,31
869         rlwinm  $acc02,$s2,`32-24`,24,31
870         rlwinm  $acc03,$s3,`32-24`,24,31
871         lwz     $t0,0($key)             # last round key
872         lwz     $t1,4($key)
873         lwz     $t2,8($key)
874         lwz     $t3,12($key)
875         rlwinm  $acc04,$s3,`32-16`,24,31
876         rlwinm  $acc05,$s0,`32-16`,24,31
877         rlwinm  $acc06,$s1,`32-16`,24,31
878         rlwinm  $acc07,$s2,`32-16`,24,31
879         lbzx    $acc00,$Tbl2,$acc00     # substitute each selected byte
880         lbzx    $acc01,$Tbl2,$acc01
881         lbzx    $acc02,$Tbl2,$acc02
882         lbzx    $acc03,$Tbl2,$acc03
883         rlwinm  $acc08,$s2,`32-8`,24,31
884         rlwinm  $acc09,$s3,`32-8`,24,31
885         rlwinm  $acc10,$s0,`32-8`,24,31
886         rlwinm  $acc11,$s1,`32-8`,24,31
887         lbzx    $acc04,$Tbl2,$acc04
888         lbzx    $acc05,$Tbl2,$acc05
889         lbzx    $acc06,$Tbl2,$acc06
890         lbzx    $acc07,$Tbl2,$acc07
891         rlwinm  $acc12,$s1,`0`,24,31
892         rlwinm  $acc13,$s2,`0`,24,31
893         rlwinm  $acc14,$s3,`0`,24,31
894         rlwinm  $acc15,$s0,`0`,24,31
895         lbzx    $acc08,$Tbl2,$acc08
896         lbzx    $acc09,$Tbl2,$acc09
897         lbzx    $acc10,$Tbl2,$acc10
898         lbzx    $acc11,$Tbl2,$acc11
899         rlwinm  $s0,$acc00,24,0,7       # reassemble words: first byte into the top position
900         rlwinm  $s1,$acc01,24,0,7
901         rlwinm  $s2,$acc02,24,0,7
902         rlwinm  $s3,$acc03,24,0,7
903         lbzx    $acc12,$Tbl2,$acc12
904         lbzx    $acc13,$Tbl2,$acc13
905         lbzx    $acc14,$Tbl2,$acc14
906         lbzx    $acc15,$Tbl2,$acc15
907         rlwimi  $s0,$acc04,16,8,15      # insert second byte
908         rlwimi  $s1,$acc05,16,8,15
909         rlwimi  $s2,$acc06,16,8,15
910         rlwimi  $s3,$acc07,16,8,15
911         rlwimi  $s0,$acc08,8,16,23      # insert third byte
912         rlwimi  $s1,$acc09,8,16,23
913         rlwimi  $s2,$acc10,8,16,23
914         rlwimi  $s3,$acc11,8,16,23
915         or      $s0,$s0,$acc12          # low byte needs no shift
916         or      $s1,$s1,$acc13
917         or      $s2,$s2,$acc14
918         or      $s3,$s3,$acc15
919         xor     $s0,$s0,$t0             # final round-key addition
920         xor     $s1,$s1,$t1
921         xor     $s2,$s2,$t2
922         xor     $s3,$s3,$t3
923         blr
924
# Compact decryption core, the one .AES_decrypt calls. It only ever reads
# the byte table at Tbl0+2048 (the one labelled Td4 in Lppc_AES_decrypt);
# the column mixing that the word tables would provide is computed
# arithmetically instead.
#   In:  s0-s3 = state words, key = key schedule, Tbl0 = table base.
#   Out: s0-s3 = transformed state.
#   Clobbers: t0-t3, acc00-acc15, Tbl1, mask80, mask1b, key, CTR.
925 .align  4
926 Lppc_AES_decrypt_compact:
927         lwz     $acc00,240($key)        # loop count source (presumably the round count - confirm)
928         lwz     $t0,0($key)             # first round key
929         lwz     $t1,4($key)
930         lwz     $t2,8($key)
931         lwz     $t3,12($key)
932         addi    $Tbl1,$Tbl0,2048        # base of the byte table
933         lis     $mask80,0x8080          # lis/ori pairs build the 0x80808080 and
934         lis     $mask1b,0x1b1b          # 0x1b1b1b1b constants used by the doubling steps
935         addi    $acc00,$acc00,-1        # the last round is handled separately after the loop
936         addi    $key,$key,16
937         ori     $mask80,$mask80,0x8080
938         ori     $mask1b,$mask1b,0x1b1b
939         xor     $s0,$s0,$t0             # initial round-key addition
940         xor     $s1,$s1,$t1
941         xor     $s2,$s2,$t2
942         xor     $s3,$s3,$t3
943         mtctr   $acc00                  # CTR drives bdnz below
944 .align  4
945 Ldec_compact_loop:
# Step 1: gather one byte from each state word (same byte-to-word pattern
# as Ldec_loop), substitute it through the byte table, rebuild the words.
946         rlwinm  $acc00,$s0,`32-24`,24,31        # top byte of each word
947         rlwinm  $acc01,$s1,`32-24`,24,31
948         rlwinm  $acc02,$s2,`32-24`,24,31
949         rlwinm  $acc03,$s3,`32-24`,24,31
950         lwz     $t0,0($key)             # this round's key, loaded early
951         lwz     $t1,4($key)
952         lwz     $t2,8($key)
953         lwz     $t3,12($key)
954         rlwinm  $acc04,$s3,`32-16`,24,31        # second byte, from the previous word
955         rlwinm  $acc05,$s0,`32-16`,24,31
956         rlwinm  $acc06,$s1,`32-16`,24,31
957         rlwinm  $acc07,$s2,`32-16`,24,31
958         lbzx    $acc00,$Tbl1,$acc00
959         lbzx    $acc01,$Tbl1,$acc01
960         lbzx    $acc02,$Tbl1,$acc02
961         lbzx    $acc03,$Tbl1,$acc03
962         rlwinm  $acc08,$s2,`32-8`,24,31 # third byte, from two words away
963         rlwinm  $acc09,$s3,`32-8`,24,31
964         rlwinm  $acc10,$s0,`32-8`,24,31
965         rlwinm  $acc11,$s1,`32-8`,24,31
966         lbzx    $acc04,$Tbl1,$acc04
967         lbzx    $acc05,$Tbl1,$acc05
968         lbzx    $acc06,$Tbl1,$acc06
969         lbzx    $acc07,$Tbl1,$acc07
970         rlwinm  $acc12,$s1,`0`,24,31    # low byte, from the next word
971         rlwinm  $acc13,$s2,`0`,24,31
972         rlwinm  $acc14,$s3,`0`,24,31
973         rlwinm  $acc15,$s0,`0`,24,31
974         lbzx    $acc08,$Tbl1,$acc08
975         lbzx    $acc09,$Tbl1,$acc09
976         lbzx    $acc10,$Tbl1,$acc10
977         lbzx    $acc11,$Tbl1,$acc11
978         rlwinm  $s0,$acc00,24,0,7       # first substituted byte goes to the top position
979         rlwinm  $s1,$acc01,24,0,7
980         rlwinm  $s2,$acc02,24,0,7
981         rlwinm  $s3,$acc03,24,0,7
982         lbzx    $acc12,$Tbl1,$acc12
983         lbzx    $acc13,$Tbl1,$acc13
984         lbzx    $acc14,$Tbl1,$acc14
985         lbzx    $acc15,$Tbl1,$acc15
986         rlwimi  $s0,$acc04,16,8,15      # insert second byte
987         rlwimi  $s1,$acc05,16,8,15
988         rlwimi  $s2,$acc06,16,8,15
989         rlwimi  $s3,$acc07,16,8,15
990         rlwimi  $s0,$acc08,8,16,23      # insert third byte
991         rlwimi  $s1,$acc09,8,16,23
992         rlwimi  $s2,$acc10,8,16,23
993         rlwimi  $s3,$acc11,8,16,23
994         or      $s0,$s0,$acc12          # low byte; s0-s3 now hold r0 of the formulas below
995         or      $s1,$s1,$acc13
996         or      $s2,$s2,$acc14
997         or      $s3,$s3,$acc15
998
# Step 2: r2 = every byte of r0 doubled, four bytes at a time. The top bit
# of each byte is split off, turned into a per-byte 0x1b correction and
# xored into the remaining seven bits shifted left by one.
999         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1000         and     $acc01,$s1,$mask80
1001         and     $acc02,$s2,$mask80
1002         and     $acc03,$s3,$mask80
1003         srwi    $acc04,$acc00,7         # r1>>7
1004         srwi    $acc05,$acc01,7
1005         srwi    $acc06,$acc02,7
1006         srwi    $acc07,$acc03,7
1007         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1008         andc    $acc09,$s1,$mask80
1009         andc    $acc10,$s2,$mask80
1010         andc    $acc11,$s3,$mask80
1011         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1012         sub     $acc01,$acc01,$acc05
1013         sub     $acc02,$acc02,$acc06
1014         sub     $acc03,$acc03,$acc07
1015         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1016         add     $acc09,$acc09,$acc09
1017         add     $acc10,$acc10,$acc10
1018         add     $acc11,$acc11,$acc11
1019         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1020         and     $acc01,$acc01,$mask1b
1021         and     $acc02,$acc02,$mask1b
1022         and     $acc03,$acc03,$mask1b
1023         xor     $acc00,$acc00,$acc08    # r2
1024         xor     $acc01,$acc01,$acc09
1025         xor     $acc02,$acc02,$acc10
1026         xor     $acc03,$acc03,$acc11
1027
# Step 3: r4 = r2 doubled, by the same sequence.
1028         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1029         and     $acc05,$acc01,$mask80
1030         and     $acc06,$acc02,$mask80
1031         and     $acc07,$acc03,$mask80
1032         srwi    $acc08,$acc04,7         # r1>>7
1033         srwi    $acc09,$acc05,7
1034         srwi    $acc10,$acc06,7
1035         srwi    $acc11,$acc07,7
1036         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1037         andc    $acc13,$acc01,$mask80
1038         andc    $acc14,$acc02,$mask80
1039         andc    $acc15,$acc03,$mask80
1040         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1041         sub     $acc05,$acc05,$acc09
1042         sub     $acc06,$acc06,$acc10
1043         sub     $acc07,$acc07,$acc11
1044         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1045         add     $acc13,$acc13,$acc13
1046         add     $acc14,$acc14,$acc14
1047         add     $acc15,$acc15,$acc15
1048         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1049         and     $acc05,$acc05,$mask1b
1050         and     $acc06,$acc06,$mask1b
1051         and     $acc07,$acc07,$mask1b
1052         xor     $acc04,$acc04,$acc12    # r4
1053         xor     $acc05,$acc05,$acc13
1054         xor     $acc06,$acc06,$acc14
1055         xor     $acc07,$acc07,$acc15
1056
# Step 4: r8 = r4 doubled, by the same sequence.
1057         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1058         and     $acc09,$acc05,$mask80
1059         and     $acc10,$acc06,$mask80
1060         and     $acc11,$acc07,$mask80
1061         srwi    $acc12,$acc08,7         # r1>>7
1062         srwi    $acc13,$acc09,7
1063         srwi    $acc14,$acc10,7
1064         srwi    $acc15,$acc11,7
1065         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1066         sub     $acc09,$acc09,$acc13
1067         sub     $acc10,$acc10,$acc14
1068         sub     $acc11,$acc11,$acc15
1069         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1070         andc    $acc13,$acc05,$mask80
1071         andc    $acc14,$acc06,$mask80
1072         andc    $acc15,$acc07,$mask80
1073         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1074         add     $acc13,$acc13,$acc13
1075         add     $acc14,$acc14,$acc14
1076         add     $acc15,$acc15,$acc15
1077         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1078         and     $acc09,$acc09,$mask1b
1079         and     $acc10,$acc10,$mask1b
1080         and     $acc11,$acc11,$mask1b
1081         xor     $acc08,$acc08,$acc12    # r8
1082         xor     $acc09,$acc09,$acc13
1083         xor     $acc10,$acc10,$acc14
1084         xor     $acc11,$acc11,$acc15
1085
# Step 5: combine r0, r2, r4 and r8 into the mixed column. The net result is
#   (r2^r4^r8) ^ rotr(r0^r8,8) ^ rotr(r0^r4^r8,16) ^ rotr(r0^r2^r8,24)
# i.e. multiplication of each column by 0e/09/0d/0b (InvMixColumns).
1086         xor     $acc00,$acc00,$s0       # r2^r0
1087         xor     $acc01,$acc01,$s1
1088         xor     $acc02,$acc02,$s2
1089         xor     $acc03,$acc03,$s3
1090         xor     $acc04,$acc04,$s0       # r4^r0
1091         xor     $acc05,$acc05,$s1
1092         xor     $acc06,$acc06,$s2
1093         xor     $acc07,$acc07,$s3
1094         rotrwi  $s0,$s0,8               # = ROTATE(r0,8)
1095         rotrwi  $s1,$s1,8
1096         rotrwi  $s2,$s2,8
1097         rotrwi  $s3,$s3,8
1098         xor     $s0,$s0,$acc00          # ^= r2^r0
1099         xor     $s1,$s1,$acc01
1100         xor     $s2,$s2,$acc02
1101         xor     $s3,$s3,$acc03
1102         xor     $acc00,$acc00,$acc08    # acc00-03 = r8^r2^r0
1103         xor     $acc01,$acc01,$acc09
1104         xor     $acc02,$acc02,$acc10
1105         xor     $acc03,$acc03,$acc11
1106         xor     $s0,$s0,$acc04          # ^= r4^r0
1107         xor     $s1,$s1,$acc05
1108         xor     $s2,$s2,$acc06
1109         xor     $s3,$s3,$acc07
1110         rotrwi  $acc00,$acc00,24        # ROTATE(r8^r2^r0,24), consumed a few lines down
1111         rotrwi  $acc01,$acc01,24
1112         rotrwi  $acc02,$acc02,24
1113         rotrwi  $acc03,$acc03,24
1114         xor     $acc04,$acc04,$acc08    # acc04-07 = r8^r4^r0
1115         xor     $acc05,$acc05,$acc09
1116         xor     $acc06,$acc06,$acc10
1117         xor     $acc07,$acc07,$acc11
1118         xor     $s0,$s0,$acc08          # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1119         xor     $s1,$s1,$acc09
1120         xor     $s2,$s2,$acc10
1121         xor     $s3,$s3,$acc11
1122         rotrwi  $acc04,$acc04,16        # ROTATE(r8^r4^r0,16), consumed a few lines down
1123         rotrwi  $acc05,$acc05,16
1124         rotrwi  $acc06,$acc06,16
1125         rotrwi  $acc07,$acc07,16
1126         xor     $s0,$s0,$acc00          # ^= ROTATE(r8^r2^r0,24)
1127         xor     $s1,$s1,$acc01
1128         xor     $s2,$s2,$acc02
1129         xor     $s3,$s3,$acc03
1130         rotrwi  $acc08,$acc08,8         # ROTATE(r8,8); pairs with ROTATE(r0,8) already in s0-s3
1131         rotrwi  $acc09,$acc09,8
1132         rotrwi  $acc10,$acc10,8
1133         rotrwi  $acc11,$acc11,8
1134         xor     $s0,$s0,$acc04          # ^= ROTATE(r8^r4^r0,16)
1135         xor     $s1,$s1,$acc05
1136         xor     $s2,$s2,$acc06
1137         xor     $s3,$s3,$acc07
1138         xor     $s0,$s0,$acc08          # ^= ROTATE(r8,8)       
1139         xor     $s1,$s1,$acc09  
1140         xor     $s2,$s2,$acc10  
1141         xor     $s3,$s3,$acc11  
1142
1143         xor     $s0,$s0,$t0             # round-key addition comes after the column mixing
1144         xor     $s1,$s1,$t1
1145         xor     $s2,$s2,$t2
1146         xor     $s3,$s3,$t3
1147
1148         addi    $key,$key,16            # next round key
1149         bdnz-   Ldec_compact_loop
1150
# Final round: byte substitution and round-key addition only, no mixing.
1151         rlwinm  $acc00,$s0,`32-24`,24,31
1152         rlwinm  $acc01,$s1,`32-24`,24,31
1153         rlwinm  $acc02,$s2,`32-24`,24,31
1154         rlwinm  $acc03,$s3,`32-24`,24,31
1155         lwz     $t0,0($key)             # last round key
1156         lwz     $t1,4($key)
1157         lwz     $t2,8($key)
1158         lwz     $t3,12($key)
1159         rlwinm  $acc04,$s3,`32-16`,24,31
1160         rlwinm  $acc05,$s0,`32-16`,24,31
1161         rlwinm  $acc06,$s1,`32-16`,24,31
1162         rlwinm  $acc07,$s2,`32-16`,24,31
1163         lbzx    $acc00,$Tbl1,$acc00
1164         lbzx    $acc01,$Tbl1,$acc01
1165         lbzx    $acc02,$Tbl1,$acc02
1166         lbzx    $acc03,$Tbl1,$acc03
1167         rlwinm  $acc08,$s2,`32-8`,24,31
1168         rlwinm  $acc09,$s3,`32-8`,24,31
1169         rlwinm  $acc10,$s0,`32-8`,24,31
1170         rlwinm  $acc11,$s1,`32-8`,24,31
1171         lbzx    $acc04,$Tbl1,$acc04
1172         lbzx    $acc05,$Tbl1,$acc05
1173         lbzx    $acc06,$Tbl1,$acc06
1174         lbzx    $acc07,$Tbl1,$acc07
1175         rlwinm  $acc12,$s1,`0`,24,31
1176         rlwinm  $acc13,$s2,`0`,24,31
1177         rlwinm  $acc14,$s3,`0`,24,31
1178         rlwinm  $acc15,$s0,`0`,24,31
1179         lbzx    $acc08,$Tbl1,$acc08
1180         lbzx    $acc09,$Tbl1,$acc09
1181         lbzx    $acc10,$Tbl1,$acc10
1182         lbzx    $acc11,$Tbl1,$acc11
1183         rlwinm  $s0,$acc00,24,0,7       # rebuild the words from the substituted bytes
1184         rlwinm  $s1,$acc01,24,0,7
1185         rlwinm  $s2,$acc02,24,0,7
1186         rlwinm  $s3,$acc03,24,0,7
1187         lbzx    $acc12,$Tbl1,$acc12
1188         lbzx    $acc13,$Tbl1,$acc13
1189         lbzx    $acc14,$Tbl1,$acc14
1190         lbzx    $acc15,$Tbl1,$acc15
1191         rlwimi  $s0,$acc04,16,8,15
1192         rlwimi  $s1,$acc05,16,8,15
1193         rlwimi  $s2,$acc06,16,8,15
1194         rlwimi  $s3,$acc07,16,8,15
1195         rlwimi  $s0,$acc08,8,16,23
1196         rlwimi  $s1,$acc09,8,16,23
1197         rlwimi  $s2,$acc10,8,16,23
1198         rlwimi  $s3,$acc11,8,16,23
1199         or      $s0,$s0,$acc12
1200         or      $s1,$s1,$acc13
1201         or      $s2,$s2,$acc14
1202         or      $s3,$s3,$acc15
1203         xor     $s0,$s0,$t0             # final round-key addition
1204         xor     $s1,$s1,$t1
1205         xor     $s2,$s2,$t2
1206         xor     $s3,$s3,$t3
1207         blr
# Trailer emitted after the last routine: a zero word, an identification
# string, and padding to a 2**7-byte boundary.
1208 .long   0
1209 .asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1210 .align  7
1211 ___
1212
# Post-process and emit the generated assembly.
#
# Every backtick-quoted span in $code is a Perl arithmetic expression
# (frame offsets, rotate counts, table displacements); evaluate each one
# and splice its value into the text before printing.
1213 $code =~ s/\`([^\`]*)\`/eval $1/gem;
1214 print $code;
# STDOUT is normally a pipe into ppc-xlate.pl (opened near the top of the
# file). close() on a pipe waits for the child and returns false if it
# exited with an error, and it is also where buffered write errors
# surface, so an unchecked close would let a truncated or untranslated
# .s file pass as a successful build step.
1215 close STDOUT or die "error closing STDOUT: $!";