Unify ppc assembler make rules.
[openssl.git] / crypto / aes / asm / aes-ppc.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # Needs more work: key setup, page boundaries, CBC routine...
11 #
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17 # at 1/3 of ppc_AES_decrypt.
18
# Target selection.  The first command-line argument is the "flavour";
# any flavour containing "64" selects 8-byte pointers and the 64-bit
# load/store mnemonics, one containing "32" selects the 32-bit set, and
# anything else is rejected.
$flavour = shift;

if ($flavour =~ /64/) {
        $SIZE_T =8;
        $STU    ="stdu";
        $POP    ="ld";
        $PUSH   ="std";
} elsif ($flavour =~ /32/) {
        $SIZE_T =4;
        $STU    ="stwu";
        $POP    ="lwz";
        $PUSH   ="stw";
} else { die "nonsense $flavour"; }

# Locate the translator: first next to this script, then in the shared
# ../../perlasm/ directory relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Redirect STDOUT into the translator, handing it the flavour and the
# next command-line argument.  The low-precedence "or" matters: with
# "||" the die was attached to the (always true) command string, so a
# failed open was silently ignored.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

# Size of the stack frame allocated by the generated entry points:
# 32 pointer-sized slots.
$FRAME=32*$SIZE_T;
41
# Append table data to the global $code buffer.
#
# Takes a list of 32-bit values and, for each one, emits a line of the
# form "\t.long\t0xVVVVVVVV,0xVVVVVVVV\n", i.e. every value is written
# twice, so each table entry occupies 8 bytes in the output.  Processing
# stops at the first undefined argument.
#
# The sub is declared without a prototype: the former empty prototype
# "()" claimed it takes no arguments and only worked because every call
# uses the "&name(...)" form, which bypasses prototype checking.
sub _data_word
{
    foreach my $word (@_) {
        last unless defined $word;
        $code .= sprintf("\t.long\t0x%08x,0x%08x\n", $word, $word);
    }
}
46
# Symbolic names for the registers used by the generated code.

# Stack pointer, TOC pointer and the three incoming arguments (r1..r5).
($sp, $toc, $inp, $out, $key) = map { "r$_" } 1 .. 5;

# Table pointers.  $Tbl0 shares r3 with $inp and $Tbl3 shares r2 with
# $toc.
($Tbl0, $Tbl1, $Tbl2, $Tbl3) = ("r3", "r6", "r7", "r2");

# The four 32-bit state words (r8..r11).
($s0, $s1, $s2, $s3) = map { "r$_" } 8 .. 11;

# Temporaries (r12..r15).
($t0, $t1, $t2, $t3) = map { "r$_" } 12 .. 15;

# Sixteen accumulators (r16..r31).
($acc00, $acc01, $acc02, $acc03,
 $acc04, $acc05, $acc06, $acc07,
 $acc08, $acc09, $acc10, $acc11,
 $acc12, $acc13, $acc14, $acc15) = map { "r$_" } 16 .. 31;

# stay away from TLS pointer
# 64-bit flavour: $t1 must not be r13, so it moves to r0.
# 32-bit flavour: $Tbl3 must not be r2, so it takes over $t0's register
# and $t0 moves to r0.
# The bare die calls guard against the assignments above being edited
# without this fix-up being revisited.
if ($SIZE_T == 8) {
    die if ($t1 ne "r13");
    $t1 = "r0";
}
else {
    die if ($Tbl3 ne "r2");
    $Tbl3 = $t0;
    $t0   = "r0";
}

# The mask registers reuse the $Tbl2 and $Tbl3 registers.
$mask80 = $Tbl2;
$mask1b = $Tbl3;
93
# Start of the generated text section: two small position-independent
# helpers that return the address of a lookup table in $Tbl0.
#
# Each helper saves LR in r0, executes "bcl 20,31,$+4" so that LR holds
# the address of the following instruction, copies that address into
# $Tbl0, adds the distance to the table data, then restores LR and
# returns.
#   LAES_Te: +128-8 lands on the first data entry, right after this
#            128-byte header (the section is aligned with .align 7).
#   LAES_Td: starts 32 bytes into the header, hence the extra -32; the
#            +2048+256 skips the doubled Te words (256 entries * 8
#            bytes) and the 256 Te4 bytes emitted after the header.
# The .space directives pad the Te stub to 32 bytes and the whole
# header to 128 bytes (each stub is six 4-byte instructions = 24 bytes).
#
# NOTE(review): the backquoted expressions are arithmetic placeholders;
# presumably they are evaluated before output by code outside this
# excerpt -- confirm.
94 $code.=<<___;
95 .text
96
97 .align  7
98 LAES_Te:
99         mflr    r0
100         bcl     20,31,\$+4
101         mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
102         addi    $Tbl0,$Tbl0,`128-8`
103         mtlr    r0
104         blr
105         .space  `32-24`
106 LAES_Td:
107         mflr    r0
108         bcl     20,31,\$+4
109         mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
110         addi    $Tbl0,$Tbl0,`128-8-32+2048+256`
111         mtlr    r0
112         blr
113         .space  `128-32-24`
114 ___
# Te: the 256 32-bit words of the encryption lookup table.  _data_word
# writes every word twice, so the table takes 256*8 = 2048 bytes in the
# generated output.  LAES_Te returns the address of the first entry.
115 &_data_word(
116         0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
117         0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
118         0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
119         0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
120         0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
121         0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
122         0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
123         0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
124         0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
125         0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
126         0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
127         0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
128         0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
129         0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
130         0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
131         0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
132         0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
133         0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
134         0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
135         0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
136         0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
137         0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
138         0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
139         0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
140         0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
141         0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
142         0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
143         0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
144         0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
145         0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
146         0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
147         0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
148         0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
149         0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
150         0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
151         0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
152         0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
153         0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
154         0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
155         0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
156         0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
157         0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
158         0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
159         0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
160         0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
161         0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
162         0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
163         0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
164         0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
165         0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
166         0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
167         0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
168         0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
169         0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
170         0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
171         0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
172         0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
173         0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
174         0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
175         0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
176         0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
177         0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
178         0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
179         0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# Te4: a 256-byte table emitted directly after the doubled Te words,
# i.e. at offset 2048 from the address returned by LAES_Te.  The
# encryption code indexes it with lbzx from $Tbl0+2048.  The values are
# the AES S-box.
180 $code.=<<___;
181 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
182 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
183 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
184 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
185 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
186 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
187 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
188 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
189 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
190 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
191 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
192 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
193 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
194 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
195 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
196 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
197 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
198 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
199 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
200 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
201 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
202 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
203 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
204 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
205 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
206 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
207 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
208 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
209 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
210 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
211 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
212 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
213 ___
# Td: the 256 32-bit words of the decryption lookup table, doubled by
# _data_word in the same way as the encryption table (2048 bytes of
# output).  LAES_Td returns the address of the first entry.
214 &_data_word(
215         0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
216         0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
217         0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
218         0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
219         0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
220         0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
221         0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
222         0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
223         0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
224         0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
225         0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
226         0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
227         0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
228         0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
229         0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
230         0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
231         0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
232         0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
233         0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
234         0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
235         0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
236         0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
237         0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
238         0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
239         0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
240         0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
241         0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
242         0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
243         0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
244         0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
245         0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
246         0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
247         0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
248         0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
249         0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
250         0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
251         0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
252         0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
253         0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
254         0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
255         0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
256         0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
257         0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
258         0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
259         0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
260         0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
261         0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
262         0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
263         0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
264         0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
265         0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
266         0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
267         0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
268         0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
269         0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
270         0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
271         0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
272         0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
273         0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
274         0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
275         0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
276         0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
277         0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
278         0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
279 $code.=<<___;
280 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
281 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
282 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
283 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
284 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
285 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
286 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
287 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
288 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
289 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
290 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
291 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
292 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
293 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
294 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
295 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
296 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
297 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
298 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
299 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
300 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
301 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
302 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
303 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
304 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
305 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
306 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
307 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
308 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
309 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
310 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
311 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
312
313
314 .globl  .AES_encrypt
315 .align  7
316 .AES_encrypt:
317         mflr    r0
318         $STU    $sp,-$FRAME($sp)
319
320         $PUSH   r0,`$FRAME-$SIZE_T*21`($sp)
321         $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
322         $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
323         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
324         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
325         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
326         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
327         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
328         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
329         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
330         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
331         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
332         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
333         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
334         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
335         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
336         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
337         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
338         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
339         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
340         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
341
342         lwz     $s0,0($inp)
343         lwz     $s1,4($inp)
344         lwz     $s2,8($inp)
345         lwz     $s3,12($inp)
346         bl      LAES_Te
347         bl      Lppc_AES_encrypt_compact
348         stw     $s0,0($out)
349         stw     $s1,4($out)
350         stw     $s2,8($out)
351         stw     $s3,12($out)
352
353         $POP    r0,`$FRAME-$SIZE_T*21`($sp)
354         $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
355         $POP    r13,`$FRAME-$SIZE_T*19`($sp)
356         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
357         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
358         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
359         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
360         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
361         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
362         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
363         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
364         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
365         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
366         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
367         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
368         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
369         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
370         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
371         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
372         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
373         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
374         mtlr    r0
375         addi    $sp,$sp,$FRAME
376         blr
377
378 .align  4
379 Lppc_AES_encrypt:
380         lwz     $acc00,240($key)
381         lwz     $t0,0($key)
382         lwz     $t1,4($key)
383         lwz     $t2,8($key)
384         lwz     $t3,12($key)
385         addi    $Tbl1,$Tbl0,3
386         addi    $Tbl2,$Tbl0,2
387         addi    $Tbl3,$Tbl0,1
388         addi    $acc00,$acc00,-1
389         addi    $key,$key,16
390         xor     $s0,$s0,$t0
391         xor     $s1,$s1,$t1
392         xor     $s2,$s2,$t2
393         xor     $s3,$s3,$t3
394         mtctr   $acc00
395 .align  4
396 Lenc_loop:
397         rlwinm  $acc00,$s0,`32-24+3`,21,28
398         rlwinm  $acc01,$s1,`32-24+3`,21,28
399         rlwinm  $acc02,$s2,`32-24+3`,21,28
400         rlwinm  $acc03,$s3,`32-24+3`,21,28
401         lwz     $t0,0($key)
402         lwz     $t1,4($key)
403         lwz     $t2,8($key)
404         lwz     $t3,12($key)
405         rlwinm  $acc04,$s1,`32-16+3`,21,28
406         rlwinm  $acc05,$s2,`32-16+3`,21,28
407         rlwinm  $acc06,$s3,`32-16+3`,21,28
408         rlwinm  $acc07,$s0,`32-16+3`,21,28
409         lwzx    $acc00,$Tbl0,$acc00
410         lwzx    $acc01,$Tbl0,$acc01
411         lwzx    $acc02,$Tbl0,$acc02
412         lwzx    $acc03,$Tbl0,$acc03
413         rlwinm  $acc08,$s2,`32-8+3`,21,28
414         rlwinm  $acc09,$s3,`32-8+3`,21,28
415         rlwinm  $acc10,$s0,`32-8+3`,21,28
416         rlwinm  $acc11,$s1,`32-8+3`,21,28
417         lwzx    $acc04,$Tbl1,$acc04
418         lwzx    $acc05,$Tbl1,$acc05
419         lwzx    $acc06,$Tbl1,$acc06
420         lwzx    $acc07,$Tbl1,$acc07
421         rlwinm  $acc12,$s3,`0+3`,21,28
422         rlwinm  $acc13,$s0,`0+3`,21,28
423         rlwinm  $acc14,$s1,`0+3`,21,28
424         rlwinm  $acc15,$s2,`0+3`,21,28
425         lwzx    $acc08,$Tbl2,$acc08
426         lwzx    $acc09,$Tbl2,$acc09
427         lwzx    $acc10,$Tbl2,$acc10
428         lwzx    $acc11,$Tbl2,$acc11
429         xor     $t0,$t0,$acc00
430         xor     $t1,$t1,$acc01
431         xor     $t2,$t2,$acc02
432         xor     $t3,$t3,$acc03
433         lwzx    $acc12,$Tbl3,$acc12
434         lwzx    $acc13,$Tbl3,$acc13
435         lwzx    $acc14,$Tbl3,$acc14
436         lwzx    $acc15,$Tbl3,$acc15
437         xor     $t0,$t0,$acc04
438         xor     $t1,$t1,$acc05
439         xor     $t2,$t2,$acc06
440         xor     $t3,$t3,$acc07
441         xor     $t0,$t0,$acc08
442         xor     $t1,$t1,$acc09
443         xor     $t2,$t2,$acc10
444         xor     $t3,$t3,$acc11
445         xor     $s0,$t0,$acc12
446         xor     $s1,$t1,$acc13
447         xor     $s2,$t2,$acc14
448         xor     $s3,$t3,$acc15
449         addi    $key,$key,16
450         bdnz-   Lenc_loop
451
452         addi    $Tbl2,$Tbl0,2048
453         nop
454         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Te4
455         lwz     $acc09,`2048+32`($Tbl0)
456         lwz     $acc10,`2048+64`($Tbl0)
457         lwz     $acc11,`2048+96`($Tbl0)
458         lwz     $acc08,`2048+128`($Tbl0)
459         lwz     $acc09,`2048+160`($Tbl0)
460         lwz     $acc10,`2048+192`($Tbl0)
461         lwz     $acc11,`2048+224`($Tbl0)
462         rlwinm  $acc00,$s0,`32-24`,24,31
463         rlwinm  $acc01,$s1,`32-24`,24,31
464         rlwinm  $acc02,$s2,`32-24`,24,31
465         rlwinm  $acc03,$s3,`32-24`,24,31
466         lwz     $t0,0($key)
467         lwz     $t1,4($key)
468         lwz     $t2,8($key)
469         lwz     $t3,12($key)
470         rlwinm  $acc04,$s1,`32-16`,24,31
471         rlwinm  $acc05,$s2,`32-16`,24,31
472         rlwinm  $acc06,$s3,`32-16`,24,31
473         rlwinm  $acc07,$s0,`32-16`,24,31
474         lbzx    $acc00,$Tbl2,$acc00
475         lbzx    $acc01,$Tbl2,$acc01
476         lbzx    $acc02,$Tbl2,$acc02
477         lbzx    $acc03,$Tbl2,$acc03
478         rlwinm  $acc08,$s2,`32-8`,24,31
479         rlwinm  $acc09,$s3,`32-8`,24,31
480         rlwinm  $acc10,$s0,`32-8`,24,31
481         rlwinm  $acc11,$s1,`32-8`,24,31
482         lbzx    $acc04,$Tbl2,$acc04
483         lbzx    $acc05,$Tbl2,$acc05
484         lbzx    $acc06,$Tbl2,$acc06
485         lbzx    $acc07,$Tbl2,$acc07
486         rlwinm  $acc12,$s3,`0`,24,31
487         rlwinm  $acc13,$s0,`0`,24,31
488         rlwinm  $acc14,$s1,`0`,24,31
489         rlwinm  $acc15,$s2,`0`,24,31
490         lbzx    $acc08,$Tbl2,$acc08
491         lbzx    $acc09,$Tbl2,$acc09
492         lbzx    $acc10,$Tbl2,$acc10
493         lbzx    $acc11,$Tbl2,$acc11
494         rlwinm  $s0,$acc00,24,0,7
495         rlwinm  $s1,$acc01,24,0,7
496         rlwinm  $s2,$acc02,24,0,7
497         rlwinm  $s3,$acc03,24,0,7
498         lbzx    $acc12,$Tbl2,$acc12
499         lbzx    $acc13,$Tbl2,$acc13
500         lbzx    $acc14,$Tbl2,$acc14
501         lbzx    $acc15,$Tbl2,$acc15
502         rlwimi  $s0,$acc04,16,8,15
503         rlwimi  $s1,$acc05,16,8,15
504         rlwimi  $s2,$acc06,16,8,15
505         rlwimi  $s3,$acc07,16,8,15
506         rlwimi  $s0,$acc08,8,16,23
507         rlwimi  $s1,$acc09,8,16,23
508         rlwimi  $s2,$acc10,8,16,23
509         rlwimi  $s3,$acc11,8,16,23
510         or      $s0,$s0,$acc12
511         or      $s1,$s1,$acc13
512         or      $s2,$s2,$acc14
513         or      $s3,$s3,$acc15
514         xor     $s0,$s0,$t0
515         xor     $s1,$s1,$t1
516         xor     $s2,$s2,$t2
517         xor     $s3,$s3,$t3
518         blr
519
520 .align  4
521 Lppc_AES_encrypt_compact:
522         lwz     $acc00,240($key)
523         lwz     $t0,0($key)
524         lwz     $t1,4($key)
525         lwz     $t2,8($key)
526         lwz     $t3,12($key)
527         addi    $Tbl1,$Tbl0,2048
528         lis     $mask80,0x8080
529         lis     $mask1b,0x1b1b
530         addi    $key,$key,16
531         ori     $mask80,$mask80,0x8080
532         ori     $mask1b,$mask1b,0x1b1b
533         mtctr   $acc00
534 .align  4
535 Lenc_compact_loop:
536         xor     $s0,$s0,$t0
537         xor     $s1,$s1,$t1
538         xor     $s2,$s2,$t2
539         xor     $s3,$s3,$t3
540         rlwinm  $acc00,$s0,`32-24`,24,31
541         rlwinm  $acc01,$s1,`32-24`,24,31
542         rlwinm  $acc02,$s2,`32-24`,24,31
543         rlwinm  $acc03,$s3,`32-24`,24,31
544         lwz     $t0,0($key)
545         lwz     $t1,4($key)
546         lwz     $t2,8($key)
547         lwz     $t3,12($key)
548         rlwinm  $acc04,$s1,`32-16`,24,31
549         rlwinm  $acc05,$s2,`32-16`,24,31
550         rlwinm  $acc06,$s3,`32-16`,24,31
551         rlwinm  $acc07,$s0,`32-16`,24,31
552         lbzx    $acc00,$Tbl1,$acc00
553         lbzx    $acc01,$Tbl1,$acc01
554         lbzx    $acc02,$Tbl1,$acc02
555         lbzx    $acc03,$Tbl1,$acc03
556         rlwinm  $acc08,$s2,`32-8`,24,31
557         rlwinm  $acc09,$s3,`32-8`,24,31
558         rlwinm  $acc10,$s0,`32-8`,24,31
559         rlwinm  $acc11,$s1,`32-8`,24,31
560         lbzx    $acc04,$Tbl1,$acc04
561         lbzx    $acc05,$Tbl1,$acc05
562         lbzx    $acc06,$Tbl1,$acc06
563         lbzx    $acc07,$Tbl1,$acc07
564         rlwinm  $acc12,$s3,`0`,24,31
565         rlwinm  $acc13,$s0,`0`,24,31
566         rlwinm  $acc14,$s1,`0`,24,31
567         rlwinm  $acc15,$s2,`0`,24,31
568         lbzx    $acc08,$Tbl1,$acc08
569         lbzx    $acc09,$Tbl1,$acc09
570         lbzx    $acc10,$Tbl1,$acc10
571         lbzx    $acc11,$Tbl1,$acc11
572         rlwinm  $s0,$acc00,24,0,7
573         rlwinm  $s1,$acc01,24,0,7
574         rlwinm  $s2,$acc02,24,0,7
575         rlwinm  $s3,$acc03,24,0,7
576         lbzx    $acc12,$Tbl1,$acc12
577         lbzx    $acc13,$Tbl1,$acc13
578         lbzx    $acc14,$Tbl1,$acc14
579         lbzx    $acc15,$Tbl1,$acc15
580         rlwimi  $s0,$acc04,16,8,15
581         rlwimi  $s1,$acc05,16,8,15
582         rlwimi  $s2,$acc06,16,8,15
583         rlwimi  $s3,$acc07,16,8,15
584         rlwimi  $s0,$acc08,8,16,23
585         rlwimi  $s1,$acc09,8,16,23
586         rlwimi  $s2,$acc10,8,16,23
587         rlwimi  $s3,$acc11,8,16,23
588         or      $s0,$s0,$acc12
589         or      $s1,$s1,$acc13
590         or      $s2,$s2,$acc14
591         or      $s3,$s3,$acc15
592
593         addi    $key,$key,16
594         bdz     Lenc_compact_done
595
596         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
597         and     $acc01,$s1,$mask80
598         and     $acc02,$s2,$mask80
599         and     $acc03,$s3,$mask80
600         srwi    $acc04,$acc00,7         # r1>>7
601         srwi    $acc05,$acc01,7
602         srwi    $acc06,$acc02,7
603         srwi    $acc07,$acc03,7
604         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
605         andc    $acc09,$s1,$mask80
606         andc    $acc10,$s2,$mask80
607         andc    $acc11,$s3,$mask80
608         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
609         sub     $acc01,$acc01,$acc05
610         sub     $acc02,$acc02,$acc06
611         sub     $acc03,$acc03,$acc07
612         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
613         add     $acc09,$acc09,$acc09
614         add     $acc10,$acc10,$acc10
615         add     $acc11,$acc11,$acc11
616         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
617         and     $acc01,$acc01,$mask1b
618         and     $acc02,$acc02,$mask1b
619         and     $acc03,$acc03,$mask1b
620         xor     $acc00,$acc00,$acc08    # r2
621         xor     $acc01,$acc01,$acc09
622         xor     $acc02,$acc02,$acc10
623         xor     $acc03,$acc03,$acc11
624
625         rotlwi  $acc12,$s0,16           # ROTATE(r0,16)
626         rotlwi  $acc13,$s1,16
627         rotlwi  $acc14,$s2,16
628         rotlwi  $acc15,$s3,16
629         xor     $s0,$s0,$acc00          # r0^r2
630         xor     $s1,$s1,$acc01
631         xor     $s2,$s2,$acc02
632         xor     $s3,$s3,$acc03
633         rotrwi  $s0,$s0,24              # ROTATE(r2^r0,24)
634         rotrwi  $s1,$s1,24
635         rotrwi  $s2,$s2,24
636         rotrwi  $s3,$s3,24
637         xor     $s0,$s0,$acc00          # ROTATE(r2^r0,24)^r2
638         xor     $s1,$s1,$acc01
639         xor     $s2,$s2,$acc02
640         xor     $s3,$s3,$acc03
641         rotlwi  $acc08,$acc12,8         # ROTATE(r0,24)
642         rotlwi  $acc09,$acc13,8
643         rotlwi  $acc10,$acc14,8
644         rotlwi  $acc11,$acc15,8
645         xor     $s0,$s0,$acc12          #
646         xor     $s1,$s1,$acc13
647         xor     $s2,$s2,$acc14
648         xor     $s3,$s3,$acc15
649         xor     $s0,$s0,$acc08          #
650         xor     $s1,$s1,$acc09
651         xor     $s2,$s2,$acc10
652         xor     $s3,$s3,$acc11
653
654         b       Lenc_compact_loop
655 .align  4
656 Lenc_compact_done:
657         xor     $s0,$s0,$t0
658         xor     $s1,$s1,$t1
659         xor     $s2,$s2,$t2
660         xor     $s3,$s3,$t3
661         blr
662
663 .globl  .AES_decrypt
664 .align  7
665 .AES_decrypt:
666         mflr    r0
667         $STU    $sp,-$FRAME($sp)
668
669         $PUSH   r0,`$FRAME-$SIZE_T*21`($sp)
670         $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
671         $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
672         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
673         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
674         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
675         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
676         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
677         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
678         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
679         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
680         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
681         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
682         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
683         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
684         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
685         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
686         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
687         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
688         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
689         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
690
691         lwz     $s0,0($inp)
692         lwz     $s1,4($inp)
693         lwz     $s2,8($inp)
694         lwz     $s3,12($inp)
695         bl      LAES_Td
696         bl      Lppc_AES_decrypt_compact
697         stw     $s0,0($out)
698         stw     $s1,4($out)
699         stw     $s2,8($out)
700         stw     $s3,12($out)
701
702         $POP    r0,`$FRAME-$SIZE_T*21`($sp)
703         $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
704         $POP    r13,`$FRAME-$SIZE_T*19`($sp)
705         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
706         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
707         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
708         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
709         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
710         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
711         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
712         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
713         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
714         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
715         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
716         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
717         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
718         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
719         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
720         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
721         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
722         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
723         mtlr    r0
724         addi    $sp,$sp,$FRAME
725         blr
726
# Lppc_AES_decrypt -- table-driven decryption of one 16-byte block.
# In:   s0..s3 hold the four state words, key points at the key schedule,
#       Tbl0 points at a table of 256 eight-byte entries that is followed,
#       at Tbl0+2048, by a 256-byte table (called Td4 in a comment below).
# Out:  s0..s3 hold the result; control returns through blr.
# Uses: t0..t3, acc00..acc15, Tbl1..Tbl3, key (advanced) and CTR.
727 .align  4
728 Lppc_AES_decrypt:
            # The word at key+240 supplies the loop count (presumably the
            # round count of the key schedule -- confirm against the key
            # structure). One less than it goes into CTR because the last
            # round is handled separately after the loop.
729         lwz     $acc00,240($key)
730         lwz     $t0,0($key)
731         lwz     $t1,4($key)
732         lwz     $t2,8($key)
733         lwz     $t3,12($key)
            # Tbl1, Tbl2, Tbl3 address the same table 3, 2 and 1 bytes in.
            # NOTE(review): assuming the table is built with _data_word,
            # which emits every word twice, a word load at those offsets
            # would return the entry rotated by whole bytes -- confirm
            # against the table definition.
734         addi    $Tbl1,$Tbl0,3
735         addi    $Tbl2,$Tbl0,2
736         addi    $Tbl3,$Tbl0,1
737         addi    $acc00,$acc00,-1
738         addi    $key,$key,16
            # Initial key addition: state ^= first four key words.
739         xor     $s0,$s0,$t0
740         xor     $s1,$s1,$t1
741         xor     $s2,$s2,$t2
742         xor     $s3,$s3,$t3
743         mtctr   $acc00
744 .align  4
745 Ldec_loop:
            # Each rlwinm below isolates one state byte and scales it by 8,
            # giving a byte offset into the eight-byte-wide table.
            # acc00..acc03: most significant byte of s0, s1, s2, s3.
746         rlwinm  $acc00,$s0,`32-24+3`,21,28
747         rlwinm  $acc01,$s1,`32-24+3`,21,28
748         rlwinm  $acc02,$s2,`32-24+3`,21,28
749         rlwinm  $acc03,$s3,`32-24+3`,21,28
            # Next four key words, loaded early and interleaved with the
            # index computation.
750         lwz     $t0,0($key)
751         lwz     $t1,4($key)
752         lwz     $t2,8($key)
753         lwz     $t3,12($key)
            # acc04..acc07: second byte, taken from s3, s0, s1, s2.
754         rlwinm  $acc04,$s3,`32-16+3`,21,28
755         rlwinm  $acc05,$s0,`32-16+3`,21,28
756         rlwinm  $acc06,$s1,`32-16+3`,21,28
757         rlwinm  $acc07,$s2,`32-16+3`,21,28
758         lwzx    $acc00,$Tbl0,$acc00
759         lwzx    $acc01,$Tbl0,$acc01
760         lwzx    $acc02,$Tbl0,$acc02
761         lwzx    $acc03,$Tbl0,$acc03
            # acc08..acc11: third byte, taken from s2, s3, s0, s1.
762         rlwinm  $acc08,$s2,`32-8+3`,21,28
763         rlwinm  $acc09,$s3,`32-8+3`,21,28
764         rlwinm  $acc10,$s0,`32-8+3`,21,28
765         rlwinm  $acc11,$s1,`32-8+3`,21,28
766         lwzx    $acc04,$Tbl1,$acc04
767         lwzx    $acc05,$Tbl1,$acc05
768         lwzx    $acc06,$Tbl1,$acc06
769         lwzx    $acc07,$Tbl1,$acc07
            # acc12..acc15: least significant byte, from s1, s2, s3, s0.
770         rlwinm  $acc12,$s1,`0+3`,21,28
771         rlwinm  $acc13,$s2,`0+3`,21,28
772         rlwinm  $acc14,$s3,`0+3`,21,28
773         rlwinm  $acc15,$s0,`0+3`,21,28
774         lwzx    $acc08,$Tbl2,$acc08
775         lwzx    $acc09,$Tbl2,$acc09
776         lwzx    $acc10,$Tbl2,$acc10
777         lwzx    $acc11,$Tbl2,$acc11
            # Fold the four lookups of every column into the round key
            # word; the last xor of each chain writes the new state.
778         xor     $t0,$t0,$acc00
779         xor     $t1,$t1,$acc01
780         xor     $t2,$t2,$acc02
781         xor     $t3,$t3,$acc03
782         lwzx    $acc12,$Tbl3,$acc12
783         lwzx    $acc13,$Tbl3,$acc13
784         lwzx    $acc14,$Tbl3,$acc14
785         lwzx    $acc15,$Tbl3,$acc15
786         xor     $t0,$t0,$acc04
787         xor     $t1,$t1,$acc05
788         xor     $t2,$t2,$acc06
789         xor     $t3,$t3,$acc07
790         xor     $t0,$t0,$acc08
791         xor     $t1,$t1,$acc09
792         xor     $t2,$t2,$acc10
793         xor     $t3,$t3,$acc11
794         xor     $s0,$t0,$acc12
795         xor     $s1,$t1,$acc13
796         xor     $s2,$t2,$acc14
797         xor     $s3,$t3,$acc15
798         addi    $key,$key,16
799         bdnz-   Ldec_loop
800
            # Last round: byte substitution through the 256-byte table at
            # Tbl0+2048, with no word-table lookups.
801         addi    $Tbl2,$Tbl0,2048
802         nop
            # Touch the byte table every 32 bytes across its 256 bytes.
            # The loaded values are never used: acc08..acc11 are
            # overwritten before they are read.
803         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Td4
804         lwz     $acc09,`2048+32`($Tbl0)
805         lwz     $acc10,`2048+64`($Tbl0)
806         lwz     $acc11,`2048+96`($Tbl0)
807         lwz     $acc08,`2048+128`($Tbl0)
808         lwz     $acc09,`2048+160`($Tbl0)
809         lwz     $acc10,`2048+192`($Tbl0)
810         lwz     $acc11,`2048+224`($Tbl0)
            # Plain byte indices this time (no scaling by 8), selected
            # from the same source words as in the loop.
811         rlwinm  $acc00,$s0,`32-24`,24,31
812         rlwinm  $acc01,$s1,`32-24`,24,31
813         rlwinm  $acc02,$s2,`32-24`,24,31
814         rlwinm  $acc03,$s3,`32-24`,24,31
815         lwz     $t0,0($key)
816         lwz     $t1,4($key)
817         lwz     $t2,8($key)
818         lwz     $t3,12($key)
819         rlwinm  $acc04,$s3,`32-16`,24,31
820         rlwinm  $acc05,$s0,`32-16`,24,31
821         rlwinm  $acc06,$s1,`32-16`,24,31
822         rlwinm  $acc07,$s2,`32-16`,24,31
823         lbzx    $acc00,$Tbl2,$acc00
824         lbzx    $acc01,$Tbl2,$acc01
825         lbzx    $acc02,$Tbl2,$acc02
826         lbzx    $acc03,$Tbl2,$acc03
827         rlwinm  $acc08,$s2,`32-8`,24,31
828         rlwinm  $acc09,$s3,`32-8`,24,31
829         rlwinm  $acc10,$s0,`32-8`,24,31
830         rlwinm  $acc11,$s1,`32-8`,24,31
831         lbzx    $acc04,$Tbl2,$acc04
832         lbzx    $acc05,$Tbl2,$acc05
833         lbzx    $acc06,$Tbl2,$acc06
834         lbzx    $acc07,$Tbl2,$acc07
835         rlwinm  $acc12,$s1,`0`,24,31
836         rlwinm  $acc13,$s2,`0`,24,31
837         rlwinm  $acc14,$s3,`0`,24,31
838         rlwinm  $acc15,$s0,`0`,24,31
839         lbzx    $acc08,$Tbl2,$acc08
840         lbzx    $acc09,$Tbl2,$acc09
841         lbzx    $acc10,$Tbl2,$acc10
842         lbzx    $acc11,$Tbl2,$acc11
            # Reassemble each word from its four substituted bytes:
            # top byte first, then insert the second and third bytes,
            # then or in the lowest byte.
843         rlwinm  $s0,$acc00,24,0,7
844         rlwinm  $s1,$acc01,24,0,7
845         rlwinm  $s2,$acc02,24,0,7
846         rlwinm  $s3,$acc03,24,0,7
847         lbzx    $acc12,$Tbl2,$acc12
848         lbzx    $acc13,$Tbl2,$acc13
849         lbzx    $acc14,$Tbl2,$acc14
850         lbzx    $acc15,$Tbl2,$acc15
851         rlwimi  $s0,$acc04,16,8,15
852         rlwimi  $s1,$acc05,16,8,15
853         rlwimi  $s2,$acc06,16,8,15
854         rlwimi  $s3,$acc07,16,8,15
855         rlwimi  $s0,$acc08,8,16,23
856         rlwimi  $s1,$acc09,8,16,23
857         rlwimi  $s2,$acc10,8,16,23
858         rlwimi  $s3,$acc11,8,16,23
859         or      $s0,$s0,$acc12
860         or      $s1,$s1,$acc13
861         or      $s2,$s2,$acc14
862         or      $s3,$s3,$acc15
            # Final key addition with the last four key words.
863         xor     $s0,$s0,$t0
864         xor     $s1,$s1,$t1
865         xor     $s2,$s2,$t2
866         xor     $s3,$s3,$t3
867         blr
868
# Lppc_AES_decrypt_compact -- decryption of one block using only the
# 256-byte substitution table at Tbl0+2048; the column mixing is computed
# arithmetically instead of being looked up.
# In:   s0..s3 hold the four state words, key points at the key schedule,
#       Tbl0 is the table base.
# Out:  s0..s3 hold the result; control returns through blr.
869 .align  4
870 Lppc_AES_decrypt_compact:
            # Loop count word from key+240 (presumably the round count --
            # confirm against the key structure) and the first four key
            # words.
871         lwz     $acc00,240($key)
872         lwz     $t0,0($key)
873         lwz     $t1,4($key)
874         lwz     $t2,8($key)
875         lwz     $t3,12($key)
            # Tbl1 = address of the byte table used by every lbzx below.
876         addi    $Tbl1,$Tbl0,2048
            # Build the 32-bit constants 0x80808080 and 0x1b1b1b1b with
            # lis (upper half) followed by ori (lower half).
877         lis     $mask80,0x8080
878         lis     $mask1b,0x1b1b
879         addi    $key,$key,16
880         ori     $mask80,$mask80,0x8080
881         ori     $mask1b,$mask1b,0x1b1b
882 ___
# On 64-bit builds replicate both masks into the upper 32 bits as well, so
# they can be applied to two packed state words at once.
883 $code.=<<___ if ($SIZE_T==8);
884         insrdi  $mask80,$mask80,32,0
885         insrdi  $mask1b,$mask1b,32,0
886 ___
# Round loop of the compact decrypt routine, first half: key addition,
# byte substitution and row shift. CTR receives the count word that was
# loaded from key+240 into acc00; the bdz at the end leaves the loop on
# the last pass, before any column mixing is done.
887 $code.=<<___;
888         mtctr   $acc00
889 .align  4
890 Ldec_compact_loop:
            # state ^= current round key (t0..t3).
891         xor     $s0,$s0,$t0
892         xor     $s1,$s1,$t1
893         xor     $s2,$s2,$t2
894         xor     $s3,$s3,$t3
            # acc00..acc03: most significant byte of s0, s1, s2, s3.
895         rlwinm  $acc00,$s0,`32-24`,24,31
896         rlwinm  $acc01,$s1,`32-24`,24,31
897         rlwinm  $acc02,$s2,`32-24`,24,31
898         rlwinm  $acc03,$s3,`32-24`,24,31
            # Fetch the next round key while the indices are computed.
899         lwz     $t0,0($key)
900         lwz     $t1,4($key)
901         lwz     $t2,8($key)
902         lwz     $t3,12($key)
            # acc04..acc07: second byte, taken from s3, s0, s1, s2.
903         rlwinm  $acc04,$s3,`32-16`,24,31
904         rlwinm  $acc05,$s0,`32-16`,24,31
905         rlwinm  $acc06,$s1,`32-16`,24,31
906         rlwinm  $acc07,$s2,`32-16`,24,31
907         lbzx    $acc00,$Tbl1,$acc00
908         lbzx    $acc01,$Tbl1,$acc01
909         lbzx    $acc02,$Tbl1,$acc02
910         lbzx    $acc03,$Tbl1,$acc03
            # acc08..acc11: third byte, taken from s2, s3, s0, s1.
911         rlwinm  $acc08,$s2,`32-8`,24,31
912         rlwinm  $acc09,$s3,`32-8`,24,31
913         rlwinm  $acc10,$s0,`32-8`,24,31
914         rlwinm  $acc11,$s1,`32-8`,24,31
915         lbzx    $acc04,$Tbl1,$acc04
916         lbzx    $acc05,$Tbl1,$acc05
917         lbzx    $acc06,$Tbl1,$acc06
918         lbzx    $acc07,$Tbl1,$acc07
            # acc12..acc15: least significant byte, from s1, s2, s3, s0.
919         rlwinm  $acc12,$s1,`0`,24,31
920         rlwinm  $acc13,$s2,`0`,24,31
921         rlwinm  $acc14,$s3,`0`,24,31
922         rlwinm  $acc15,$s0,`0`,24,31
923         lbzx    $acc08,$Tbl1,$acc08
924         lbzx    $acc09,$Tbl1,$acc09
925         lbzx    $acc10,$Tbl1,$acc10
926         lbzx    $acc11,$Tbl1,$acc11
            # Reassemble each state word from its four substituted bytes.
927         rlwinm  $s0,$acc00,24,0,7
928         rlwinm  $s1,$acc01,24,0,7
929         rlwinm  $s2,$acc02,24,0,7
930         rlwinm  $s3,$acc03,24,0,7
931         lbzx    $acc12,$Tbl1,$acc12
932         lbzx    $acc13,$Tbl1,$acc13
933         lbzx    $acc14,$Tbl1,$acc14
934         lbzx    $acc15,$Tbl1,$acc15
935         rlwimi  $s0,$acc04,16,8,15
936         rlwimi  $s1,$acc05,16,8,15
937         rlwimi  $s2,$acc06,16,8,15
938         rlwimi  $s3,$acc07,16,8,15
939         rlwimi  $s0,$acc08,8,16,23
940         rlwimi  $s1,$acc09,8,16,23
941         rlwimi  $s2,$acc10,8,16,23
942         rlwimi  $s3,$acc11,8,16,23
943         or      $s0,$s0,$acc12
944         or      $s1,$s1,$acc13
945         or      $s2,$s2,$acc14
946         or      $s3,$s3,$acc15
947
            # Advance to the following round key; when CTR reaches zero
            # this was the last round, which skips the column mixing.
948         addi    $key,$key,16
949         bdz     Ldec_compact_done
950 ___
# 64-bit flavour of the column-mix preparation: the state is packed two
# words per register (s1 above s0, s3 above s2) and every packed byte is
# doubled in GF(2^8) three times, giving r2, r4 and r8. The results are
# then unpacked into the register layout the shared code after this
# heredoc expects: acc00..03 = r2^r0, acc04..07 = r4^r0, acc08..11 = r8.
951 $code.=<<___ if ($SIZE_T==8);
952         # vectorized permutation improves decrypt performance by 10%
953         insrdi  $s0,$s1,32,0
954         insrdi  $s2,$s3,32,0
955
            # Doubling step: the bytes with the top bit set are turned
            # into 0x1b (reduction constant), all bytes are shifted left
            # by one without carrying into the neighbour, then both parts
            # are xored together.
956         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
957         and     $acc02,$s2,$mask80
958         srdi    $acc04,$acc00,7         # r1>>7
959         srdi    $acc06,$acc02,7
960         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
961         andc    $acc10,$s2,$mask80
962         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
963         sub     $acc02,$acc02,$acc06
964         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
965         add     $acc10,$acc10,$acc10
966         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
967         and     $acc02,$acc02,$mask1b
968         xor     $acc00,$acc00,$acc08    # r2
969         xor     $acc02,$acc02,$acc10
970
971         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
972         and     $acc06,$acc02,$mask80
973         srdi    $acc08,$acc04,7         # r1>>7
974         srdi    $acc10,$acc06,7
975         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
976         andc    $acc14,$acc02,$mask80
977         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
978         sub     $acc06,$acc06,$acc10
979         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
980         add     $acc14,$acc14,$acc14
981         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
982         and     $acc06,$acc06,$mask1b
983         xor     $acc04,$acc04,$acc12    # r4
984         xor     $acc06,$acc06,$acc14
985
986         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
987         and     $acc10,$acc06,$mask80
988         srdi    $acc12,$acc08,7         # r1>>7
989         srdi    $acc14,$acc10,7
990         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
991         sub     $acc10,$acc10,$acc14
992         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
993         andc    $acc14,$acc06,$mask80
994         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
995         add     $acc14,$acc14,$acc14
996         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
997         and     $acc10,$acc10,$mask1b
998         xor     $acc08,$acc08,$acc12    # r8
999         xor     $acc10,$acc10,$acc14
1000
1001         xor     $acc00,$acc00,$s0       # r2^r0
1002         xor     $acc02,$acc02,$s2
1003         xor     $acc04,$acc04,$s0       # r4^r0
1004         xor     $acc06,$acc06,$s2
1005
            # Unpack: copy the upper 32 bits of every packed result into
            # the odd-numbered register. The extrdi operands are (n, b) =
            # field width and first bit, so the upper word is n=32, b=0;
            # a width of 0 is not a valid encoding.
1006         extrdi  $acc01,$acc00,32,0
1007         extrdi  $acc03,$acc02,32,0
1008         extrdi  $acc05,$acc04,32,0
1009         extrdi  $acc07,$acc06,32,0
1010         extrdi  $acc09,$acc08,32,0
1011         extrdi  $acc11,$acc10,32,0
1012 ___
# 32-bit flavour of the column-mix preparation: every byte of the four
# state words is doubled in GF(2^8) three times, giving r2, r4 and r8.
# On exit acc00..03 = r2^r0, acc04..07 = r4^r0 and acc08..11 = r8, which
# is what the shared code after this heredoc consumes.
1013 $code.=<<___ if ($SIZE_T==4);
            # Doubling step: the bytes with the top bit set are turned
            # into 0x1b (reduction constant), all bytes are shifted left
            # by one without carrying into the neighbour, then both parts
            # are xored together.
1014         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1015         and     $acc01,$s1,$mask80
1016         and     $acc02,$s2,$mask80
1017         and     $acc03,$s3,$mask80
1018         srwi    $acc04,$acc00,7         # r1>>7
1019         srwi    $acc05,$acc01,7
1020         srwi    $acc06,$acc02,7
1021         srwi    $acc07,$acc03,7
1022         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1023         andc    $acc09,$s1,$mask80
1024         andc    $acc10,$s2,$mask80
1025         andc    $acc11,$s3,$mask80
1026         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1027         sub     $acc01,$acc01,$acc05
1028         sub     $acc02,$acc02,$acc06
1029         sub     $acc03,$acc03,$acc07
1030         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1031         add     $acc09,$acc09,$acc09
1032         add     $acc10,$acc10,$acc10
1033         add     $acc11,$acc11,$acc11
1034         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1035         and     $acc01,$acc01,$mask1b
1036         and     $acc02,$acc02,$mask1b
1037         and     $acc03,$acc03,$mask1b
1038         xor     $acc00,$acc00,$acc08    # r2
1039         xor     $acc01,$acc01,$acc09
1040         xor     $acc02,$acc02,$acc10
1041         xor     $acc03,$acc03,$acc11
1042
            # Same step applied to r2, producing r4 in acc04..acc07.
1043         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1044         and     $acc05,$acc01,$mask80
1045         and     $acc06,$acc02,$mask80
1046         and     $acc07,$acc03,$mask80
1047         srwi    $acc08,$acc04,7         # r1>>7
1048         srwi    $acc09,$acc05,7
1049         srwi    $acc10,$acc06,7
1050         srwi    $acc11,$acc07,7
1051         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1052         andc    $acc13,$acc01,$mask80
1053         andc    $acc14,$acc02,$mask80
1054         andc    $acc15,$acc03,$mask80
1055         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1056         sub     $acc05,$acc05,$acc09
1057         sub     $acc06,$acc06,$acc10
1058         sub     $acc07,$acc07,$acc11
1059         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1060         add     $acc13,$acc13,$acc13
1061         add     $acc14,$acc14,$acc14
1062         add     $acc15,$acc15,$acc15
1063         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1064         and     $acc05,$acc05,$mask1b
1065         and     $acc06,$acc06,$mask1b
1066         and     $acc07,$acc07,$mask1b
1067         xor     $acc04,$acc04,$acc12    # r4
1068         xor     $acc05,$acc05,$acc13
1069         xor     $acc06,$acc06,$acc14
1070         xor     $acc07,$acc07,$acc15
1071
            # Same step applied to r4, producing r8 in acc08..acc11.
1072         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1073         and     $acc09,$acc05,$mask80
1074         and     $acc10,$acc06,$mask80
1075         and     $acc11,$acc07,$mask80
1076         srwi    $acc12,$acc08,7         # r1>>7
1077         srwi    $acc13,$acc09,7
1078         srwi    $acc14,$acc10,7
1079         srwi    $acc15,$acc11,7
1080         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1081         sub     $acc09,$acc09,$acc13
1082         sub     $acc10,$acc10,$acc14
1083         sub     $acc11,$acc11,$acc15
1084         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1085         andc    $acc13,$acc05,$mask80
1086         andc    $acc14,$acc06,$mask80
1087         andc    $acc15,$acc07,$mask80
1088         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1089         add     $acc13,$acc13,$acc13
1090         add     $acc14,$acc14,$acc14
1091         add     $acc15,$acc15,$acc15
1092         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1093         and     $acc09,$acc09,$mask1b
1094         and     $acc10,$acc10,$mask1b
1095         and     $acc11,$acc11,$mask1b
1096         xor     $acc08,$acc08,$acc12    # r8
1097         xor     $acc09,$acc09,$acc13
1098         xor     $acc10,$acc10,$acc14
1099         xor     $acc11,$acc11,$acc15
1100
            # Pre-combine with the original state for the mixing code
            # that follows this heredoc.
1101         xor     $acc00,$acc00,$s0       # r2^r0
1102         xor     $acc01,$acc01,$s1
1103         xor     $acc02,$acc02,$s2
1104         xor     $acc03,$acc03,$s3
1105         xor     $acc04,$acc04,$s0       # r4^r0
1106         xor     $acc05,$acc05,$s1
1107         xor     $acc06,$acc06,$s2
1108         xor     $acc07,$acc07,$s3
1109 ___
# Shared tail of the compact decrypt round (both word sizes): combine the
# state r0 with its multiples into the mixed column, then loop. Expects
# acc00..03 = r2^r0, acc04..07 = r4^r0 and acc08..11 = r8 on entry.
1110 $code.=<<___;
            # Each new state word is the xor of seven terms:
            #   ROTATE(r0,8) ^ (r2^r0) ^ (r4^r0) ^ r8
            #   ^ ROTATE(r8^r2^r0,24) ^ ROTATE(r8^r4^r0,16) ^ ROTATE(r8,8)
            # The three unrotated terms sum to r2^r4^r8. The rotations
            # and xors are interleaved so acc00..acc11 can be updated in
            # place while earlier terms are still being folded in.
1111         rotrwi  $s0,$s0,8               # = ROTATE(r0,8)
1112         rotrwi  $s1,$s1,8
1113         rotrwi  $s2,$s2,8
1114         rotrwi  $s3,$s3,8
1115         xor     $s0,$s0,$acc00          # ^= r2^r0
1116         xor     $s1,$s1,$acc01
1117         xor     $s2,$s2,$acc02
1118         xor     $s3,$s3,$acc03
1119         xor     $acc00,$acc00,$acc08
1120         xor     $acc01,$acc01,$acc09
1121         xor     $acc02,$acc02,$acc10
1122         xor     $acc03,$acc03,$acc11
1123         xor     $s0,$s0,$acc04          # ^= r4^r0
1124         xor     $s1,$s1,$acc05
1125         xor     $s2,$s2,$acc06
1126         xor     $s3,$s3,$acc07
1127         rotrwi  $acc00,$acc00,24
1128         rotrwi  $acc01,$acc01,24
1129         rotrwi  $acc02,$acc02,24
1130         rotrwi  $acc03,$acc03,24
1131         xor     $acc04,$acc04,$acc08
1132         xor     $acc05,$acc05,$acc09
1133         xor     $acc06,$acc06,$acc10
1134         xor     $acc07,$acc07,$acc11
1135         xor     $s0,$s0,$acc08          # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1136         xor     $s1,$s1,$acc09
1137         xor     $s2,$s2,$acc10
1138         xor     $s3,$s3,$acc11
1139         rotrwi  $acc04,$acc04,16
1140         rotrwi  $acc05,$acc05,16
1141         rotrwi  $acc06,$acc06,16
1142         rotrwi  $acc07,$acc07,16
1143         xor     $s0,$s0,$acc00          # ^= ROTATE(r8^r2^r0,24)
1144         xor     $s1,$s1,$acc01
1145         xor     $s2,$s2,$acc02
1146         xor     $s3,$s3,$acc03
1147         rotrwi  $acc08,$acc08,8
1148         rotrwi  $acc09,$acc09,8
1149         rotrwi  $acc10,$acc10,8
1150         rotrwi  $acc11,$acc11,8
1151         xor     $s0,$s0,$acc04          # ^= ROTATE(r8^r4^r0,16)
1152         xor     $s1,$s1,$acc05
1153         xor     $s2,$s2,$acc06
1154         xor     $s3,$s3,$acc07
1155         xor     $s0,$s0,$acc08          # ^= ROTATE(r8,8)       
1156         xor     $s1,$s1,$acc09  
1157         xor     $s2,$s2,$acc10  
1158         xor     $s3,$s3,$acc11  
1159
            # The key addition for the next round happens at the top of
            # the loop.
1160         b       Ldec_compact_loop
1161 .align  4
1162 Ldec_compact_done:
            # Last round: add the final round key (already in t0..t3)
            # and return the result in s0..s3.
1163         xor     $s0,$s0,$t0
1164         xor     $s1,$s1,$t1
1165         xor     $s2,$s2,$t2
1166         xor     $s3,$s3,$t3
1167         blr
            # Module trailer: a zero word, an identification string and
            # alignment padding.
1168 .long   0
1169 .asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1170 .align  7
1171 ___
1172
# Replace every back-quoted arithmetic expression in the accumulated
# template (frame offsets, rotate counts, table displacements) with its
# evaluated value, then write the text to STDOUT, which was reopened
# earlier as a pipe into the ppc-xlate.pl translator.
1173 $code =~ s/\`([^\`]*)\`/eval $1/gem;
1174 print $code;
# close() on a pipe waits for the child and returns false when the child
# exited with a non-zero status or buffered output could not be flushed.
# Fail loudly so a broken translation step cannot leave a truncated or
# empty assembly file behind a build that looks successful.
1175 close STDOUT or die "error closing STDOUT: $! (child status $?)";