aes-ppc.pl: minor optimization favoring embedded processors (performance
[openssl.git] / crypto / aes / asm / aes-ppc.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # Needs more work: key setup, CBC routine...
11 #
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17 # at 1/3 of ppc_AES_decrypt.
18
19 # February 2010
20 #
21 # Rescheduling instructions to favour Power6 pipeline gave 10%
22 # performance improvement on the platform in question (and marginal
23 # improvement even on others). It should be noted that Power6 fails
24 # to process byte in 18 cycles, only in 23, because it fails to issue
25 # 4 load instructions in two cycles, only in 3. As a result non-compact
26 # block subroutines are 25% slower than one would expect. Compact
27 # functions scale better, because they have pure computational part,
28 # which scales perfectly with clock frequency. To be specific
29 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
# The first command-line argument names the target flavour.
$flavour = shift;

# Pointer size in bytes.  "64" is tested before "32", so it wins if a
# flavour string happens to contain both; anything else is rejected.
$SIZE_T = $flavour =~ /64/ ? 8
        : $flavour =~ /32/ ? 4
        : die "nonsense $flavour";

# LR save-slot offset, followed by the mnemonics used for
# store-with-update, load and store of a pointer-sized register.
($LRSAVE,$STU,$POP,$PUSH) = $SIZE_T == 8
        ? (2*$SIZE_T, "stdu", "ld",  "std")
        : (  $SIZE_T, "stwu", "lwz", "stw");
47
# Locate ppc-xlate.pl: first next to this script, then in ../../perlasm
# relative to it.  $dir keeps its trailing path separator, or is empty
# when $0 carries no directory component.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Send everything printed to STDOUT through the translator.  The next
# command-line argument (taken with shift; presumably the output file
# name) is appended to the translator's command line.
# "or" is required here: "||" would bind to the string operand, which is
# always true, so a failed open would never reach die.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

# Stack frame size: 32 pointer-sized slots.
$FRAME=32*$SIZE_T;
56
# Append one ".long" directive per argument to $code, each holding the
# 32-bit value twice, formatted as 0x%08x.  Arguments are consumed until
# the first undefined one; a zero value is emitted like any other.
#
# No prototype: the former "()" declared an argument-less sub and only
# worked because the call sites use the &-form, which bypasses prototypes.
sub _data_word
{
    while (defined(my $word = shift)) {
        $code .= sprintf("\t.long\t0x%08x,0x%08x\n", $word, $word);
    }
}
61
# Stack pointer, TOC pointer and the three incoming arguments.
($sp,$toc,$inp,$out,$key) = map("r$_", 1..5);

# Lookup-table pointers.  $Tbl0 is the same register as $inp (r3) and
# $Tbl3 starts out as the same register as $toc (r2).
($Tbl0,$Tbl1,$Tbl2,$Tbl3) = qw(r3 r6 r7 r2);

# Four state words and four temporaries.
($s0,$s1,$s2,$s3) = map("r$_",  8..11);
($t0,$t1,$t2,$t3) = map("r$_", 12..15);

# Sixteen accumulators, r16 through r31.
($acc00,$acc01,$acc02,$acc03,
 $acc04,$acc05,$acc06,$acc07,
 $acc08,$acc09,$acc10,$acc11,
 $acc12,$acc13,$acc14,$acc15) = map("r$_", 16..31);

# stay away from TLS pointer
if ($SIZE_T==8) {
        # 8-byte pointers: $t1 is moved off r13 onto r0.
        die if ($t1 ne "r13");
        $t1="r0";
} else {
        # Otherwise: $Tbl3 is moved off r2 onto $t0's register, and $t0
        # in turn becomes r0.
        die if ($Tbl3 ne "r2");
        $Tbl3=$t0;
        $t0="r0";
}

# The two mask constants reuse the $Tbl2 and $Tbl3 registers.
($mask80,$mask1b) = ($Tbl2,$Tbl3);
108
# Table locator stubs.  Each stub saves LR in r0, executes
# "bcl 20,31,$+4" so that LR receives the address of the instruction that
# follows, copies that address into $Tbl0, adds the byte distance to its
# table, restores LR from r0 and returns; the result is left in $Tbl0.
# A stub is 9 words long (6 instructions, one .long and 8 .byte values)
# and is padded with .space to 64 bytes, so the data emitted right after
# this heredoc begins 128 bytes past LAES_Te.
#   LAES_Te: the captured address is LAES_Te+8, hence 128-8.
#   LAES_Td: the captured address is LAES_Te+64+8; the offset additionally
#            skips the 2048+256 bytes of data emitted ahead of its table.
109 $code.=<<___;
110 .machine        "any"
111 .text
112
113 .align  7
114 LAES_Te:
115         mflr    r0
116         bcl     20,31,\$+4
117         mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
118         addi    $Tbl0,$Tbl0,`128-8`
119         mtlr    r0
120         blr
121         .long   0
122         .byte   0,12,0x14,0,0,0,0,0
123         .space  `64-9*4`
124 LAES_Td:
125         mflr    r0
126         bcl     20,31,\$+4
127         mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
128         addi    $Tbl0,$Tbl0,`128-64-8+2048+256`
129         mtlr    r0
130         blr
131         .long   0
132         .byte   0,12,0x14,0,0,0,0,0
133         .space  `128-64-9*4`
134 ___
135 &_data_word(
136         0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
137         0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
138         0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
139         0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
140         0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
141         0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
142         0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
143         0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
144         0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
145         0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
146         0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
147         0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
148         0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
149         0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
150         0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
151         0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
152         0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
153         0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
154         0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
155         0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
156         0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
157         0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
158         0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
159         0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
160         0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
161         0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
162         0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
163         0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
164         0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
165         0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
166         0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
167         0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
168         0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
169         0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
170         0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
171         0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
172         0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
173         0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
174         0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
175         0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
176         0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
177         0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
178         0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
179         0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
180         0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
181         0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
182         0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
183         0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
184         0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
185         0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
186         0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
187         0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
188         0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
189         0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
190         0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
191         0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
192         0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
193         0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
194         0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
195         0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
196         0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
197         0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
198         0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
199         0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
200 $code.=<<___;
201 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
202 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
203 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
204 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
205 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
206 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
207 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
208 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
209 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
210 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
211 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
212 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
213 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
214 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
215 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
216 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
217 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
218 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
219 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
220 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
221 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
222 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
223 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
224 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
225 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
226 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
227 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
228 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
229 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
230 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
231 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
232 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
233 ___
234 &_data_word(
235         0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
236         0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
237         0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
238         0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
239         0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
240         0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
241         0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
242         0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
243         0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
244         0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
245         0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
246         0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
247         0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
248         0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
249         0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
250         0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
251         0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
252         0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
253         0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
254         0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
255         0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
256         0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
257         0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
258         0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
259         0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
260         0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
261         0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
262         0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
263         0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
264         0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
265         0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
266         0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
267         0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
268         0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
269         0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
270         0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
271         0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
272         0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
273         0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
274         0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
275         0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
276         0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
277         0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
278         0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
279         0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
280         0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
281         0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
282         0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
283         0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
284         0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
285         0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
286         0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
287         0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
288         0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
289         0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
290         0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
291         0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
292         0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
293         0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
294         0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
295         0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
296         0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
297         0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
298         0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
299 $code.=<<___;
300 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
301 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
302 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
303 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
304 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
305 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
306 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
307 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
308 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
309 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
310 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
311 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
312 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
313 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
314 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
315 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
316 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
317 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
318 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
319 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
320 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
321 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
322 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
323 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
324 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
325 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
326 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
327 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
328 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
329 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
330 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
331 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
332
333
334 .globl  .AES_encrypt
335 .align  7
336 .AES_encrypt:
337         $STU    $sp,-$FRAME($sp)
338         mflr    r0
339
340         $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
341         $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
342         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
343         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
344         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
345         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
346         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
347         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
348         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
349         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
350         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
351         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
352         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
353         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
354         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
355         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
356         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
357         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
358         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
359         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
360         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
361
362         andi.   $t0,$inp,3
363         andi.   $t1,$out,3
364         or.     $t0,$t0,$t1
365         bne     Lenc_unaligned
366
367 Lenc_unaligned_ok:
368         lwz     $s0,0($inp)
369         lwz     $s1,4($inp)
370         lwz     $s2,8($inp)
371         lwz     $s3,12($inp)
372         bl      LAES_Te
373         bl      Lppc_AES_encrypt_compact
374         stw     $s0,0($out)
375         stw     $s1,4($out)
376         stw     $s2,8($out)
377         stw     $s3,12($out)
378         b       Lenc_done
379
380 Lenc_unaligned:
381         subfic  $t0,$inp,4096
382         subfic  $t1,$out,4096
383         andi.   $t0,$t0,4096-16
384         beq     Lenc_xpage
385         andi.   $t1,$t1,4096-16
386         bne     Lenc_unaligned_ok
387
388 Lenc_xpage:
389         lbz     $acc00,0($inp)
390         lbz     $acc01,1($inp)
391         lbz     $acc02,2($inp)
392         lbz     $s0,3($inp)
393         lbz     $acc04,4($inp)
394         lbz     $acc05,5($inp)
395         lbz     $acc06,6($inp)
396         lbz     $s1,7($inp)
397         lbz     $acc08,8($inp)
398         lbz     $acc09,9($inp)
399         lbz     $acc10,10($inp)
400         insrwi  $s0,$acc00,8,0
401         lbz     $s2,11($inp)
402         insrwi  $s1,$acc04,8,0
403         lbz     $acc12,12($inp)
404         insrwi  $s0,$acc01,8,8
405         lbz     $acc13,13($inp)
406         insrwi  $s1,$acc05,8,8
407         lbz     $acc14,14($inp)
408         insrwi  $s0,$acc02,8,16
409         lbz     $s3,15($inp)
410         insrwi  $s1,$acc06,8,16
411         insrwi  $s2,$acc08,8,0
412         insrwi  $s3,$acc12,8,0
413         insrwi  $s2,$acc09,8,8
414         insrwi  $s3,$acc13,8,8
415         insrwi  $s2,$acc10,8,16
416         insrwi  $s3,$acc14,8,16
417
418         bl      LAES_Te
419         bl      Lppc_AES_encrypt_compact
420
421         extrwi  $acc00,$s0,8,0
422         extrwi  $acc01,$s0,8,8
423         stb     $acc00,0($out)
424         extrwi  $acc02,$s0,8,16
425         stb     $acc01,1($out)
426         stb     $acc02,2($out)
427         extrwi  $acc04,$s1,8,0
428         stb     $s0,3($out)
429         extrwi  $acc05,$s1,8,8
430         stb     $acc04,4($out)
431         extrwi  $acc06,$s1,8,16
432         stb     $acc05,5($out)
433         stb     $acc06,6($out)
434         extrwi  $acc08,$s2,8,0
435         stb     $s1,7($out)
436         extrwi  $acc09,$s2,8,8
437         stb     $acc08,8($out)
438         extrwi  $acc10,$s2,8,16
439         stb     $acc09,9($out)
440         stb     $acc10,10($out)
441         extrwi  $acc12,$s3,8,0
442         stb     $s2,11($out)
443         extrwi  $acc13,$s3,8,8
444         stb     $acc12,12($out)
445         extrwi  $acc14,$s3,8,16
446         stb     $acc13,13($out)
447         stb     $acc14,14($out)
448         stb     $s3,15($out)
449
450 Lenc_done:
451         $POP    r0,`$FRAME+$LRSAVE`($sp)
452         $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
453         $POP    r13,`$FRAME-$SIZE_T*19`($sp)
454         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
455         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
456         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
457         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
458         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
459         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
460         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
461         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
462         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
463         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
464         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
465         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
466         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
467         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
468         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
469         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
470         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
471         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
472         mtlr    r0
473         addi    $sp,$sp,$FRAME
474         blr
475         .long   0
476         .byte   0,12,4,1,0x80,18,3,0
477         .long   0
478
479 .align  5
480 Lppc_AES_encrypt:
481         lwz     $acc00,240($key)
482         addi    $Tbl1,$Tbl0,3
483         lwz     $t0,0($key)
484         addi    $Tbl2,$Tbl0,2
485         lwz     $t1,4($key)
486         addi    $Tbl3,$Tbl0,1
487         lwz     $t2,8($key)
488         addi    $acc00,$acc00,-1
489         lwz     $t3,12($key)
490         addi    $key,$key,16
491         xor     $s0,$s0,$t0
492         xor     $s1,$s1,$t1
493         xor     $s2,$s2,$t2
494         xor     $s3,$s3,$t3
495         mtctr   $acc00
496 .align  4
497 Lenc_loop:
498         rlwinm  $acc00,$s0,`32-24+3`,21,28
499         rlwinm  $acc01,$s1,`32-24+3`,21,28
500         rlwinm  $acc02,$s2,`32-24+3`,21,28
501         rlwinm  $acc03,$s3,`32-24+3`,21,28
502         lwz     $t0,0($key)
503         rlwinm  $acc04,$s1,`32-16+3`,21,28
504         lwz     $t1,4($key)
505         rlwinm  $acc05,$s2,`32-16+3`,21,28
506         lwz     $t2,8($key)
507         rlwinm  $acc06,$s3,`32-16+3`,21,28
508         lwz     $t3,12($key)
509         rlwinm  $acc07,$s0,`32-16+3`,21,28
510         lwzx    $acc00,$Tbl0,$acc00
511         rlwinm  $acc08,$s2,`32-8+3`,21,28
512         lwzx    $acc01,$Tbl0,$acc01
513         rlwinm  $acc09,$s3,`32-8+3`,21,28
514         lwzx    $acc02,$Tbl0,$acc02
515         rlwinm  $acc10,$s0,`32-8+3`,21,28
516         lwzx    $acc03,$Tbl0,$acc03
517         rlwinm  $acc11,$s1,`32-8+3`,21,28
518         lwzx    $acc04,$Tbl1,$acc04
519         rlwinm  $acc12,$s3,`0+3`,21,28
520         lwzx    $acc05,$Tbl1,$acc05
521         rlwinm  $acc13,$s0,`0+3`,21,28
522         lwzx    $acc06,$Tbl1,$acc06
523         rlwinm  $acc14,$s1,`0+3`,21,28
524         lwzx    $acc07,$Tbl1,$acc07
525         rlwinm  $acc15,$s2,`0+3`,21,28
526         lwzx    $acc08,$Tbl2,$acc08
527         xor     $t0,$t0,$acc00
528         lwzx    $acc09,$Tbl2,$acc09
529         xor     $t1,$t1,$acc01
530         lwzx    $acc10,$Tbl2,$acc10
531         xor     $t2,$t2,$acc02
532         lwzx    $acc11,$Tbl2,$acc11
533         xor     $t3,$t3,$acc03
534         lwzx    $acc12,$Tbl3,$acc12
535         xor     $t0,$t0,$acc04
536         lwzx    $acc13,$Tbl3,$acc13
537         xor     $t1,$t1,$acc05
538         lwzx    $acc14,$Tbl3,$acc14
539         xor     $t2,$t2,$acc06
540         lwzx    $acc15,$Tbl3,$acc15
541         xor     $t3,$t3,$acc07
542         xor     $t0,$t0,$acc08
543         xor     $t1,$t1,$acc09
544         xor     $t2,$t2,$acc10
545         xor     $t3,$t3,$acc11
546         xor     $s0,$t0,$acc12
547         xor     $s1,$t1,$acc13
548         xor     $s2,$t2,$acc14
549         xor     $s3,$t3,$acc15
550         addi    $key,$key,16
551         bdnz-   Lenc_loop
552
553         addi    $Tbl2,$Tbl0,2048
554         nop
555         lwz     $t0,0($key)
556         rlwinm  $acc00,$s0,`32-24`,24,31
557         lwz     $t1,4($key)
558         rlwinm  $acc01,$s1,`32-24`,24,31
559         lwz     $t2,8($key)
560         rlwinm  $acc02,$s2,`32-24`,24,31
561         lwz     $t3,12($key)
562         rlwinm  $acc03,$s3,`32-24`,24,31
563         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Te4
564         rlwinm  $acc04,$s1,`32-16`,24,31
565         lwz     $acc09,`2048+32`($Tbl0)
566         rlwinm  $acc05,$s2,`32-16`,24,31
567         lwz     $acc10,`2048+64`($Tbl0)
568         rlwinm  $acc06,$s3,`32-16`,24,31
569         lwz     $acc11,`2048+96`($Tbl0)
570         rlwinm  $acc07,$s0,`32-16`,24,31
571         lwz     $acc12,`2048+128`($Tbl0)
572         rlwinm  $acc08,$s2,`32-8`,24,31
573         lwz     $acc13,`2048+160`($Tbl0)
574         rlwinm  $acc09,$s3,`32-8`,24,31
575         lwz     $acc14,`2048+192`($Tbl0)
576         rlwinm  $acc10,$s0,`32-8`,24,31
577         lwz     $acc15,`2048+224`($Tbl0)
578         rlwinm  $acc11,$s1,`32-8`,24,31
579         lbzx    $acc00,$Tbl2,$acc00
580         rlwinm  $acc12,$s3,`0`,24,31
581         lbzx    $acc01,$Tbl2,$acc01
582         rlwinm  $acc13,$s0,`0`,24,31
583         lbzx    $acc02,$Tbl2,$acc02
584         rlwinm  $acc14,$s1,`0`,24,31
585         lbzx    $acc03,$Tbl2,$acc03
586         rlwinm  $acc15,$s2,`0`,24,31
587         lbzx    $acc04,$Tbl2,$acc04
588         rlwinm  $s0,$acc00,24,0,7
589         lbzx    $acc05,$Tbl2,$acc05
590         rlwinm  $s1,$acc01,24,0,7
591         lbzx    $acc06,$Tbl2,$acc06
592         rlwinm  $s2,$acc02,24,0,7
593         lbzx    $acc07,$Tbl2,$acc07
594         rlwinm  $s3,$acc03,24,0,7
595         lbzx    $acc08,$Tbl2,$acc08
596         rlwimi  $s0,$acc04,16,8,15
597         lbzx    $acc09,$Tbl2,$acc09
598         rlwimi  $s1,$acc05,16,8,15
599         lbzx    $acc10,$Tbl2,$acc10
600         rlwimi  $s2,$acc06,16,8,15
601         lbzx    $acc11,$Tbl2,$acc11
602         rlwimi  $s3,$acc07,16,8,15
603         lbzx    $acc12,$Tbl2,$acc12
604         rlwimi  $s0,$acc08,8,16,23
605         lbzx    $acc13,$Tbl2,$acc13
606         rlwimi  $s1,$acc09,8,16,23
607         lbzx    $acc14,$Tbl2,$acc14
608         rlwimi  $s2,$acc10,8,16,23
609         lbzx    $acc15,$Tbl2,$acc15
610         rlwimi  $s3,$acc11,8,16,23
611         or      $s0,$s0,$acc12
612         or      $s1,$s1,$acc13
613         or      $s2,$s2,$acc14
614         or      $s3,$s3,$acc15
615         xor     $s0,$s0,$t0
616         xor     $s1,$s1,$t1
617         xor     $s2,$s2,$t2
618         xor     $s3,$s3,$t3
619         blr
620         .long   0
621         .byte   0,12,0x14,0,0,0,0,0
622
623 .align  4
624 Lppc_AES_encrypt_compact:
625         lwz     $acc00,240($key)
626         addi    $Tbl1,$Tbl0,2048
627         lwz     $t0,0($key)
628         lis     $mask80,0x8080
629         lwz     $t1,4($key)
630         lis     $mask1b,0x1b1b
631         lwz     $t2,8($key)
632         ori     $mask80,$mask80,0x8080
633         lwz     $t3,12($key)
634         ori     $mask1b,$mask1b,0x1b1b
635         addi    $key,$key,16
636         mtctr   $acc00
637 .align  4
638 Lenc_compact_loop:
639         xor     $s0,$s0,$t0
640         xor     $s1,$s1,$t1
641         rlwinm  $acc00,$s0,`32-24`,24,31
642         xor     $s2,$s2,$t2
643         rlwinm  $acc01,$s1,`32-24`,24,31
644         xor     $s3,$s3,$t3
645         rlwinm  $acc02,$s2,`32-24`,24,31
646         rlwinm  $acc03,$s3,`32-24`,24,31
647         rlwinm  $acc04,$s1,`32-16`,24,31
648         rlwinm  $acc05,$s2,`32-16`,24,31
649         rlwinm  $acc06,$s3,`32-16`,24,31
650         rlwinm  $acc07,$s0,`32-16`,24,31
651         lbzx    $acc00,$Tbl1,$acc00
652         rlwinm  $acc08,$s2,`32-8`,24,31
653         lbzx    $acc01,$Tbl1,$acc01
654         rlwinm  $acc09,$s3,`32-8`,24,31
655         lbzx    $acc02,$Tbl1,$acc02
656         rlwinm  $acc10,$s0,`32-8`,24,31
657         lbzx    $acc03,$Tbl1,$acc03
658         rlwinm  $acc11,$s1,`32-8`,24,31
659         lbzx    $acc04,$Tbl1,$acc04
660         rlwinm  $acc12,$s3,`0`,24,31
661         lbzx    $acc05,$Tbl1,$acc05
662         rlwinm  $acc13,$s0,`0`,24,31
663         lbzx    $acc06,$Tbl1,$acc06
664         rlwinm  $acc14,$s1,`0`,24,31
665         lbzx    $acc07,$Tbl1,$acc07
666         rlwinm  $acc15,$s2,`0`,24,31
667         lbzx    $acc08,$Tbl1,$acc08
668         rlwinm  $s0,$acc00,24,0,7
669         lbzx    $acc09,$Tbl1,$acc09
670         rlwinm  $s1,$acc01,24,0,7
671         lbzx    $acc10,$Tbl1,$acc10
672         rlwinm  $s2,$acc02,24,0,7
673         lbzx    $acc11,$Tbl1,$acc11
674         rlwinm  $s3,$acc03,24,0,7
675         lbzx    $acc12,$Tbl1,$acc12
676         rlwimi  $s0,$acc04,16,8,15
677         lbzx    $acc13,$Tbl1,$acc13
678         rlwimi  $s1,$acc05,16,8,15
679         lbzx    $acc14,$Tbl1,$acc14
680         rlwimi  $s2,$acc06,16,8,15
681         lbzx    $acc15,$Tbl1,$acc15
682         rlwimi  $s3,$acc07,16,8,15
683         rlwimi  $s0,$acc08,8,16,23
684         rlwimi  $s1,$acc09,8,16,23
685         rlwimi  $s2,$acc10,8,16,23
686         rlwimi  $s3,$acc11,8,16,23
687         lwz     $t0,0($key)
688         or      $s0,$s0,$acc12
689         lwz     $t1,4($key)
690         or      $s1,$s1,$acc13
691         lwz     $t2,8($key)
692         or      $s2,$s2,$acc14
693         lwz     $t3,12($key)
694         or      $s3,$s3,$acc15
695
696         addi    $key,$key,16
697         bdz     Lenc_compact_done
698
699         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
700         and     $acc01,$s1,$mask80
701         and     $acc02,$s2,$mask80
702         and     $acc03,$s3,$mask80
703         srwi    $acc04,$acc00,7         # r1>>7
704         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
705         srwi    $acc05,$acc01,7
706         andc    $acc09,$s1,$mask80
707         srwi    $acc06,$acc02,7
708         andc    $acc10,$s2,$mask80
709         srwi    $acc07,$acc03,7
710         andc    $acc11,$s3,$mask80
711         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
712         sub     $acc01,$acc01,$acc05
713         sub     $acc02,$acc02,$acc06
714         sub     $acc03,$acc03,$acc07
715         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
716         add     $acc09,$acc09,$acc09
717         add     $acc10,$acc10,$acc10
718         add     $acc11,$acc11,$acc11
719         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
720         and     $acc01,$acc01,$mask1b
721         and     $acc02,$acc02,$mask1b
722         and     $acc03,$acc03,$mask1b
723         xor     $acc00,$acc00,$acc08    # r2
724         xor     $acc01,$acc01,$acc09
725          rotlwi $acc12,$s0,16           # ROTATE(r0,16)
726         xor     $acc02,$acc02,$acc10
727          rotlwi $acc13,$s1,16
728         xor     $acc03,$acc03,$acc11
729          rotlwi $acc14,$s2,16
730
731         xor     $s0,$s0,$acc00          # r0^r2
732         rotlwi  $acc15,$s3,16
733         xor     $s1,$s1,$acc01
734         rotrwi  $s0,$s0,24              # ROTATE(r2^r0,24)
735         xor     $s2,$s2,$acc02
736         rotrwi  $s1,$s1,24
737         xor     $s3,$s3,$acc03
738         rotrwi  $s2,$s2,24
739         xor     $s0,$s0,$acc00          # ROTATE(r2^r0,24)^r2
740         rotrwi  $s3,$s3,24
741         xor     $s1,$s1,$acc01
742         xor     $s2,$s2,$acc02
743         xor     $s3,$s3,$acc03
744         rotlwi  $acc08,$acc12,8         # ROTATE(r0,24)
745         xor     $s0,$s0,$acc12          #
746         rotlwi  $acc09,$acc13,8
747         xor     $s1,$s1,$acc13
748         rotlwi  $acc10,$acc14,8
749         xor     $s2,$s2,$acc14
750         rotlwi  $acc11,$acc15,8
751         xor     $s3,$s3,$acc15
752         xor     $s0,$s0,$acc08          #
753         xor     $s1,$s1,$acc09
754         xor     $s2,$s2,$acc10
755         xor     $s3,$s3,$acc11
756
757         b       Lenc_compact_loop
758 .align  4
759 Lenc_compact_done:
760         xor     $s0,$s0,$t0
761         xor     $s1,$s1,$t1
762         xor     $s2,$s2,$t2
763         xor     $s3,$s3,$t3
764         blr
765         .long   0
766         .byte   0,12,0x14,0,0,0,0,0
767
768 .globl  .AES_decrypt        # exported entry point: decrypt the 16-byte block at inp into out
769 .align  7
770 .AES_decrypt:               # state lives in s0-s3; the key schedule is read through key by the callee
771         $STU    $sp,-$FRAME($sp)        # allocate a FRAME-byte stack frame (store-with-update)
772         mflr    r0                      # return address, spilled below after the register saves
773         # save toc and r13-r31 in the top 20 slots of the new frame
774         $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
775         $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
776         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
777         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
778         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
779         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
780         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
781         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
782         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
783         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
784         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
785         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
786         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
787         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
788         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
789         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
790         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
791         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
792         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
793         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
794         $PUSH   r0,`$FRAME+$LRSAVE`($sp)        # LR goes LRSAVE bytes above the new frame, i.e. into the frame of the caller
795         # take the straight path when inp and out are both 4-byte aligned
796         andi.   $t0,$inp,3
797         andi.   $t1,$out,3
798         or.     $t0,$t0,$t1
799         bne     Ldec_unaligned          # at least one pointer has a low bit set
800
801 Ldec_unaligned_ok:          # word-sized loads and stores (also reached for misaligned pointers that pass the boundary test)
802         lwz     $s0,0($inp)
803         lwz     $s1,4($inp)
804         lwz     $s2,8($inp)
805         lwz     $s3,12($inp)
806         bl      LAES_Td                 # not defined in this part of the file; presumably points Tbl0 at the decryption tables - confirm
807         bl      Lppc_AES_decrypt_compact        # decrypts s0-s3 in place
808         stw     $s0,0($out)
809         stw     $s1,4($out)
810         stw     $s2,8($out)
811         stw     $s3,12($out)
812         b       Ldec_done
813
814 Ldec_unaligned:             # misaligned: go bytewise only if a block has fewer than 16 bytes left before a 4096-byte boundary
815         subfic  $t0,$inp,4096           # t0 = 4096 - inp
816         subfic  $t1,$out,4096           # t1 = 4096 - out
817         andi.   $t0,$t0,4096-16         # zero when (4096 - inp) mod 4096 < 16
818         beq     Ldec_xpage              # the inp block may cross the boundary
819         andi.   $t1,$t1,4096-16         # same test for out
820         bne     Ldec_unaligned_ok       # both blocks clear of the boundary: use word accesses
821
822 Ldec_xpage:                 # bytewise path: build four big-endian words from 16 single-byte loads
823         lbz     $acc00,0($inp)
824         lbz     $acc01,1($inp)
825         lbz     $acc02,2($inp)
826         lbz     $s0,3($inp)             # byte 3 is loaded straight into the low byte of s0
827         lbz     $acc04,4($inp)
828         lbz     $acc05,5($inp)
829         lbz     $acc06,6($inp)
830         lbz     $s1,7($inp)
831         lbz     $acc08,8($inp)
832         lbz     $acc09,9($inp)
833         lbz     $acc10,10($inp)
834         insrwi  $s0,$acc00,8,0          # byte 0 -> bits 0-7, the most significant byte
835         lbz     $s2,11($inp)
836         insrwi  $s1,$acc04,8,0
837         lbz     $acc12,12($inp)
838         insrwi  $s0,$acc01,8,8
839         lbz     $acc13,13($inp)
840         insrwi  $s1,$acc05,8,8
841         lbz     $acc14,14($inp)
842         insrwi  $s0,$acc02,8,16
843         lbz     $s3,15($inp)
844         insrwi  $s1,$acc06,8,16
845         insrwi  $s2,$acc08,8,0
846         insrwi  $s3,$acc12,8,0
847         insrwi  $s2,$acc09,8,8
848         insrwi  $s3,$acc13,8,8
849         insrwi  $s2,$acc10,8,16
850         insrwi  $s3,$acc14,8,16
851         # same call sequence as the word-access path
852         bl      LAES_Td
853         bl      Lppc_AES_decrypt_compact
854         # write s0-s3 back bytewise in big-endian order
855         extrwi  $acc00,$s0,8,0
856         extrwi  $acc01,$s0,8,8
857         stb     $acc00,0($out)
858         extrwi  $acc02,$s0,8,16
859         stb     $acc01,1($out)
860         stb     $acc02,2($out)
861         extrwi  $acc04,$s1,8,0
862         stb     $s0,3($out)             # stb writes the low-order byte of s0
863         extrwi  $acc05,$s1,8,8
864         stb     $acc04,4($out)
865         extrwi  $acc06,$s1,8,16
866         stb     $acc05,5($out)
867         stb     $acc06,6($out)
868         extrwi  $acc08,$s2,8,0
869         stb     $s1,7($out)
870         extrwi  $acc09,$s2,8,8
871         stb     $acc08,8($out)
872         extrwi  $acc10,$s2,8,16
873         stb     $acc09,9($out)
874         stb     $acc10,10($out)
875         extrwi  $acc12,$s3,8,0
876         stb     $s2,11($out)
877         extrwi  $acc13,$s3,8,8
878         stb     $acc12,12($out)
879         extrwi  $acc14,$s3,8,16
880         stb     $acc13,13($out)
881         stb     $acc14,14($out)
882         stb     $s3,15($out)
883
884 Ldec_done:                  # common exit: restore LR, toc and r13-r31, release the frame, return
885         $POP    r0,`$FRAME+$LRSAVE`($sp)
886         $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
887         $POP    r13,`$FRAME-$SIZE_T*19`($sp)
888         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
889         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
890         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
891         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
892         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
893         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
894         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
895         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
896         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
897         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
898         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
899         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
900         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
901         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
902         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
903         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
904         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
905         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
906         mtlr    r0
907         addi    $sp,$sp,$FRAME          # pop the frame allocated on entry
908         blr
909         .long   0
910         .byte   0,12,4,1,0x80,18,3,0    # NOTE(review): presumably a traceback table - confirm against the ABI
911         .long   0
912
913 .align  5
914 Lppc_AES_decrypt:           # table-driven block decrypt on s0-s3; not called from the code visible in this part of the file
915         lwz     $acc00,240($key)        # word at key+240 drives the loop count (presumably the AES round count - confirm)
916         addi    $Tbl1,$Tbl0,3           # Tbl1-Tbl3 address the same table at byte offsets 3, 2 and 1
917         lwz     $t0,0($key)
918         addi    $Tbl2,$Tbl0,2
919         lwz     $t1,4($key)
920         addi    $Tbl3,$Tbl0,1
921         lwz     $t2,8($key)
922         addi    $acc00,$acc00,-1        # the loop runs count-1 times; the last round is done separately below
923         lwz     $t3,12($key)
924         addi    $key,$key,16
925         xor     $s0,$s0,$t0             # initial round-key addition
926         xor     $s1,$s1,$t1
927         xor     $s2,$s2,$t2
928         xor     $s3,$s3,$t3
929         mtctr   $acc00
930 .align  4
931 Ldec_loop:                  # one full round per iteration
932         rlwinm  $acc00,$s0,`32-24+3`,21,28      # (top byte of s0)*8: table entries are addressed with an 8-byte stride
933         rlwinm  $acc01,$s1,`32-24+3`,21,28
934         rlwinm  $acc02,$s2,`32-24+3`,21,28
935         rlwinm  $acc03,$s3,`32-24+3`,21,28
936         lwz     $t0,0($key)             # round key for this iteration
937         rlwinm  $acc04,$s3,`32-16+3`,21,28      # (2nd byte of s3)*8
938         lwz     $t1,4($key)
939         rlwinm  $acc05,$s0,`32-16+3`,21,28
940         lwz     $t2,8($key)
941         rlwinm  $acc06,$s1,`32-16+3`,21,28
942         lwz     $t3,12($key)
943         rlwinm  $acc07,$s2,`32-16+3`,21,28
944         lwzx    $acc00,$Tbl0,$acc00     # word lookups: one per state byte, each table view used four times
945         rlwinm  $acc08,$s2,`32-8+3`,21,28       # (3rd byte of s2)*8
946         lwzx    $acc01,$Tbl0,$acc01
947         rlwinm  $acc09,$s3,`32-8+3`,21,28
948         lwzx    $acc02,$Tbl0,$acc02
949         rlwinm  $acc10,$s0,`32-8+3`,21,28
950         lwzx    $acc03,$Tbl0,$acc03
951         rlwinm  $acc11,$s1,`32-8+3`,21,28
952         lwzx    $acc04,$Tbl1,$acc04
953         rlwinm  $acc12,$s1,`0+3`,21,28  # (low byte of s1)*8
954         lwzx    $acc05,$Tbl1,$acc05
955         rlwinm  $acc13,$s2,`0+3`,21,28
956         lwzx    $acc06,$Tbl1,$acc06
957         rlwinm  $acc14,$s3,`0+3`,21,28
958         lwzx    $acc07,$Tbl1,$acc07
959         rlwinm  $acc15,$s0,`0+3`,21,28
960         lwzx    $acc08,$Tbl2,$acc08
961         xor     $t0,$t0,$acc00          # fold the lookups into the round key words
962         lwzx    $acc09,$Tbl2,$acc09
963         xor     $t1,$t1,$acc01
964         lwzx    $acc10,$Tbl2,$acc10
965         xor     $t2,$t2,$acc02
966         lwzx    $acc11,$Tbl2,$acc11
967         xor     $t3,$t3,$acc03
968         lwzx    $acc12,$Tbl3,$acc12
969         xor     $t0,$t0,$acc04
970         lwzx    $acc13,$Tbl3,$acc13
971         xor     $t1,$t1,$acc05
972         lwzx    $acc14,$Tbl3,$acc14
973         xor     $t2,$t2,$acc06
974         lwzx    $acc15,$Tbl3,$acc15
975         xor     $t3,$t3,$acc07
976         xor     $t0,$t0,$acc08
977         xor     $t1,$t1,$acc09
978         xor     $t2,$t2,$acc10
979         xor     $t3,$t3,$acc11
980         xor     $s0,$t0,$acc12          # last xor writes the new state words
981         xor     $s1,$t1,$acc13
982         xor     $s2,$t2,$acc14
983         xor     $s3,$t3,$acc15
984         addi    $key,$key,16            # advance to the next round key
985         bdnz-   Ldec_loop
986         # last round: single-byte lookups in the table at Tbl0+2048
987         addi    $Tbl2,$Tbl0,2048
988         nop
989         lwz     $t0,0($key)
990         rlwinm  $acc00,$s0,`32-24`,24,31
991         lwz     $t1,4($key)
992         rlwinm  $acc01,$s1,`32-24`,24,31
993         lwz     $t2,8($key)
994         rlwinm  $acc02,$s2,`32-24`,24,31
995         lwz     $t3,12($key)
996         rlwinm  $acc03,$s3,`32-24`,24,31
997         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Td4
998         rlwinm  $acc04,$s3,`32-16`,24,31
999         lwz     $acc09,`2048+32`($Tbl0) # eight loads 32 bytes apart touch the whole 256-byte table; the loaded values are overwritten below
1000         rlwinm  $acc05,$s0,`32-16`,24,31
1001         lwz     $acc10,`2048+64`($Tbl0)
1002         lbzx    $acc00,$Tbl2,$acc00
1003         lwz     $acc11,`2048+96`($Tbl0)
1004         lbzx    $acc01,$Tbl2,$acc01
1005         lwz     $acc12,`2048+128`($Tbl0)
1006         rlwinm  $acc06,$s1,`32-16`,24,31
1007         lwz     $acc13,`2048+160`($Tbl0)
1008         rlwinm  $acc07,$s2,`32-16`,24,31
1009         lwz     $acc14,`2048+192`($Tbl0)
1010         rlwinm  $acc08,$s2,`32-8`,24,31
1011         lwz     $acc15,`2048+224`($Tbl0)
1012         rlwinm  $acc09,$s3,`32-8`,24,31
1013         lbzx    $acc02,$Tbl2,$acc02
1014         rlwinm  $acc10,$s0,`32-8`,24,31
1015         lbzx    $acc03,$Tbl2,$acc03
1016         rlwinm  $acc11,$s1,`32-8`,24,31
1017         lbzx    $acc04,$Tbl2,$acc04
1018         rlwinm  $acc12,$s1,`0`,24,31
1019         lbzx    $acc05,$Tbl2,$acc05
1020         rlwinm  $acc13,$s2,`0`,24,31
1021         lbzx    $acc06,$Tbl2,$acc06
1022         rlwinm  $acc14,$s3,`0`,24,31
1023         lbzx    $acc07,$Tbl2,$acc07
1024         rlwinm  $acc15,$s0,`0`,24,31
1025         lbzx    $acc08,$Tbl2,$acc08
1026         rlwinm  $s0,$acc00,24,0,7       # reassemble: first looked-up byte becomes bits 0-7 (most significant)
1027         lbzx    $acc09,$Tbl2,$acc09
1028         rlwinm  $s1,$acc01,24,0,7
1029         lbzx    $acc10,$Tbl2,$acc10
1030         rlwinm  $s2,$acc02,24,0,7
1031         lbzx    $acc11,$Tbl2,$acc11
1032         rlwinm  $s3,$acc03,24,0,7
1033         lbzx    $acc12,$Tbl2,$acc12
1034         rlwimi  $s0,$acc04,16,8,15      # second byte inserted into bits 8-15
1035         lbzx    $acc13,$Tbl2,$acc13
1036         rlwimi  $s1,$acc05,16,8,15
1037         lbzx    $acc14,$Tbl2,$acc14
1038         rlwimi  $s2,$acc06,16,8,15
1039         lbzx    $acc15,$Tbl2,$acc15
1040         rlwimi  $s3,$acc07,16,8,15
1041         rlwimi  $s0,$acc08,8,16,23      # third byte inserted into bits 16-23
1042         rlwimi  $s1,$acc09,8,16,23
1043         rlwimi  $s2,$acc10,8,16,23
1044         rlwimi  $s3,$acc11,8,16,23
1045         or      $s0,$s0,$acc12          # fourth byte fills the low byte
1046         or      $s1,$s1,$acc13
1047         or      $s2,$s2,$acc14
1048         or      $s3,$s3,$acc15
1049         xor     $s0,$s0,$t0             # final round-key addition
1050         xor     $s1,$s1,$t1
1051         xor     $s2,$s2,$t2
1052         xor     $s3,$s3,$t3
1053         blr
1054         .long   0
1055         .byte   0,12,0x14,0,0,0,0,0     # NOTE(review): presumably a traceback table - confirm against the ABI
1056
1057 .align  4
1058 Lppc_AES_decrypt_compact:   # compact block decrypt on s0-s3: every lookup is a single byte from the table at Tbl0+2048
1059         lwz     $acc00,240($key)        # loop count for CTR (presumably the round count of the key schedule - confirm)
1060         addi    $Tbl1,$Tbl0,2048        # Tbl1 = base of the byte table used by all lbzx lookups below
1061         lwz     $t0,0($key)
1062         lis     $mask80,0x8080          # mask80 = 0x80808080 once the ori below fills the low half
1063         lwz     $t1,4($key)
1064         lis     $mask1b,0x1b1b          # mask1b = 0x1b1b1b1b likewise
1065         lwz     $t2,8($key)
1066         ori     $mask80,$mask80,0x8080
1067         lwz     $t3,12($key)
1068         ori     $mask1b,$mask1b,0x1b1b
1069         addi    $key,$key,16
1070 ___
1071 $code.=<<___ if ($SIZE_T==8);      # 64-bit build only: replicate both masks into the high word
1072         insrdi  $mask80,$mask80,32,0
1073         insrdi  $mask1b,$mask1b,32,0
1074 ___
1075 $code.=<<___;
1076         mtctr   $acc00
1077 .align  4
1078 Ldec_compact_loop:          # each pass: add round key, permute and substitute bytes, then mix unless it is the last pass
1079         xor     $s0,$s0,$t0
1080         xor     $s1,$s1,$t1
1081         rlwinm  $acc00,$s0,`32-24`,24,31        # new s0 is built from the top byte of s0, 2nd byte of s3, 3rd byte of s2, low byte of s1
1082         xor     $s2,$s2,$t2
1083         rlwinm  $acc01,$s1,`32-24`,24,31
1084         xor     $s3,$s3,$t3
1085         rlwinm  $acc02,$s2,`32-24`,24,31
1086         rlwinm  $acc03,$s3,`32-24`,24,31
1087         rlwinm  $acc04,$s3,`32-16`,24,31
1088         rlwinm  $acc05,$s0,`32-16`,24,31
1089         rlwinm  $acc06,$s1,`32-16`,24,31
1090         rlwinm  $acc07,$s2,`32-16`,24,31
1091         lbzx    $acc00,$Tbl1,$acc00
1092         rlwinm  $acc08,$s2,`32-8`,24,31
1093         lbzx    $acc01,$Tbl1,$acc01
1094         rlwinm  $acc09,$s3,`32-8`,24,31
1095         lbzx    $acc02,$Tbl1,$acc02
1096         rlwinm  $acc10,$s0,`32-8`,24,31
1097         lbzx    $acc03,$Tbl1,$acc03
1098         rlwinm  $acc11,$s1,`32-8`,24,31
1099         lbzx    $acc04,$Tbl1,$acc04
1100         rlwinm  $acc12,$s1,`0`,24,31
1101         lbzx    $acc05,$Tbl1,$acc05
1102         rlwinm  $acc13,$s2,`0`,24,31
1103         lbzx    $acc06,$Tbl1,$acc06
1104         rlwinm  $acc14,$s3,`0`,24,31
1105         lbzx    $acc07,$Tbl1,$acc07
1106         rlwinm  $acc15,$s0,`0`,24,31
1107         lbzx    $acc08,$Tbl1,$acc08
1108         rlwinm  $s0,$acc00,24,0,7       # reassemble the state words from the looked-up bytes, most significant byte first
1109         lbzx    $acc09,$Tbl1,$acc09
1110         rlwinm  $s1,$acc01,24,0,7
1111         lbzx    $acc10,$Tbl1,$acc10
1112         rlwinm  $s2,$acc02,24,0,7
1113         lbzx    $acc11,$Tbl1,$acc11
1114         rlwinm  $s3,$acc03,24,0,7
1115         lbzx    $acc12,$Tbl1,$acc12
1116         rlwimi  $s0,$acc04,16,8,15
1117         lbzx    $acc13,$Tbl1,$acc13
1118         rlwimi  $s1,$acc05,16,8,15
1119         lbzx    $acc14,$Tbl1,$acc14
1120         rlwimi  $s2,$acc06,16,8,15
1121         lbzx    $acc15,$Tbl1,$acc15
1122         rlwimi  $s3,$acc07,16,8,15
1123         rlwimi  $s0,$acc08,8,16,23
1124         rlwimi  $s1,$acc09,8,16,23
1125         rlwimi  $s2,$acc10,8,16,23
1126         rlwimi  $s3,$acc11,8,16,23
1127         lwz     $t0,0($key)             # next round key, loaded while the state words are completed
1128         or      $s0,$s0,$acc12
1129         lwz     $t1,4($key)
1130         or      $s1,$s1,$acc13
1131         lwz     $t2,8($key)
1132         or      $s2,$s2,$acc14
1133         lwz     $t3,12($key)
1134         or      $s3,$s3,$acc15
1135         # when CTR reaches zero, skip the mixing code and finish with the key just loaded
1136         addi    $key,$key,16
1137         bdz     Ldec_compact_done
1138 ___
1139 $code.=<<___ if ($SIZE_T==8);      # 64-bit build: two state words per register
1140         # vectorized permutation improves decrypt performance by 10%
1141         insrdi  $s0,$s1,32,0            # high word of s0 = s1
1142         insrdi  $s2,$s3,32,0            # high word of s2 = s3
1143         # r2 = packed doubling in GF(2^8) of every byte of r0 (this looks like the AES InvMixColumns step - confirm)
1144         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1145         and     $acc02,$s2,$mask80
1146         srdi    $acc04,$acc00,7         # r1>>7
1147         srdi    $acc06,$acc02,7
1148         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1149         andc    $acc10,$s2,$mask80
1150         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1151         sub     $acc02,$acc02,$acc06
1152         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1153         add     $acc10,$acc10,$acc10
1154         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1155         and     $acc02,$acc02,$mask1b
1156         xor     $acc00,$acc00,$acc08    # r2
1157         xor     $acc02,$acc02,$acc10
1158         # r4 = the same doubling applied to r2
1159         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1160         and     $acc06,$acc02,$mask80
1161         srdi    $acc08,$acc04,7         # r1>>7
1162         srdi    $acc10,$acc06,7
1163         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1164         andc    $acc14,$acc02,$mask80
1165         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1166         sub     $acc06,$acc06,$acc10
1167         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1168         add     $acc14,$acc14,$acc14
1169         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1170         and     $acc06,$acc06,$mask1b
1171         xor     $acc04,$acc04,$acc12    # r4
1172         xor     $acc06,$acc06,$acc14
1173         # r8 = the same doubling applied to r4
1174         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1175         and     $acc10,$acc06,$mask80
1176         srdi    $acc12,$acc08,7         # r1>>7
1177         srdi    $acc14,$acc10,7
1178         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1179         sub     $acc10,$acc10,$acc14
1180         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1181         andc    $acc14,$acc06,$mask80
1182         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1183         add     $acc14,$acc14,$acc14
1184         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1185         and     $acc10,$acc10,$mask1b
1186         xor     $acc08,$acc08,$acc12    # r8
1187         xor     $acc10,$acc10,$acc14
1188
1189         xor     $acc00,$acc00,$s0       # r2^r0
1190         xor     $acc02,$acc02,$s2
1191         xor     $acc04,$acc04,$s0       # r4^r0
1192         xor     $acc06,$acc06,$s2
1193         # copy the high words into the odd-numbered accumulators for the shared 32-bit tail
1194         extrdi  $acc01,$acc00,32,0
1195         extrdi  $acc03,$acc02,32,0
1196         extrdi  $acc05,$acc04,32,0
1197         extrdi  $acc07,$acc06,32,0
1198         extrdi  $acc09,$acc08,32,0
1199         extrdi  $acc11,$acc10,32,0
1200 ___
1201 $code.=<<___ if ($SIZE_T==4);      # 32-bit build: one state word per register, same arithmetic
1202         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1203         and     $acc01,$s1,$mask80
1204         and     $acc02,$s2,$mask80
1205         and     $acc03,$s3,$mask80
1206         srwi    $acc04,$acc00,7         # r1>>7
1207         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1208         srwi    $acc05,$acc01,7
1209         andc    $acc09,$s1,$mask80
1210         srwi    $acc06,$acc02,7
1211         andc    $acc10,$s2,$mask80
1212         srwi    $acc07,$acc03,7
1213         andc    $acc11,$s3,$mask80
1214         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1215         sub     $acc01,$acc01,$acc05
1216         sub     $acc02,$acc02,$acc06
1217         sub     $acc03,$acc03,$acc07
1218         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1219         add     $acc09,$acc09,$acc09
1220         add     $acc10,$acc10,$acc10
1221         add     $acc11,$acc11,$acc11
1222         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1223         and     $acc01,$acc01,$mask1b
1224         and     $acc02,$acc02,$mask1b
1225         and     $acc03,$acc03,$mask1b
1226         xor     $acc00,$acc00,$acc08    # r2
1227         xor     $acc01,$acc01,$acc09
1228         xor     $acc02,$acc02,$acc10
1229         xor     $acc03,$acc03,$acc11
1230
1231         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1232         and     $acc05,$acc01,$mask80
1233         and     $acc06,$acc02,$mask80
1234         and     $acc07,$acc03,$mask80
1235         srwi    $acc08,$acc04,7         # r1>>7
1236         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1237         srwi    $acc09,$acc05,7
1238         andc    $acc13,$acc01,$mask80
1239         srwi    $acc10,$acc06,7
1240         andc    $acc14,$acc02,$mask80
1241         srwi    $acc11,$acc07,7
1242         andc    $acc15,$acc03,$mask80
1243         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1244         sub     $acc05,$acc05,$acc09
1245         sub     $acc06,$acc06,$acc10
1246         sub     $acc07,$acc07,$acc11
1247         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1248         add     $acc13,$acc13,$acc13
1249         add     $acc14,$acc14,$acc14
1250         add     $acc15,$acc15,$acc15
1251         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1252         and     $acc05,$acc05,$mask1b
1253         and     $acc06,$acc06,$mask1b
1254         and     $acc07,$acc07,$mask1b
1255         xor     $acc04,$acc04,$acc12    # r4
1256         xor     $acc05,$acc05,$acc13
1257         xor     $acc06,$acc06,$acc14
1258         xor     $acc07,$acc07,$acc15
1259
1260         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1261         and     $acc09,$acc05,$mask80
1262         srwi    $acc12,$acc08,7         # r1>>7
1263         and     $acc10,$acc06,$mask80
1264         srwi    $acc13,$acc09,7
1265         and     $acc11,$acc07,$mask80
1266         srwi    $acc14,$acc10,7
1267         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1268         srwi    $acc15,$acc11,7
1269         sub     $acc09,$acc09,$acc13
1270         sub     $acc10,$acc10,$acc14
1271         sub     $acc11,$acc11,$acc15
1272         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1273         andc    $acc13,$acc05,$mask80
1274         andc    $acc14,$acc06,$mask80
1275         andc    $acc15,$acc07,$mask80
1276         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1277         add     $acc13,$acc13,$acc13
1278         add     $acc14,$acc14,$acc14
1279         add     $acc15,$acc15,$acc15
1280         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1281         and     $acc09,$acc09,$mask1b
1282         and     $acc10,$acc10,$mask1b
1283         and     $acc11,$acc11,$mask1b
1284         xor     $acc08,$acc08,$acc12    # r8
1285         xor     $acc09,$acc09,$acc13
1286         xor     $acc10,$acc10,$acc14
1287         xor     $acc11,$acc11,$acc15
1288
1289         xor     $acc00,$acc00,$s0       # r2^r0
1290         xor     $acc01,$acc01,$s1
1291         xor     $acc02,$acc02,$s2
1292         xor     $acc03,$acc03,$s3
1293         xor     $acc04,$acc04,$s0       # r4^r0
1294         xor     $acc05,$acc05,$s1
1295         xor     $acc06,$acc06,$s2
1296         xor     $acc07,$acc07,$s3
1297 ___
1298 $code.=<<___;      # tail shared by both builds: combine r0, r2, r4 and r8 with word rotations
1299         rotrwi  $s0,$s0,8               # = ROTATE(r0,8)
1300         rotrwi  $s1,$s1,8
1301         xor     $s0,$s0,$acc00          # ^= r2^r0
1302         rotrwi  $s2,$s2,8
1303         xor     $s1,$s1,$acc01
1304         rotrwi  $s3,$s3,8
1305         xor     $s2,$s2,$acc02
1306         xor     $s3,$s3,$acc03
1307         xor     $acc00,$acc00,$acc08
1308         xor     $acc01,$acc01,$acc09
1309         xor     $acc02,$acc02,$acc10
1310         xor     $acc03,$acc03,$acc11
1311         xor     $s0,$s0,$acc04          # ^= r4^r0
1312         rotrwi  $acc00,$acc00,24
1313         xor     $s1,$s1,$acc05
1314         rotrwi  $acc01,$acc01,24
1315         xor     $s2,$s2,$acc06
1316         rotrwi  $acc02,$acc02,24
1317         xor     $s3,$s3,$acc07
1318         rotrwi  $acc03,$acc03,24
1319         xor     $acc04,$acc04,$acc08
1320         xor     $acc05,$acc05,$acc09
1321         xor     $acc06,$acc06,$acc10
1322         xor     $acc07,$acc07,$acc11
1323         xor     $s0,$s0,$acc08          # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1324         rotrwi  $acc04,$acc04,16
1325         xor     $s1,$s1,$acc09
1326         rotrwi  $acc05,$acc05,16
1327         xor     $s2,$s2,$acc10
1328         rotrwi  $acc06,$acc06,16
1329         xor     $s3,$s3,$acc11
1330         rotrwi  $acc07,$acc07,16
1331         xor     $s0,$s0,$acc00          # ^= ROTATE(r8^r2^r0,24)
1332         rotrwi  $acc08,$acc08,8
1333         xor     $s1,$s1,$acc01
1334         rotrwi  $acc09,$acc09,8
1335         xor     $s2,$s2,$acc02
1336         rotrwi  $acc10,$acc10,8
1337         xor     $s3,$s3,$acc03
1338         rotrwi  $acc11,$acc11,8
1339         xor     $s0,$s0,$acc04          # ^= ROTATE(r8^r4^r0,16)
1340         xor     $s1,$s1,$acc05
1341         xor     $s2,$s2,$acc06
1342         xor     $s3,$s3,$acc07
1343         xor     $s0,$s0,$acc08          # ^= ROTATE(r8,8)       
1344         xor     $s1,$s1,$acc09  
1345         xor     $s2,$s2,$acc10  
1346         xor     $s3,$s3,$acc11  
1347
1348         b       Ldec_compact_loop       # the key addition at the loop top uses the key loaded before the bdz
1349 .align  4
1350 Ldec_compact_done:          # last pass: only the final round key remains to be added
1351         xor     $s0,$s0,$t0
1352         xor     $s1,$s1,$t1
1353         xor     $s2,$s2,$t2
1354         xor     $s3,$s3,$t3
1355         blr
1356         .long   0
1357         .byte   0,12,0x14,0,0,0,0,0     # NOTE(review): presumably a traceback table - confirm against the ABI
1358
1359 .asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1360 .align  7
1361 ___
1362
1363 $code =~ s/\`([^\`]*)\`/eval $1/gem;      # replace every backquoted expression in the generated text with its evaluated value
1364 print $code;                               # emit the generated assembly on STDOUT
1365 close STDOUT or die "error closing STDOUT: $!";    # buffered write errors only surface at close; fail the build instead of emitting truncated output