PPC assembly pack: make new .size directives profiler-friendly.
[openssl.git] / crypto / aes / asm / aes-ppc.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
# Needs more work: key setup, CBC routine...
#
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
# 4.0. But these are not the ones currently used! Their "compact"
# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
# at 1/3 of ppc_AES_decrypt.
18
# February 2010
#
# Rescheduling instructions to favour Power6 pipeline gave 10%
# performance improvement on the platform in question (and marginal
# improvement even on others). It should be noted that Power6 fails
# to process a byte in 18 cycles, only in 23, because it fails to issue
# 4 load instructions in two cycles, only in 3. As a result non-compact
# block subroutines are 25% slower than one would expect. Compact
# functions scale better, because they have pure computational part,
# which scales perfectly with clock frequency. To be specific
# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
# Target flavour is the first command-line argument.  A "64" substring is
# tested first, then "32"; anything else aborts the script.  The variables
# set here are interpolated into the assembly text generated further down.
$flavour = shift;

if ($flavour =~ /64/) {
        $SIZE_T =8;             # bytes per saved-register slot
        $LRSAVE =2*$SIZE_T;     # LR save slot, addressed as $FRAME+$LRSAVE
        $STU    ="stdu";        # store-with-update used to allocate the frame
        $POP    ="ld";          # reload a saved register
        $PUSH   ="std";         # save a register
} elsif ($flavour =~ /32/) {
        $SIZE_T =4;
        $LRSAVE =$SIZE_T;
        $STU    ="stwu";
        $POP    ="lwz";
        $PUSH   ="stw";
} else { die "nonsense $flavour"; }
47
# Locate the ppc-xlate.pl translator, first next to this script and then
# under ../../perlasm/ relative to it; give up if neither file exists.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Redirect STDOUT into the translator, forwarding the flavour and the next
# command-line argument.  open() is parenthesised and checked with the
# low-precedence "or": with "||" the test bound to the (always true)
# command string, so a failed open() was never reported.
open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";
54
# Stack frame size in bytes: 32 slots of $SIZE_T each.  The entry points
# allocate it with $STU and address their register save area relative to it.
$FRAME=32*$SIZE_T;
56
# Append table entries to the global $code buffer.
#
# Each argument is treated as a 32-bit value and emitted as one ".long"
# line carrying that value twice (8 bytes per entry).  Processing stops at
# the first undefined argument; with no arguments nothing is appended.
#
# The sub used to carry an empty "()" prototype even though it consumes
# arguments, which only worked because callers invoke it as
# &_data_word(...).  The prototype is dropped so that either call form is
# valid; existing call sites are unaffected.
sub _data_word
{
    for my $word (@_) {
        last unless defined $word;
        $code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
    }
}
61
# Symbolic names for the PowerPC registers used by the generated code.
# Several names deliberately share one physical register.

# Stack pointer, TOC pointer and the three arguments of the entry points.
$sp="r1";
$toc="r2";
$inp="r3";
$out="r4";
$key="r5";

# Lookup-table base pointers.  $Tbl0 shares r3 with $inp.
$Tbl0="r3";
$Tbl1="r6";
$Tbl2="r7";
$Tbl3=$out;     # stay away from "r2"; $out is offloaded to stack

# The four 32-bit words of the cipher state.
$s0="r8";
$s1="r9";
$s2="r10";
$s3="r11";

# Temporaries, used for the round-key words.
$t0="r12";
$t1="r0";       # stay away from "r13";
$t2="r14";
$t3="r15";

# Sixteen scratch accumulators, r16 through r31.
$acc00="r16";
$acc01="r17";
$acc02="r18";
$acc03="r19";

$acc04="r20";
$acc05="r21";
$acc06="r22";
$acc07="r23";

$acc08="r24";
$acc09="r25";
$acc10="r26";
$acc11="r27";

$acc12="r28";
$acc13="r29";
$acc14="r30";
$acc15="r31";

# Constant masks for the compact routines; they reuse the registers of
# $Tbl2 and $Tbl3.
$mask80=$Tbl2;
$mask1b=$Tbl3;
105
# Assembly prologue plus two position-independent helper stubs.
#
# Each stub saves LR in r0, executes "bcl 20,31,$+4" so that LR receives
# the address of the following instruction, copies that address into
# $Tbl0, adds the fixed distance to its table, restores LR and returns.
# LAES_Te is aligned to 128 bytes and padded to 64 bytes; LAES_Td follows
# and is padded up to offset 128, where the table data emitted by the next
# statements begins.  LAES_Td's distance additionally skips 2048+256
# bytes.  The back-quoted arithmetic is left in the text by this
# statement; it is presumably evaluated by a later pass outside this view.
$code.=<<___;
.machine        "any"
.text

.align  7
LAES_Te:
        mflr    r0
        bcl     20,31,\$+4
        mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
        addi    $Tbl0,$Tbl0,`128-8`
        mtlr    r0
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
        .space  `64-9*4`
LAES_Td:
        mflr    r0
        bcl     20,31,\$+4
        mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
        addi    $Tbl0,$Tbl0,`128-64-8+2048+256`
        mtlr    r0
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
        .space  `128-64-9*4`
___
# Encryption lookup table, the data reached through LAES_Te: 256 32-bit
# entries.  _data_word() emits every entry twice, so the table occupies
# 2048 bytes.  The non-compact encrypt routine indexes it in 8-byte steps
# from bases $Tbl0, $Tbl0+3, $Tbl0+2 and $Tbl0+1, so the duplication
# presumably lets one table serve byte-rotated lookups as well.
&_data_word(
        0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
        0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
        0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
        0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
        0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
        0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
        0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
        0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
        0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
        0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
        0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
        0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
        0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
        0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
        0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
        0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
        0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
        0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
        0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
        0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
        0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
        0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
        0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
        0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
        0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
        0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
        0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
        0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
        0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
        0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
        0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
        0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
        0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
        0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
        0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
        0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
        0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
        0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
        0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
        0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
        0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
        0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
        0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
        0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
        0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
        0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
        0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
        0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
        0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
        0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
        0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
        0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
        0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
        0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
        0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
        0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
        0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
        0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
        0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
        0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
        0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
        0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
        0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
        0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# 256-entry byte table placed directly after the 2048-byte encryption
# table.  The encrypt routines address it at offset 2048 from $Tbl0, and
# the assembly refers to it as "Te4".
$code.=<<___;
.byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
___
# Decryption lookup table, the data reached through LAES_Td: 256 32-bit
# entries, each emitted twice by _data_word() (2048 bytes in total).  It
# follows the 2048-byte encryption table and the 256-byte Te4 table, which
# is the 2048+256 skip applied in the LAES_Td stub.
&_data_word(
        0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
        0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
        0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
        0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
        0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
        0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
        0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
        0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
        0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
        0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
        0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
        0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
        0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
        0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
        0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
        0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
        0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
        0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
        0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
        0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
        0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
        0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
        0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
        0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
        0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
        0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
        0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
        0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
        0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
        0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
        0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
        0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
        0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
        0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
        0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
        0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
        0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
        0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
        0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
        0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
        0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
        0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
        0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
        0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
        0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
        0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
        0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
        0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
        0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
        0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
        0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
        0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
        0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
        0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
        0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
        0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
        0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
        0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
        0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
        0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
        0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
        0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
        0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
        0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
296 $code.=<<___;
297 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
298 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
299 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
300 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
301 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
302 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
303 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
304 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
305 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
306 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
307 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
308 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
309 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
310 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
311 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
312 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
313 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
314 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
315 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
316 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
317 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
318 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
319 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
320 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
321 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
322 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
323 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
324 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
325 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
326 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
327 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
328 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
329
330
331 .globl  .AES_encrypt
332 .align  7
333 .AES_encrypt:
334         $STU    $sp,-$FRAME($sp)
335         mflr    r0
336
337         $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
338         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
339         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
340         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
341         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
342         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
343         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
344         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
345         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
346         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
347         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
348         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
349         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
350         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
351         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
352         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
353         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
354         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
355         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
356         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
357
358         andi.   $t0,$inp,3
359         andi.   $t1,$out,3
360         or.     $t0,$t0,$t1
361         bne     Lenc_unaligned
362
363 Lenc_unaligned_ok:
364         lwz     $s0,0($inp)
365         lwz     $s1,4($inp)
366         lwz     $s2,8($inp)
367         lwz     $s3,12($inp)
368         bl      LAES_Te
369         bl      Lppc_AES_encrypt_compact
370         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
371         stw     $s0,0($out)
372         stw     $s1,4($out)
373         stw     $s2,8($out)
374         stw     $s3,12($out)
375         b       Lenc_done
376
377 Lenc_unaligned:
378         subfic  $t0,$inp,4096
379         subfic  $t1,$out,4096
380         andi.   $t0,$t0,4096-16
381         beq     Lenc_xpage
382         andi.   $t1,$t1,4096-16
383         bne     Lenc_unaligned_ok
384
385 Lenc_xpage:
386         lbz     $acc00,0($inp)
387         lbz     $acc01,1($inp)
388         lbz     $acc02,2($inp)
389         lbz     $s0,3($inp)
390         lbz     $acc04,4($inp)
391         lbz     $acc05,5($inp)
392         lbz     $acc06,6($inp)
393         lbz     $s1,7($inp)
394         lbz     $acc08,8($inp)
395         lbz     $acc09,9($inp)
396         lbz     $acc10,10($inp)
397         insrwi  $s0,$acc00,8,0
398         lbz     $s2,11($inp)
399         insrwi  $s1,$acc04,8,0
400         lbz     $acc12,12($inp)
401         insrwi  $s0,$acc01,8,8
402         lbz     $acc13,13($inp)
403         insrwi  $s1,$acc05,8,8
404         lbz     $acc14,14($inp)
405         insrwi  $s0,$acc02,8,16
406         lbz     $s3,15($inp)
407         insrwi  $s1,$acc06,8,16
408         insrwi  $s2,$acc08,8,0
409         insrwi  $s3,$acc12,8,0
410         insrwi  $s2,$acc09,8,8
411         insrwi  $s3,$acc13,8,8
412         insrwi  $s2,$acc10,8,16
413         insrwi  $s3,$acc14,8,16
414
415         bl      LAES_Te
416         bl      Lppc_AES_encrypt_compact
417         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
418
419         extrwi  $acc00,$s0,8,0
420         extrwi  $acc01,$s0,8,8
421         stb     $acc00,0($out)
422         extrwi  $acc02,$s0,8,16
423         stb     $acc01,1($out)
424         stb     $acc02,2($out)
425         extrwi  $acc04,$s1,8,0
426         stb     $s0,3($out)
427         extrwi  $acc05,$s1,8,8
428         stb     $acc04,4($out)
429         extrwi  $acc06,$s1,8,16
430         stb     $acc05,5($out)
431         stb     $acc06,6($out)
432         extrwi  $acc08,$s2,8,0
433         stb     $s1,7($out)
434         extrwi  $acc09,$s2,8,8
435         stb     $acc08,8($out)
436         extrwi  $acc10,$s2,8,16
437         stb     $acc09,9($out)
438         stb     $acc10,10($out)
439         extrwi  $acc12,$s3,8,0
440         stb     $s2,11($out)
441         extrwi  $acc13,$s3,8,8
442         stb     $acc12,12($out)
443         extrwi  $acc14,$s3,8,16
444         stb     $acc13,13($out)
445         stb     $acc14,14($out)
446         stb     $s3,15($out)
447
448 Lenc_done:
449         $POP    r0,`$FRAME+$LRSAVE`($sp)
450         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
451         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
452         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
453         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
454         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
455         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
456         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
457         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
458         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
459         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
460         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
461         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
462         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
463         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
464         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
465         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
466         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
467         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
468         mtlr    r0
469         addi    $sp,$sp,$FRAME
470         blr
471         .long   0
472         .byte   0,12,4,1,0x80,18,3,0
473         .long   0
474
475 .align  5
476 Lppc_AES_encrypt:
477         lwz     $acc00,240($key)
478         addi    $Tbl1,$Tbl0,3
479         lwz     $t0,0($key)
480         addi    $Tbl2,$Tbl0,2
481         lwz     $t1,4($key)
482         addi    $Tbl3,$Tbl0,1
483         lwz     $t2,8($key)
484         addi    $acc00,$acc00,-1
485         lwz     $t3,12($key)
486         addi    $key,$key,16
487         xor     $s0,$s0,$t0
488         xor     $s1,$s1,$t1
489         xor     $s2,$s2,$t2
490         xor     $s3,$s3,$t3
491         mtctr   $acc00
492 .align  4
493 Lenc_loop:
494         rlwinm  $acc00,$s0,`32-24+3`,21,28
495         rlwinm  $acc01,$s1,`32-24+3`,21,28
496         rlwinm  $acc02,$s2,`32-24+3`,21,28
497         rlwinm  $acc03,$s3,`32-24+3`,21,28
498         lwz     $t0,0($key)
499         rlwinm  $acc04,$s1,`32-16+3`,21,28
500         lwz     $t1,4($key)
501         rlwinm  $acc05,$s2,`32-16+3`,21,28
502         lwz     $t2,8($key)
503         rlwinm  $acc06,$s3,`32-16+3`,21,28
504         lwz     $t3,12($key)
505         rlwinm  $acc07,$s0,`32-16+3`,21,28
506         lwzx    $acc00,$Tbl0,$acc00
507         rlwinm  $acc08,$s2,`32-8+3`,21,28
508         lwzx    $acc01,$Tbl0,$acc01
509         rlwinm  $acc09,$s3,`32-8+3`,21,28
510         lwzx    $acc02,$Tbl0,$acc02
511         rlwinm  $acc10,$s0,`32-8+3`,21,28
512         lwzx    $acc03,$Tbl0,$acc03
513         rlwinm  $acc11,$s1,`32-8+3`,21,28
514         lwzx    $acc04,$Tbl1,$acc04
515         rlwinm  $acc12,$s3,`0+3`,21,28
516         lwzx    $acc05,$Tbl1,$acc05
517         rlwinm  $acc13,$s0,`0+3`,21,28
518         lwzx    $acc06,$Tbl1,$acc06
519         rlwinm  $acc14,$s1,`0+3`,21,28
520         lwzx    $acc07,$Tbl1,$acc07
521         rlwinm  $acc15,$s2,`0+3`,21,28
522         lwzx    $acc08,$Tbl2,$acc08
523         xor     $t0,$t0,$acc00
524         lwzx    $acc09,$Tbl2,$acc09
525         xor     $t1,$t1,$acc01
526         lwzx    $acc10,$Tbl2,$acc10
527         xor     $t2,$t2,$acc02
528         lwzx    $acc11,$Tbl2,$acc11
529         xor     $t3,$t3,$acc03
530         lwzx    $acc12,$Tbl3,$acc12
531         xor     $t0,$t0,$acc04
532         lwzx    $acc13,$Tbl3,$acc13
533         xor     $t1,$t1,$acc05
534         lwzx    $acc14,$Tbl3,$acc14
535         xor     $t2,$t2,$acc06
536         lwzx    $acc15,$Tbl3,$acc15
537         xor     $t3,$t3,$acc07
538         xor     $t0,$t0,$acc08
539         xor     $t1,$t1,$acc09
540         xor     $t2,$t2,$acc10
541         xor     $t3,$t3,$acc11
542         xor     $s0,$t0,$acc12
543         xor     $s1,$t1,$acc13
544         xor     $s2,$t2,$acc14
545         xor     $s3,$t3,$acc15
546         addi    $key,$key,16
547         bdnz-   Lenc_loop
548
549         addi    $Tbl2,$Tbl0,2048
550         nop
551         lwz     $t0,0($key)
552         rlwinm  $acc00,$s0,`32-24`,24,31
553         lwz     $t1,4($key)
554         rlwinm  $acc01,$s1,`32-24`,24,31
555         lwz     $t2,8($key)
556         rlwinm  $acc02,$s2,`32-24`,24,31
557         lwz     $t3,12($key)
558         rlwinm  $acc03,$s3,`32-24`,24,31
559         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Te4
560         rlwinm  $acc04,$s1,`32-16`,24,31
561         lwz     $acc09,`2048+32`($Tbl0)
562         rlwinm  $acc05,$s2,`32-16`,24,31
563         lwz     $acc10,`2048+64`($Tbl0)
564         rlwinm  $acc06,$s3,`32-16`,24,31
565         lwz     $acc11,`2048+96`($Tbl0)
566         rlwinm  $acc07,$s0,`32-16`,24,31
567         lwz     $acc12,`2048+128`($Tbl0)
568         rlwinm  $acc08,$s2,`32-8`,24,31
569         lwz     $acc13,`2048+160`($Tbl0)
570         rlwinm  $acc09,$s3,`32-8`,24,31
571         lwz     $acc14,`2048+192`($Tbl0)
572         rlwinm  $acc10,$s0,`32-8`,24,31
573         lwz     $acc15,`2048+224`($Tbl0)
574         rlwinm  $acc11,$s1,`32-8`,24,31
575         lbzx    $acc00,$Tbl2,$acc00
576         rlwinm  $acc12,$s3,`0`,24,31
577         lbzx    $acc01,$Tbl2,$acc01
578         rlwinm  $acc13,$s0,`0`,24,31
579         lbzx    $acc02,$Tbl2,$acc02
580         rlwinm  $acc14,$s1,`0`,24,31
581         lbzx    $acc03,$Tbl2,$acc03
582         rlwinm  $acc15,$s2,`0`,24,31
583         lbzx    $acc04,$Tbl2,$acc04
584         rlwinm  $s0,$acc00,24,0,7
585         lbzx    $acc05,$Tbl2,$acc05
586         rlwinm  $s1,$acc01,24,0,7
587         lbzx    $acc06,$Tbl2,$acc06
588         rlwinm  $s2,$acc02,24,0,7
589         lbzx    $acc07,$Tbl2,$acc07
590         rlwinm  $s3,$acc03,24,0,7
591         lbzx    $acc08,$Tbl2,$acc08
592         rlwimi  $s0,$acc04,16,8,15
593         lbzx    $acc09,$Tbl2,$acc09
594         rlwimi  $s1,$acc05,16,8,15
595         lbzx    $acc10,$Tbl2,$acc10
596         rlwimi  $s2,$acc06,16,8,15
597         lbzx    $acc11,$Tbl2,$acc11
598         rlwimi  $s3,$acc07,16,8,15
599         lbzx    $acc12,$Tbl2,$acc12
600         rlwimi  $s0,$acc08,8,16,23
601         lbzx    $acc13,$Tbl2,$acc13
602         rlwimi  $s1,$acc09,8,16,23
603         lbzx    $acc14,$Tbl2,$acc14
604         rlwimi  $s2,$acc10,8,16,23
605         lbzx    $acc15,$Tbl2,$acc15
606         rlwimi  $s3,$acc11,8,16,23
607         or      $s0,$s0,$acc12
608         or      $s1,$s1,$acc13
609         or      $s2,$s2,$acc14
610         or      $s3,$s3,$acc15
611         xor     $s0,$s0,$t0
612         xor     $s1,$s1,$t1
613         xor     $s2,$s2,$t2
614         xor     $s3,$s3,$t3
615         blr
616         .long   0
617         .byte   0,12,0x14,0,0,0,0,0
618
619 .align  4
620 Lppc_AES_encrypt_compact:
621         lwz     $acc00,240($key)
622         addi    $Tbl1,$Tbl0,2048
623         lwz     $t0,0($key)
624         lis     $mask80,0x8080
625         lwz     $t1,4($key)
626         lis     $mask1b,0x1b1b
627         lwz     $t2,8($key)
628         ori     $mask80,$mask80,0x8080
629         lwz     $t3,12($key)
630         ori     $mask1b,$mask1b,0x1b1b
631         addi    $key,$key,16
632         mtctr   $acc00
633 .align  4
634 Lenc_compact_loop:
635         xor     $s0,$s0,$t0
636         xor     $s1,$s1,$t1
637         rlwinm  $acc00,$s0,`32-24`,24,31
638         xor     $s2,$s2,$t2
639         rlwinm  $acc01,$s1,`32-24`,24,31
640         xor     $s3,$s3,$t3
641         rlwinm  $acc02,$s2,`32-24`,24,31
642         rlwinm  $acc03,$s3,`32-24`,24,31
643         rlwinm  $acc04,$s1,`32-16`,24,31
644         rlwinm  $acc05,$s2,`32-16`,24,31
645         rlwinm  $acc06,$s3,`32-16`,24,31
646         rlwinm  $acc07,$s0,`32-16`,24,31
647         lbzx    $acc00,$Tbl1,$acc00
648         rlwinm  $acc08,$s2,`32-8`,24,31
649         lbzx    $acc01,$Tbl1,$acc01
650         rlwinm  $acc09,$s3,`32-8`,24,31
651         lbzx    $acc02,$Tbl1,$acc02
652         rlwinm  $acc10,$s0,`32-8`,24,31
653         lbzx    $acc03,$Tbl1,$acc03
654         rlwinm  $acc11,$s1,`32-8`,24,31
655         lbzx    $acc04,$Tbl1,$acc04
656         rlwinm  $acc12,$s3,`0`,24,31
657         lbzx    $acc05,$Tbl1,$acc05
658         rlwinm  $acc13,$s0,`0`,24,31
659         lbzx    $acc06,$Tbl1,$acc06
660         rlwinm  $acc14,$s1,`0`,24,31
661         lbzx    $acc07,$Tbl1,$acc07
662         rlwinm  $acc15,$s2,`0`,24,31
663         lbzx    $acc08,$Tbl1,$acc08
664         rlwinm  $s0,$acc00,24,0,7
665         lbzx    $acc09,$Tbl1,$acc09
666         rlwinm  $s1,$acc01,24,0,7
667         lbzx    $acc10,$Tbl1,$acc10
668         rlwinm  $s2,$acc02,24,0,7
669         lbzx    $acc11,$Tbl1,$acc11
670         rlwinm  $s3,$acc03,24,0,7
671         lbzx    $acc12,$Tbl1,$acc12
672         rlwimi  $s0,$acc04,16,8,15
673         lbzx    $acc13,$Tbl1,$acc13
674         rlwimi  $s1,$acc05,16,8,15
675         lbzx    $acc14,$Tbl1,$acc14
676         rlwimi  $s2,$acc06,16,8,15
677         lbzx    $acc15,$Tbl1,$acc15
678         rlwimi  $s3,$acc07,16,8,15
679         rlwimi  $s0,$acc08,8,16,23
680         rlwimi  $s1,$acc09,8,16,23
681         rlwimi  $s2,$acc10,8,16,23
682         rlwimi  $s3,$acc11,8,16,23
683         lwz     $t0,0($key)
684         or      $s0,$s0,$acc12
685         lwz     $t1,4($key)
686         or      $s1,$s1,$acc13
687         lwz     $t2,8($key)
688         or      $s2,$s2,$acc14
689         lwz     $t3,12($key)
690         or      $s3,$s3,$acc15
691
692         addi    $key,$key,16
693         bdz     Lenc_compact_done
694
695         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
696         and     $acc01,$s1,$mask80
697         and     $acc02,$s2,$mask80
698         and     $acc03,$s3,$mask80
699         srwi    $acc04,$acc00,7         # r1>>7
700         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
701         srwi    $acc05,$acc01,7
702         andc    $acc09,$s1,$mask80
703         srwi    $acc06,$acc02,7
704         andc    $acc10,$s2,$mask80
705         srwi    $acc07,$acc03,7
706         andc    $acc11,$s3,$mask80
707         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
708         sub     $acc01,$acc01,$acc05
709         sub     $acc02,$acc02,$acc06
710         sub     $acc03,$acc03,$acc07
711         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
712         add     $acc09,$acc09,$acc09
713         add     $acc10,$acc10,$acc10
714         add     $acc11,$acc11,$acc11
715         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
716         and     $acc01,$acc01,$mask1b
717         and     $acc02,$acc02,$mask1b
718         and     $acc03,$acc03,$mask1b
719         xor     $acc00,$acc00,$acc08    # r2
720         xor     $acc01,$acc01,$acc09
721          rotlwi $acc12,$s0,16           # ROTATE(r0,16)
722         xor     $acc02,$acc02,$acc10
723          rotlwi $acc13,$s1,16
724         xor     $acc03,$acc03,$acc11
725          rotlwi $acc14,$s2,16
726
727         xor     $s0,$s0,$acc00          # r0^r2
728         rotlwi  $acc15,$s3,16
729         xor     $s1,$s1,$acc01
730         rotrwi  $s0,$s0,24              # ROTATE(r2^r0,24)
731         xor     $s2,$s2,$acc02
732         rotrwi  $s1,$s1,24
733         xor     $s3,$s3,$acc03
734         rotrwi  $s2,$s2,24
735         xor     $s0,$s0,$acc00          # ROTATE(r2^r0,24)^r2
736         rotrwi  $s3,$s3,24
737         xor     $s1,$s1,$acc01
738         xor     $s2,$s2,$acc02
739         xor     $s3,$s3,$acc03
740         rotlwi  $acc08,$acc12,8         # ROTATE(r0,24)
741         xor     $s0,$s0,$acc12          #
742         rotlwi  $acc09,$acc13,8
743         xor     $s1,$s1,$acc13
744         rotlwi  $acc10,$acc14,8
745         xor     $s2,$s2,$acc14
746         rotlwi  $acc11,$acc15,8
747         xor     $s3,$s3,$acc15
748         xor     $s0,$s0,$acc08          #
749         xor     $s1,$s1,$acc09
750         xor     $s2,$s2,$acc10
751         xor     $s3,$s3,$acc11
752
753         b       Lenc_compact_loop
754 .align  4
755 Lenc_compact_done:
756         xor     $s0,$s0,$t0
757         xor     $s1,$s1,$t1
758         xor     $s2,$s2,$t2
759         xor     $s3,$s3,$t3
760         blr
761         .long   0
762         .byte   0,12,0x14,0,0,0,0,0
763 .size   .AES_encrypt,.-.AES_encrypt
764
# AES_decrypt: exported single-block decryption entry point.
# Builds a FRAME-byte stack frame, saves out, r14-r31 and the link
# register, reads the 16-byte block at inp into s0-s3, calls the compact
# decryption core and writes s0-s3 to out. Word loads/stores are used
# when possible, byte-wise access otherwise (see Ldec_unaligned).
765 .globl  .AES_decrypt
766 .align  7
767 .AES_decrypt:
768         $STU    $sp,-$FRAME($sp)        # allocate the stack frame
769         mflr    r0                      # return address, stored at the end of the save sequence
770
771         $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)   # out is reloaded from this slot after the core call
772         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
773         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
774         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
775         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
776         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
777         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
778         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
779         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
780         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
781         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
782         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
783         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
784         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
785         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
786         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
787         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
788         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
789         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
790         $PUSH   r0,`$FRAME+$LRSAVE`($sp)        # save the link register
791
792         andi.   $t0,$inp,3              # low two address bits of inp
793         andi.   $t1,$out,3              # low two address bits of out
794         or.     $t0,$t0,$t1             # nonzero if either pointer is not 4-byte aligned
795         bne     Ldec_unaligned
796
797 Ldec_unaligned_ok:      # word-access path
798         lwz     $s0,0($inp)
799         lwz     $s1,4($inp)
800         lwz     $s2,8($inp)
801         lwz     $s3,12($inp)
802         bl      LAES_Td                 # defined elsewhere; presumably sets up the table pointer - TODO confirm
803         bl      Lppc_AES_decrypt_compact
804         $POP    $out,`$FRAME-$SIZE_T*19`($sp)   # reload out saved in the prologue
805         stw     $s0,0($out)
806         stw     $s1,4($out)
807         stw     $s2,8($out)
808         stw     $s3,12($out)
809         b       Ldec_done
810
811 Ldec_unaligned: # at least one pointer is not 4-byte aligned
812         subfic  $t0,$inp,4096           # 4096-inp
813         subfic  $t1,$out,4096           # 4096-out
814         andi.   $t0,$t0,4096-16         # zero when (4096-inp) mod 4096 is below 16
815         beq     Ldec_xpage              # inp block may touch a 4096-byte boundary (presumably a page)
816         andi.   $t1,$t1,4096-16         # same test for out
817         bne     Ldec_unaligned_ok       # neither block is near such a boundary, word access is used
818
819 Ldec_xpage:     # byte-wise path: assemble four big-endian words from 16 bytes
820         lbz     $acc00,0($inp)
821         lbz     $acc01,1($inp)
822         lbz     $acc02,2($inp)
823         lbz     $s0,3($inp)             # lowest byte of word 0 goes straight into s0
824         lbz     $acc04,4($inp)
825         lbz     $acc05,5($inp)
826         lbz     $acc06,6($inp)
827         lbz     $s1,7($inp)
828         lbz     $acc08,8($inp)
829         lbz     $acc09,9($inp)
830         lbz     $acc10,10($inp)
831         insrwi  $s0,$acc00,8,0          # byte 0 becomes the most significant byte
832         lbz     $s2,11($inp)
833         insrwi  $s1,$acc04,8,0
834         lbz     $acc12,12($inp)
835         insrwi  $s0,$acc01,8,8
836         lbz     $acc13,13($inp)
837         insrwi  $s1,$acc05,8,8
838         lbz     $acc14,14($inp)
839         insrwi  $s0,$acc02,8,16
840         lbz     $s3,15($inp)
841         insrwi  $s1,$acc06,8,16
842         insrwi  $s2,$acc08,8,0
843         insrwi  $s3,$acc12,8,0
844         insrwi  $s2,$acc09,8,8
845         insrwi  $s3,$acc13,8,8
846         insrwi  $s2,$acc10,8,16
847         insrwi  $s3,$acc14,8,16
848
849         bl      LAES_Td                 # defined elsewhere; presumably sets up the table pointer - TODO confirm
850         bl      Lppc_AES_decrypt_compact
851         $POP    $out,`$FRAME-$SIZE_T*19`($sp)   # reload out saved in the prologue
852
853         extrwi  $acc00,$s0,8,0          # split each result word into bytes, most significant first
854         extrwi  $acc01,$s0,8,8
855         stb     $acc00,0($out)
856         extrwi  $acc02,$s0,8,16
857         stb     $acc01,1($out)
858         stb     $acc02,2($out)
859         extrwi  $acc04,$s1,8,0
860         stb     $s0,3($out)             # stb stores the low byte of s0
861         extrwi  $acc05,$s1,8,8
862         stb     $acc04,4($out)
863         extrwi  $acc06,$s1,8,16
864         stb     $acc05,5($out)
865         stb     $acc06,6($out)
866         extrwi  $acc08,$s2,8,0
867         stb     $s1,7($out)
868         extrwi  $acc09,$s2,8,8
869         stb     $acc08,8($out)
870         extrwi  $acc10,$s2,8,16
871         stb     $acc09,9($out)
872         stb     $acc10,10($out)
873         extrwi  $acc12,$s3,8,0
874         stb     $s2,11($out)
875         extrwi  $acc13,$s3,8,8
876         stb     $acc12,12($out)
877         extrwi  $acc14,$s3,8,16
878         stb     $acc13,13($out)
879         stb     $acc14,14($out)
880         stb     $s3,15($out)
881
882 Ldec_done:      # common epilogue: restore link register and r14-r31, drop the frame
883         $POP    r0,`$FRAME+$LRSAVE`($sp)
884         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
885         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
886         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
887         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
888         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
889         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
890         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
891         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
892         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
893         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
894         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
895         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
896         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
897         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
898         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
899         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
900         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
901         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
902         mtlr    r0
903         addi    $sp,$sp,$FRAME          # release the stack frame
904         blr
905         .long   0                       # NOTE(review): this and the next two lines look like a traceback table - confirm field encoding
906         .byte   0,12,4,1,0x80,18,3,0
907         .long   0
908
# Lppc_AES_decrypt: table-driven decryption core (local label).
# Reads the state from s0-s3, the key schedule through key and the
# lookup table through Tbl0, and leaves the result in s0-s3.
# The key pointer is advanced as round keys are consumed.
# The entry point in this part of the file calls the compact variant,
# not this routine.
909 .align  5
910 Lppc_AES_decrypt:
911         lwz     $acc00,240($key)        # word at offset 240 of the key schedule, used as round count
912         addi    $Tbl1,$Tbl0,3           # Tbl1-Tbl3 address the same table at byte offsets 3, 2, 1
913         lwz     $t0,0($key)
914         addi    $Tbl2,$Tbl0,2           # presumably these offsets yield byte-rotated table words - TODO confirm table layout
915         lwz     $t1,4($key)
916         addi    $Tbl3,$Tbl0,1
917         lwz     $t2,8($key)
918         addi    $acc00,$acc00,-1        # the last round is handled separately after the loop
919         lwz     $t3,12($key)
920         addi    $key,$key,16
921         xor     $s0,$s0,$t0             # xor the first four key words into the state
922         xor     $s1,$s1,$t1
923         xor     $s2,$s2,$t2
924         xor     $s3,$s3,$t3
925         mtctr   $acc00                  # loop counter = round count minus one
926 .align  4
927 Ldec_loop:
928         rlwinm  $acc00,$s0,`32-24+3`,21,28      # most significant byte of s0, scaled by 8 (table index)
929         rlwinm  $acc01,$s1,`32-24+3`,21,28
930         rlwinm  $acc02,$s2,`32-24+3`,21,28
931         rlwinm  $acc03,$s3,`32-24+3`,21,28
932         lwz     $t0,0($key)                     # round key loads are interleaved with index extraction
933         rlwinm  $acc04,$s3,`32-16+3`,21,28      # second byte, taken from the preceding word (s3 for output 0)
934         lwz     $t1,4($key)
935         rlwinm  $acc05,$s0,`32-16+3`,21,28
936         lwz     $t2,8($key)
937         rlwinm  $acc06,$s1,`32-16+3`,21,28
938         lwz     $t3,12($key)
939         rlwinm  $acc07,$s2,`32-16+3`,21,28
940         lwzx    $acc00,$Tbl0,$acc00
941         rlwinm  $acc08,$s2,`32-8+3`,21,28       # third byte, taken two words away
942         lwzx    $acc01,$Tbl0,$acc01
943         rlwinm  $acc09,$s3,`32-8+3`,21,28
944         lwzx    $acc02,$Tbl0,$acc02
945         rlwinm  $acc10,$s0,`32-8+3`,21,28
946         lwzx    $acc03,$Tbl0,$acc03
947         rlwinm  $acc11,$s1,`32-8+3`,21,28
948         lwzx    $acc04,$Tbl1,$acc04
949         rlwinm  $acc12,$s1,`0+3`,21,28          # least significant byte, taken from the following word
950         lwzx    $acc05,$Tbl1,$acc05
951         rlwinm  $acc13,$s2,`0+3`,21,28
952         lwzx    $acc06,$Tbl1,$acc06
953         rlwinm  $acc14,$s3,`0+3`,21,28
954         lwzx    $acc07,$Tbl1,$acc07
955         rlwinm  $acc15,$s0,`0+3`,21,28
956         lwzx    $acc08,$Tbl2,$acc08
957         xor     $t0,$t0,$acc00                  # accumulate the four table words on top of the round key
958         lwzx    $acc09,$Tbl2,$acc09
959         xor     $t1,$t1,$acc01
960         lwzx    $acc10,$Tbl2,$acc10
961         xor     $t2,$t2,$acc02
962         lwzx    $acc11,$Tbl2,$acc11
963         xor     $t3,$t3,$acc03
964         lwzx    $acc12,$Tbl3,$acc12
965         xor     $t0,$t0,$acc04
966         lwzx    $acc13,$Tbl3,$acc13
967         xor     $t1,$t1,$acc05
968         lwzx    $acc14,$Tbl3,$acc14
969         xor     $t2,$t2,$acc06
970         lwzx    $acc15,$Tbl3,$acc15
971         xor     $t3,$t3,$acc07
972         xor     $t0,$t0,$acc08
973         xor     $t1,$t1,$acc09
974         xor     $t2,$t2,$acc10
975         xor     $t3,$t3,$acc11
976         xor     $s0,$t0,$acc12                  # new state for the next round
977         xor     $s1,$t1,$acc13
978         xor     $s2,$t2,$acc14
979         xor     $s3,$t3,$acc15
980         addi    $key,$key,16
981         bdnz-   Ldec_loop                       # decrement ctr, loop while it is not zero
982
983         addi    $Tbl2,$Tbl0,2048                # last round: byte table located 2048 bytes into the table
984         nop
985         lwz     $t0,0($key)                     # last round key
986         rlwinm  $acc00,$s0,`32-24`,24,31        # plain byte index this time (no scaling)
987         lwz     $t1,4($key)
988         rlwinm  $acc01,$s1,`32-24`,24,31
989         lwz     $t2,8($key)
990         rlwinm  $acc02,$s2,`32-24`,24,31
991         lwz     $t3,12($key)
992         rlwinm  $acc03,$s3,`32-24`,24,31
993         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Td4
994         rlwinm  $acc04,$s3,`32-16`,24,31
995         lwz     $acc09,`2048+32`($Tbl0)         # acc08-acc15 loads touch the byte table every 32 bytes
996         rlwinm  $acc05,$s0,`32-16`,24,31
997         lwz     $acc10,`2048+64`($Tbl0)         # the loaded values are overwritten below before any use
998         lbzx    $acc00,$Tbl2,$acc00
999         lwz     $acc11,`2048+96`($Tbl0)
1000         lbzx    $acc01,$Tbl2,$acc01
1001         lwz     $acc12,`2048+128`($Tbl0)
1002         rlwinm  $acc06,$s1,`32-16`,24,31
1003         lwz     $acc13,`2048+160`($Tbl0)
1004         rlwinm  $acc07,$s2,`32-16`,24,31
1005         lwz     $acc14,`2048+192`($Tbl0)
1006         rlwinm  $acc08,$s2,`32-8`,24,31
1007         lwz     $acc15,`2048+224`($Tbl0)
1008         rlwinm  $acc09,$s3,`32-8`,24,31
1009         lbzx    $acc02,$Tbl2,$acc02
1010         rlwinm  $acc10,$s0,`32-8`,24,31
1011         lbzx    $acc03,$Tbl2,$acc03
1012         rlwinm  $acc11,$s1,`32-8`,24,31
1013         lbzx    $acc04,$Tbl2,$acc04
1014         rlwinm  $acc12,$s1,`0`,24,31
1015         lbzx    $acc05,$Tbl2,$acc05
1016         rlwinm  $acc13,$s2,`0`,24,31
1017         lbzx    $acc06,$Tbl2,$acc06
1018         rlwinm  $acc14,$s3,`0`,24,31
1019         lbzx    $acc07,$Tbl2,$acc07
1020         rlwinm  $acc15,$s0,`0`,24,31
1021         lbzx    $acc08,$Tbl2,$acc08
1022         rlwinm  $s0,$acc00,24,0,7               # rebuild each word: looked-up byte into the top byte
1023         lbzx    $acc09,$Tbl2,$acc09
1024         rlwinm  $s1,$acc01,24,0,7
1025         lbzx    $acc10,$Tbl2,$acc10
1026         rlwinm  $s2,$acc02,24,0,7
1027         lbzx    $acc11,$Tbl2,$acc11
1028         rlwinm  $s3,$acc03,24,0,7
1029         lbzx    $acc12,$Tbl2,$acc12
1030         rlwimi  $s0,$acc04,16,8,15              # insert second byte
1031         lbzx    $acc13,$Tbl2,$acc13
1032         rlwimi  $s1,$acc05,16,8,15
1033         lbzx    $acc14,$Tbl2,$acc14
1034         rlwimi  $s2,$acc06,16,8,15
1035         lbzx    $acc15,$Tbl2,$acc15
1036         rlwimi  $s3,$acc07,16,8,15
1037         rlwimi  $s0,$acc08,8,16,23              # insert third byte
1038         rlwimi  $s1,$acc09,8,16,23
1039         rlwimi  $s2,$acc10,8,16,23
1040         rlwimi  $s3,$acc11,8,16,23
1041         or      $s0,$s0,$acc12                  # lowest byte
1042         or      $s1,$s1,$acc13
1043         or      $s2,$s2,$acc14
1044         or      $s3,$s3,$acc15
1045         xor     $s0,$s0,$t0                     # xor in the last round key
1046         xor     $s1,$s1,$t1
1047         xor     $s2,$s2,$t2
1048         xor     $s3,$s3,$t3
1049         blr
1050         .long   0                       # NOTE(review): this and the next line look like a traceback table - confirm field encoding
1051         .byte   0,12,0x14,0,0,0,0,0
1052
# Lppc_AES_decrypt_compact: decryption core that uses only the byte
# table found 2048 bytes past Tbl0 and computes the column mixing
# arithmetically. State is read from and returned in s0-s3, key points
# at the key schedule and is advanced by 16 bytes per round key.
# Each pass xors in a round key and substitutes/permutes bytes. All
# passes but the last then mix columns, with separate code for 64-bit
# and 32-bit builds selected by SIZE_T at generation time.
1053 .align  4
1054 Lppc_AES_decrypt_compact:
1055         lwz     $acc00,240($key)        # word at offset 240 of the key schedule, used as round count
1056         addi    $Tbl1,$Tbl0,2048        # byte table (presumably the inverse S-box - TODO confirm)
1057         lwz     $t0,0($key)
1058         lis     $mask80,0x8080          # mask80 = 0x80808080, high bit of every byte
1059         lwz     $t1,4($key)
1060         lis     $mask1b,0x1b1b          # mask1b = 0x1b1b1b1b, reduction constant per byte
1061         lwz     $t2,8($key)
1062         ori     $mask80,$mask80,0x8080
1063         lwz     $t3,12($key)
1064         ori     $mask1b,$mask1b,0x1b1b
1065         addi    $key,$key,16
1066 ___
1067 $code.=<<___ if ($SIZE_T==8);   # 64-bit build only: replicate both masks into the upper 32 bits
1068         insrdi  $mask80,$mask80,32,0
1069         insrdi  $mask1b,$mask1b,32,0
1070 ___
1071 $code.=<<___;   # common code: byte substitution and permutation
1072         mtctr   $acc00                  # loop counter = full round count, the final pass leaves via bdz
1073 .align  4
1074 Ldec_compact_loop:
1075         xor     $s0,$s0,$t0             # xor the current round key into the state
1076         xor     $s1,$s1,$t1
1077         rlwinm  $acc00,$s0,`32-24`,24,31        # byte indices are picked across s0-s3, which also permutes the rows
1078         xor     $s2,$s2,$t2
1079         rlwinm  $acc01,$s1,`32-24`,24,31
1080         xor     $s3,$s3,$t3
1081         rlwinm  $acc02,$s2,`32-24`,24,31
1082         rlwinm  $acc03,$s3,`32-24`,24,31
1083         rlwinm  $acc04,$s3,`32-16`,24,31
1084         rlwinm  $acc05,$s0,`32-16`,24,31
1085         rlwinm  $acc06,$s1,`32-16`,24,31
1086         rlwinm  $acc07,$s2,`32-16`,24,31
1087         lbzx    $acc00,$Tbl1,$acc00             # substitute each byte through the byte table
1088         rlwinm  $acc08,$s2,`32-8`,24,31
1089         lbzx    $acc01,$Tbl1,$acc01
1090         rlwinm  $acc09,$s3,`32-8`,24,31
1091         lbzx    $acc02,$Tbl1,$acc02
1092         rlwinm  $acc10,$s0,`32-8`,24,31
1093         lbzx    $acc03,$Tbl1,$acc03
1094         rlwinm  $acc11,$s1,`32-8`,24,31
1095         lbzx    $acc04,$Tbl1,$acc04
1096         rlwinm  $acc12,$s1,`0`,24,31
1097         lbzx    $acc05,$Tbl1,$acc05
1098         rlwinm  $acc13,$s2,`0`,24,31
1099         lbzx    $acc06,$Tbl1,$acc06
1100         rlwinm  $acc14,$s3,`0`,24,31
1101         lbzx    $acc07,$Tbl1,$acc07
1102         rlwinm  $acc15,$s0,`0`,24,31
1103         lbzx    $acc08,$Tbl1,$acc08
1104         rlwinm  $s0,$acc00,24,0,7               # rebuild each word: substituted byte into the top byte
1105         lbzx    $acc09,$Tbl1,$acc09
1106         rlwinm  $s1,$acc01,24,0,7
1107         lbzx    $acc10,$Tbl1,$acc10
1108         rlwinm  $s2,$acc02,24,0,7
1109         lbzx    $acc11,$Tbl1,$acc11
1110         rlwinm  $s3,$acc03,24,0,7
1111         lbzx    $acc12,$Tbl1,$acc12
1112         rlwimi  $s0,$acc04,16,8,15              # insert second byte
1113         lbzx    $acc13,$Tbl1,$acc13
1114         rlwimi  $s1,$acc05,16,8,15
1115         lbzx    $acc14,$Tbl1,$acc14
1116         rlwimi  $s2,$acc06,16,8,15
1117         lbzx    $acc15,$Tbl1,$acc15
1118         rlwimi  $s3,$acc07,16,8,15
1119         rlwimi  $s0,$acc08,8,16,23              # insert third byte
1120         rlwimi  $s1,$acc09,8,16,23
1121         rlwimi  $s2,$acc10,8,16,23
1122         rlwimi  $s3,$acc11,8,16,23
1123         lwz     $t0,0($key)                     # fetch the next round key while finishing the words
1124         or      $s0,$s0,$acc12                  # lowest byte
1125         lwz     $t1,4($key)
1126         or      $s1,$s1,$acc13
1127         lwz     $t2,8($key)
1128         or      $s2,$s2,$acc14
1129         lwz     $t3,12($key)
1130         or      $s3,$s3,$acc15
1131
1132         addi    $key,$key,16
1133         bdz     Ldec_compact_done               # final pass: skip the column mixing below
1134 ___
1135 $code.=<<___ if ($SIZE_T==8);   # 64-bit build: two state words per register
1136         # vectorized permutation improves decrypt performance by 10%
1137         insrdi  $s0,$s1,32,0            # upper half of s0 = s1
1138         insrdi  $s2,$s3,32,0            # upper half of s2 = s3
1139
1140         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1141         and     $acc02,$s2,$mask80
1142         srdi    $acc04,$acc00,7         # r1>>7
1143         srdi    $acc06,$acc02,7
1144         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1145         andc    $acc10,$s2,$mask80
1146         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1147         sub     $acc02,$acc02,$acc06
1148         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1149         add     $acc10,$acc10,$acc10
1150         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1151         and     $acc02,$acc02,$mask1b
1152         xor     $acc00,$acc00,$acc08    # r2
1153         xor     $acc02,$acc02,$acc10
1154
1155         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1156         and     $acc06,$acc02,$mask80
1157         srdi    $acc08,$acc04,7         # r1>>7
1158         srdi    $acc10,$acc06,7
1159         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1160         andc    $acc14,$acc02,$mask80
1161         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1162         sub     $acc06,$acc06,$acc10
1163         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1164         add     $acc14,$acc14,$acc14
1165         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1166         and     $acc06,$acc06,$mask1b
1167         xor     $acc04,$acc04,$acc12    # r4
1168         xor     $acc06,$acc06,$acc14
1169
1170         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1171         and     $acc10,$acc06,$mask80
1172         srdi    $acc12,$acc08,7         # r1>>7
1173         srdi    $acc14,$acc10,7
1174         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1175         sub     $acc10,$acc10,$acc14
1176         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1177         andc    $acc14,$acc06,$mask80
1178         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1179         add     $acc14,$acc14,$acc14
1180         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1181         and     $acc10,$acc10,$mask1b
1182         xor     $acc08,$acc08,$acc12    # r8
1183         xor     $acc10,$acc10,$acc14
1184
1185         xor     $acc00,$acc00,$s0       # r2^r0
1186         xor     $acc02,$acc02,$s2
1187         xor     $acc04,$acc04,$s0       # r4^r0
1188         xor     $acc06,$acc06,$s2
1189
1190         extrdi  $acc01,$acc00,32,0      # unpack the upper halves so the common tail sees one word per register
1191         extrdi  $acc03,$acc02,32,0
1192         extrdi  $acc05,$acc04,32,0
1193         extrdi  $acc07,$acc06,32,0
1194         extrdi  $acc09,$acc08,32,0
1195         extrdi  $acc11,$acc10,32,0
1196 ___
1197 $code.=<<___ if ($SIZE_T==4);   # 32-bit build: one state word per register
1198         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1199         and     $acc01,$s1,$mask80
1200         and     $acc02,$s2,$mask80
1201         and     $acc03,$s3,$mask80
1202         srwi    $acc04,$acc00,7         # r1>>7
1203         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1204         srwi    $acc05,$acc01,7
1205         andc    $acc09,$s1,$mask80
1206         srwi    $acc06,$acc02,7
1207         andc    $acc10,$s2,$mask80
1208         srwi    $acc07,$acc03,7
1209         andc    $acc11,$s3,$mask80
1210         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1211         sub     $acc01,$acc01,$acc05
1212         sub     $acc02,$acc02,$acc06
1213         sub     $acc03,$acc03,$acc07
1214         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1215         add     $acc09,$acc09,$acc09
1216         add     $acc10,$acc10,$acc10
1217         add     $acc11,$acc11,$acc11
1218         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1219         and     $acc01,$acc01,$mask1b
1220         and     $acc02,$acc02,$mask1b
1221         and     $acc03,$acc03,$mask1b
1222         xor     $acc00,$acc00,$acc08    # r2
1223         xor     $acc01,$acc01,$acc09
1224         xor     $acc02,$acc02,$acc10
1225         xor     $acc03,$acc03,$acc11
1226
1227         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1228         and     $acc05,$acc01,$mask80
1229         and     $acc06,$acc02,$mask80
1230         and     $acc07,$acc03,$mask80
1231         srwi    $acc08,$acc04,7         # r1>>7
1232         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1233         srwi    $acc09,$acc05,7
1234         andc    $acc13,$acc01,$mask80
1235         srwi    $acc10,$acc06,7
1236         andc    $acc14,$acc02,$mask80
1237         srwi    $acc11,$acc07,7
1238         andc    $acc15,$acc03,$mask80
1239         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1240         sub     $acc05,$acc05,$acc09
1241         sub     $acc06,$acc06,$acc10
1242         sub     $acc07,$acc07,$acc11
1243         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1244         add     $acc13,$acc13,$acc13
1245         add     $acc14,$acc14,$acc14
1246         add     $acc15,$acc15,$acc15
1247         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1248         and     $acc05,$acc05,$mask1b
1249         and     $acc06,$acc06,$mask1b
1250         and     $acc07,$acc07,$mask1b
1251         xor     $acc04,$acc04,$acc12    # r4
1252         xor     $acc05,$acc05,$acc13
1253         xor     $acc06,$acc06,$acc14
1254         xor     $acc07,$acc07,$acc15
1255
1256         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1257         and     $acc09,$acc05,$mask80
1258         srwi    $acc12,$acc08,7         # r1>>7
1259         and     $acc10,$acc06,$mask80
1260         srwi    $acc13,$acc09,7
1261         and     $acc11,$acc07,$mask80
1262         srwi    $acc14,$acc10,7
1263         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1264         srwi    $acc15,$acc11,7
1265         sub     $acc09,$acc09,$acc13
1266         sub     $acc10,$acc10,$acc14
1267         sub     $acc11,$acc11,$acc15
1268         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1269         andc    $acc13,$acc05,$mask80
1270         andc    $acc14,$acc06,$mask80
1271         andc    $acc15,$acc07,$mask80
1272         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1273         add     $acc13,$acc13,$acc13
1274         add     $acc14,$acc14,$acc14
1275         add     $acc15,$acc15,$acc15
1276         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1277         and     $acc09,$acc09,$mask1b
1278         and     $acc10,$acc10,$mask1b
1279         and     $acc11,$acc11,$mask1b
1280         xor     $acc08,$acc08,$acc12    # r8
1281         xor     $acc09,$acc09,$acc13
1282         xor     $acc10,$acc10,$acc14
1283         xor     $acc11,$acc11,$acc15
1284
1285         xor     $acc00,$acc00,$s0       # r2^r0
1286         xor     $acc01,$acc01,$s1
1287         xor     $acc02,$acc02,$s2
1288         xor     $acc03,$acc03,$s3
1289         xor     $acc04,$acc04,$s0       # r4^r0
1290         xor     $acc05,$acc05,$s1
1291         xor     $acc06,$acc06,$s2
1292         xor     $acc07,$acc07,$s3
1293 ___
1294 $code.=<<___;   # common tail: combine r0, r2, r4, r8 (per-byte multipliers 0x0e, 0x09, 0x0d, 0x0b = InvMixColumns)
1295         rotrwi  $s0,$s0,8               # = ROTATE(r0,8)
1296         rotrwi  $s1,$s1,8
1297         xor     $s0,$s0,$acc00          # ^= r2^r0
1298         rotrwi  $s2,$s2,8
1299         xor     $s1,$s1,$acc01
1300         rotrwi  $s3,$s3,8
1301         xor     $s2,$s2,$acc02
1302         xor     $s3,$s3,$acc03
1303         xor     $acc00,$acc00,$acc08
1304         xor     $acc01,$acc01,$acc09
1305         xor     $acc02,$acc02,$acc10
1306         xor     $acc03,$acc03,$acc11
1307         xor     $s0,$s0,$acc04          # ^= r4^r0
1308         rotrwi  $acc00,$acc00,24
1309         xor     $s1,$s1,$acc05
1310         rotrwi  $acc01,$acc01,24
1311         xor     $s2,$s2,$acc06
1312         rotrwi  $acc02,$acc02,24
1313         xor     $s3,$s3,$acc07
1314         rotrwi  $acc03,$acc03,24
1315         xor     $acc04,$acc04,$acc08
1316         xor     $acc05,$acc05,$acc09
1317         xor     $acc06,$acc06,$acc10
1318         xor     $acc07,$acc07,$acc11
1319         xor     $s0,$s0,$acc08          # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1320         rotrwi  $acc04,$acc04,16
1321         xor     $s1,$s1,$acc09
1322         rotrwi  $acc05,$acc05,16
1323         xor     $s2,$s2,$acc10
1324         rotrwi  $acc06,$acc06,16
1325         xor     $s3,$s3,$acc11
1326         rotrwi  $acc07,$acc07,16
1327         xor     $s0,$s0,$acc00          # ^= ROTATE(r8^r2^r0,24)
1328         rotrwi  $acc08,$acc08,8
1329         xor     $s1,$s1,$acc01
1330         rotrwi  $acc09,$acc09,8
1331         xor     $s2,$s2,$acc02
1332         rotrwi  $acc10,$acc10,8
1333         xor     $s3,$s3,$acc03
1334         rotrwi  $acc11,$acc11,8
1335         xor     $s0,$s0,$acc04          # ^= ROTATE(r8^r4^r0,16)
1336         xor     $s1,$s1,$acc05
1337         xor     $s2,$s2,$acc06
1338         xor     $s3,$s3,$acc07
1339         xor     $s0,$s0,$acc08          # ^= ROTATE(r8,8)       
1340         xor     $s1,$s1,$acc09  
1341         xor     $s2,$s2,$acc10  
1342         xor     $s3,$s3,$acc11  
1343
1344         b       Ldec_compact_loop       # next pass starts by xoring in the key words loaded above
1345 .align  4
1346 Ldec_compact_done:
1347         xor     $s0,$s0,$t0             # xor in the last round key
1348         xor     $s1,$s1,$t1
1349         xor     $s2,$s2,$t2
1350         xor     $s3,$s3,$t3
1351         blr
1352         .long   0                       # NOTE(review): this and the next line look like a traceback table - confirm field encoding
1353         .byte   0,12,0x14,0,0,0,0,0
1354 .size   .AES_decrypt,.-.AES_decrypt     # symbol size spans everything from the AES_decrypt label to this point, local cores included
1355
1356 .asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1357 .align  7
1358 ___
1359
# Final emission step of the generator.
# Every backquoted span in the accumulated text is a Perl expression
# (frame offsets, rotate counts); evaluate each one and substitute the
# resulting value before the text is written out.
1360 $code =~ s/\`([^\`]*)\`/eval $1/gem;
1361 print $code;
# Buffered write errors (full disk, or a failing consumer if STDOUT is a
# pipe) only surface when the handle is closed. Abort with a diagnostic
# instead of silently leaving truncated assembly behind for the build.
1362 close STDOUT or die "error closing STDOUT: $!";