PPC assembly pack: add .size directives.
[openssl.git] / crypto / aes / asm / aes-ppc.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # Needs more work: key setup, CBC routine...
11 #
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17 # at 1/3 of ppc_AES_decrypt.
18
19 # February 2010
20 #
21 # Rescheduling instructions to favour Power6 pipeline gave 10%
22 # performance improvement on the platform in question (and marginal
23 # improvement even on others). It should be noted that Power6 fails
24 # to process a byte in 18 cycles, only in 23, because it fails to issue
25 # 4 load instructions in two cycles, only in 3. As a result, non-compact
26 # block subroutines are 25% slower than one would expect. Compact
27 # functions scale better, because they have pure computational part,
28 # which scales perfectly with clock frequency. To be specific
29 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
# The first command-line argument names the target flavour. It selects the
# pointer size, the link-register save offset and the mnemonics used for
# "store with update", load and store of pointer-sized values.
$flavour = shift;

# /64/ is tested before /32/, so a flavour matching both is treated as 64-bit.
if    ($flavour =~ /64/) { ($SIZE_T,$STU,$POP,$PUSH) = (8,"stdu","ld","std"); }
elsif ($flavour =~ /32/) { ($SIZE_T,$STU,$POP,$PUSH) = (4,"stwu","lwz","stw"); }
else                     { die "nonsense $flavour"; }

# Link register save slot: two pointer-sized words above the frame in the
# 64-bit case, one in the 32-bit case.
$LRSAVE = ($SIZE_T == 8) ? 2*$SIZE_T : $SIZE_T;
47
# Locate the ppc-xlate.pl translator: first in the directory this script was
# started from, then in ../../perlasm relative to that directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Redirect STDOUT into the translator, passing it the flavour and the next
# command-line argument. The low-precedence "or" is required here: with "||"
# the die() attaches to the (always true) command string rather than to
# open(), so a failed open would go unnoticed.
open STDOUT,"| $^X $xlate $flavour ".shift(@ARGV) or die "can't call $xlate: $!";
54
# Size in bytes of the stack frame allocated by the functions generated
# below: 32 slots of $SIZE_T bytes each.
55 $FRAME=32*$SIZE_T;
56
# Append one ".long" directive to $code for every argument. Each value is
# written twice as an 8-digit hex constant, so every table entry occupies
# 8 bytes in the generated output. Processing stops at the first undefined
# argument.
#
# Declared without a prototype: the sub consumes its argument list, so the
# former empty "()" prototype was wrong and only harmless because callers
# use the "&" call form, which bypasses prototypes.
sub _data_word
{
    for my $word (@_) {
        last unless defined($word);
        $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$word,$word;
    }
    return;
}
61
# Symbolic names for the registers used by the generated code.

# Stack pointer, TOC pointer and the three function arguments: r1..r5.
($sp,$toc,$inp,$out,$key) = map { "r$_" } (1..5);

# Table pointers. $Tbl0 shares r3 with $inp.
# $Tbl3 stays away from "r2"; $out is offloaded to stack.
($Tbl0,$Tbl1,$Tbl2,$Tbl3) = ("r3","r6","r7",$out);

# Four 32-bit state words: r8..r11.
($s0,$s1,$s2,$s3) = map { "r$_" } (8..11);

# Temporaries; $t1 is r0 in order to stay away from "r13".
($t0,$t1,$t2,$t3) = ("r12","r0","r14","r15");

# Sixteen accumulators: r16..r31.
($acc00,$acc01,$acc02,$acc03,
 $acc04,$acc05,$acc06,$acc07,
 $acc08,$acc09,$acc10,$acc11,
 $acc12,$acc13,$acc14,$acc15) = map { "r$_" } (16..31);

# The mask registers reuse two of the table-pointer registers.
($mask80,$mask1b) = ($Tbl2,$Tbl3);
105
# Table-address helpers LAES_Te and LAES_Td. Each one keeps the caller's
# link register in r0, executes "bcl 20,31,$+4" so that LR holds the address
# of the following instruction, copies that address to $Tbl0 and adds the
# byte distance to the first entry of its table, then restores LR from r0
# and returns.
#
# Layout: LAES_Te starts on a 128-byte boundary (.align 7). Each helper is
# 9*4 bytes long and is padded with .space, so LAES_Td starts 64 bytes after
# LAES_Te and the data emitted after this heredoc starts 128 bytes after
# LAES_Te. LAES_Td additionally skips 2048+256 bytes, i.e. the word table
# and the byte table emitted immediately after this heredoc.
106 $code.=<<___;
107 .machine        "any"
108 .text
109
110 .align  7
111 LAES_Te:
112         mflr    r0
113         bcl     20,31,\$+4
114         mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
115         addi    $Tbl0,$Tbl0,`128-8`
116         mtlr    r0
117         blr
118         .long   0
119         .byte   0,12,0x14,0,0,0,0,0
120         .space  `64-9*4`
121 LAES_Td:
122         mflr    r0
123         bcl     20,31,\$+4
124         mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
125         addi    $Tbl0,$Tbl0,`128-64-8+2048+256`
126         mtlr    r0
127         blr
128         .long   0
129         .byte   0,12,0x14,0,0,0,0,0
130         .space  `128-64-9*4`
131 ___
# Word table addressed through LAES_Te: 256 32-bit values. _data_word emits
# each value twice, so the table occupies 256*8 = 2048 bytes of output.
132 &_data_word(
133         0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
134         0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
135         0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
136         0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
137         0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
138         0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
139         0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
140         0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
141         0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
142         0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
143         0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
144         0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
145         0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
146         0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
147         0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
148         0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
149         0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
150         0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
151         0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
152         0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
153         0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
154         0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
155         0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
156         0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
157         0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
158         0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
159         0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
160         0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
161         0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
162         0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
163         0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
164         0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
165         0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
166         0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
167         0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
168         0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
169         0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
170         0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
171         0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
172         0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
173         0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
174         0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
175         0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
176         0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
177         0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
178         0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
179         0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
180         0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
181         0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
182         0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
183         0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
184         0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
185         0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
186         0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
187         0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
188         0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
189         0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
190         0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
191         0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
192         0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
193         0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
194         0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
195         0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
196         0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# 256-byte table emitted directly after the 2048-byte word table. The code
# further down calls it Te4 and reads it with byte loads from a pointer set
# to $Tbl0+2048.
197 $code.=<<___;
198 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
199 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
200 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
201 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
202 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
203 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
204 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
205 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
206 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
207 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
208 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
209 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
210 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
211 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
212 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
213 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
214 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
215 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
216 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
217 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
218 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
219 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
220 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
221 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
222 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
223 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
224 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
225 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
226 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
227 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
228 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
229 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
230 ___
# Word table addressed through LAES_Td: 256 32-bit values, each emitted
# twice by _data_word (8 bytes per entry). It starts 2048+256 bytes after
# the first word table, which is the extra distance LAES_Td adds.
231 &_data_word(
232         0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
233         0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
234         0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
235         0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
236         0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
237         0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
238         0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
239         0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
240         0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
241         0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
242         0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
243         0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
244         0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
245         0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
246         0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
247         0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
248         0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
249         0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
250         0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
251         0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
252         0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
253         0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
254         0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
255         0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
256         0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
257         0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
258         0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
259         0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
260         0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
261         0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
262         0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
263         0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
264         0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
265         0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
266         0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
267         0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
268         0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
269         0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
270         0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
271         0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
272         0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
273         0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
274         0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
275         0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
276         0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
277         0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
278         0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
279         0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
280         0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
281         0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
282         0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
283         0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
284         0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
285         0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
286         0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
287         0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
288         0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
289         0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
290         0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
291         0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
292         0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
293         0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
294         0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
295         0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
296 $code.=<<___;
297 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
298 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
299 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
300 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
301 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
302 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
303 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
304 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
305 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
306 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
307 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
308 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
309 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
310 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
311 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
312 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
313 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
314 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
315 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
316 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
317 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
318 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
319 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
320 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
321 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
322 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
323 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
324 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
325 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
326 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
327 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
328 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
329
330
331 .globl  .AES_encrypt
332 .align  7
333 .AES_encrypt:
334         $STU    $sp,-$FRAME($sp)
335         mflr    r0
336
337         $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
338         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
339         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
340         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
341         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
342         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
343         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
344         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
345         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
346         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
347         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
348         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
349         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
350         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
351         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
352         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
353         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
354         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
355         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
356         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
357
358         andi.   $t0,$inp,3
359         andi.   $t1,$out,3
360         or.     $t0,$t0,$t1
361         bne     Lenc_unaligned
362
363 Lenc_unaligned_ok:
364         lwz     $s0,0($inp)
365         lwz     $s1,4($inp)
366         lwz     $s2,8($inp)
367         lwz     $s3,12($inp)
368         bl      LAES_Te
369         bl      Lppc_AES_encrypt_compact
370         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
371         stw     $s0,0($out)
372         stw     $s1,4($out)
373         stw     $s2,8($out)
374         stw     $s3,12($out)
375         b       Lenc_done
376
377 Lenc_unaligned:
378         subfic  $t0,$inp,4096
379         subfic  $t1,$out,4096
380         andi.   $t0,$t0,4096-16
381         beq     Lenc_xpage
382         andi.   $t1,$t1,4096-16
383         bne     Lenc_unaligned_ok
384
385 Lenc_xpage:
386         lbz     $acc00,0($inp)
387         lbz     $acc01,1($inp)
388         lbz     $acc02,2($inp)
389         lbz     $s0,3($inp)
390         lbz     $acc04,4($inp)
391         lbz     $acc05,5($inp)
392         lbz     $acc06,6($inp)
393         lbz     $s1,7($inp)
394         lbz     $acc08,8($inp)
395         lbz     $acc09,9($inp)
396         lbz     $acc10,10($inp)
397         insrwi  $s0,$acc00,8,0
398         lbz     $s2,11($inp)
399         insrwi  $s1,$acc04,8,0
400         lbz     $acc12,12($inp)
401         insrwi  $s0,$acc01,8,8
402         lbz     $acc13,13($inp)
403         insrwi  $s1,$acc05,8,8
404         lbz     $acc14,14($inp)
405         insrwi  $s0,$acc02,8,16
406         lbz     $s3,15($inp)
407         insrwi  $s1,$acc06,8,16
408         insrwi  $s2,$acc08,8,0
409         insrwi  $s3,$acc12,8,0
410         insrwi  $s2,$acc09,8,8
411         insrwi  $s3,$acc13,8,8
412         insrwi  $s2,$acc10,8,16
413         insrwi  $s3,$acc14,8,16
414
415         bl      LAES_Te
416         bl      Lppc_AES_encrypt_compact
417         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
418
419         extrwi  $acc00,$s0,8,0
420         extrwi  $acc01,$s0,8,8
421         stb     $acc00,0($out)
422         extrwi  $acc02,$s0,8,16
423         stb     $acc01,1($out)
424         stb     $acc02,2($out)
425         extrwi  $acc04,$s1,8,0
426         stb     $s0,3($out)
427         extrwi  $acc05,$s1,8,8
428         stb     $acc04,4($out)
429         extrwi  $acc06,$s1,8,16
430         stb     $acc05,5($out)
431         stb     $acc06,6($out)
432         extrwi  $acc08,$s2,8,0
433         stb     $s1,7($out)
434         extrwi  $acc09,$s2,8,8
435         stb     $acc08,8($out)
436         extrwi  $acc10,$s2,8,16
437         stb     $acc09,9($out)
438         stb     $acc10,10($out)
439         extrwi  $acc12,$s3,8,0
440         stb     $s2,11($out)
441         extrwi  $acc13,$s3,8,8
442         stb     $acc12,12($out)
443         extrwi  $acc14,$s3,8,16
444         stb     $acc13,13($out)
445         stb     $acc14,14($out)
446         stb     $s3,15($out)
447
448 Lenc_done:
449         $POP    r0,`$FRAME+$LRSAVE`($sp)
450         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
451         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
452         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
453         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
454         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
455         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
456         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
457         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
458         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
459         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
460         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
461         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
462         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
463         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
464         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
465         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
466         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
467         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
468         mtlr    r0
469         addi    $sp,$sp,$FRAME
470         blr
471         .long   0
472         .byte   0,12,4,1,0x80,18,3,0
473         .long   0
474 .size   .AES_encrypt,.-.AES_encrypt
475
476 .align  5
477 Lppc_AES_encrypt:
478         lwz     $acc00,240($key)
479         addi    $Tbl1,$Tbl0,3
480         lwz     $t0,0($key)
481         addi    $Tbl2,$Tbl0,2
482         lwz     $t1,4($key)
483         addi    $Tbl3,$Tbl0,1
484         lwz     $t2,8($key)
485         addi    $acc00,$acc00,-1
486         lwz     $t3,12($key)
487         addi    $key,$key,16
488         xor     $s0,$s0,$t0
489         xor     $s1,$s1,$t1
490         xor     $s2,$s2,$t2
491         xor     $s3,$s3,$t3
492         mtctr   $acc00
493 .align  4
494 Lenc_loop:
495         rlwinm  $acc00,$s0,`32-24+3`,21,28
496         rlwinm  $acc01,$s1,`32-24+3`,21,28
497         rlwinm  $acc02,$s2,`32-24+3`,21,28
498         rlwinm  $acc03,$s3,`32-24+3`,21,28
499         lwz     $t0,0($key)
500         rlwinm  $acc04,$s1,`32-16+3`,21,28
501         lwz     $t1,4($key)
502         rlwinm  $acc05,$s2,`32-16+3`,21,28
503         lwz     $t2,8($key)
504         rlwinm  $acc06,$s3,`32-16+3`,21,28
505         lwz     $t3,12($key)
506         rlwinm  $acc07,$s0,`32-16+3`,21,28
507         lwzx    $acc00,$Tbl0,$acc00
508         rlwinm  $acc08,$s2,`32-8+3`,21,28
509         lwzx    $acc01,$Tbl0,$acc01
510         rlwinm  $acc09,$s3,`32-8+3`,21,28
511         lwzx    $acc02,$Tbl0,$acc02
512         rlwinm  $acc10,$s0,`32-8+3`,21,28
513         lwzx    $acc03,$Tbl0,$acc03
514         rlwinm  $acc11,$s1,`32-8+3`,21,28
515         lwzx    $acc04,$Tbl1,$acc04
516         rlwinm  $acc12,$s3,`0+3`,21,28
517         lwzx    $acc05,$Tbl1,$acc05
518         rlwinm  $acc13,$s0,`0+3`,21,28
519         lwzx    $acc06,$Tbl1,$acc06
520         rlwinm  $acc14,$s1,`0+3`,21,28
521         lwzx    $acc07,$Tbl1,$acc07
522         rlwinm  $acc15,$s2,`0+3`,21,28
523         lwzx    $acc08,$Tbl2,$acc08
524         xor     $t0,$t0,$acc00
525         lwzx    $acc09,$Tbl2,$acc09
526         xor     $t1,$t1,$acc01
527         lwzx    $acc10,$Tbl2,$acc10
528         xor     $t2,$t2,$acc02
529         lwzx    $acc11,$Tbl2,$acc11
530         xor     $t3,$t3,$acc03
531         lwzx    $acc12,$Tbl3,$acc12
532         xor     $t0,$t0,$acc04
533         lwzx    $acc13,$Tbl3,$acc13
534         xor     $t1,$t1,$acc05
535         lwzx    $acc14,$Tbl3,$acc14
536         xor     $t2,$t2,$acc06
537         lwzx    $acc15,$Tbl3,$acc15
538         xor     $t3,$t3,$acc07
539         xor     $t0,$t0,$acc08
540         xor     $t1,$t1,$acc09
541         xor     $t2,$t2,$acc10
542         xor     $t3,$t3,$acc11
543         xor     $s0,$t0,$acc12
544         xor     $s1,$t1,$acc13
545         xor     $s2,$t2,$acc14
546         xor     $s3,$t3,$acc15
547         addi    $key,$key,16
548         bdnz-   Lenc_loop
549
550         addi    $Tbl2,$Tbl0,2048
551         nop
552         lwz     $t0,0($key)
553         rlwinm  $acc00,$s0,`32-24`,24,31
554         lwz     $t1,4($key)
555         rlwinm  $acc01,$s1,`32-24`,24,31
556         lwz     $t2,8($key)
557         rlwinm  $acc02,$s2,`32-24`,24,31
558         lwz     $t3,12($key)
559         rlwinm  $acc03,$s3,`32-24`,24,31
560         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Te4
561         rlwinm  $acc04,$s1,`32-16`,24,31
562         lwz     $acc09,`2048+32`($Tbl0)
563         rlwinm  $acc05,$s2,`32-16`,24,31
564         lwz     $acc10,`2048+64`($Tbl0)
565         rlwinm  $acc06,$s3,`32-16`,24,31
566         lwz     $acc11,`2048+96`($Tbl0)
567         rlwinm  $acc07,$s0,`32-16`,24,31
568         lwz     $acc12,`2048+128`($Tbl0)
569         rlwinm  $acc08,$s2,`32-8`,24,31
570         lwz     $acc13,`2048+160`($Tbl0)
571         rlwinm  $acc09,$s3,`32-8`,24,31
572         lwz     $acc14,`2048+192`($Tbl0)
573         rlwinm  $acc10,$s0,`32-8`,24,31
574         lwz     $acc15,`2048+224`($Tbl0)
575         rlwinm  $acc11,$s1,`32-8`,24,31
576         lbzx    $acc00,$Tbl2,$acc00
577         rlwinm  $acc12,$s3,`0`,24,31
578         lbzx    $acc01,$Tbl2,$acc01
579         rlwinm  $acc13,$s0,`0`,24,31
580         lbzx    $acc02,$Tbl2,$acc02
581         rlwinm  $acc14,$s1,`0`,24,31
582         lbzx    $acc03,$Tbl2,$acc03
583         rlwinm  $acc15,$s2,`0`,24,31
584         lbzx    $acc04,$Tbl2,$acc04
585         rlwinm  $s0,$acc00,24,0,7
586         lbzx    $acc05,$Tbl2,$acc05
587         rlwinm  $s1,$acc01,24,0,7
588         lbzx    $acc06,$Tbl2,$acc06
589         rlwinm  $s2,$acc02,24,0,7
590         lbzx    $acc07,$Tbl2,$acc07
591         rlwinm  $s3,$acc03,24,0,7
592         lbzx    $acc08,$Tbl2,$acc08
593         rlwimi  $s0,$acc04,16,8,15
594         lbzx    $acc09,$Tbl2,$acc09
595         rlwimi  $s1,$acc05,16,8,15
596         lbzx    $acc10,$Tbl2,$acc10
597         rlwimi  $s2,$acc06,16,8,15
598         lbzx    $acc11,$Tbl2,$acc11
599         rlwimi  $s3,$acc07,16,8,15
600         lbzx    $acc12,$Tbl2,$acc12
601         rlwimi  $s0,$acc08,8,16,23
602         lbzx    $acc13,$Tbl2,$acc13
603         rlwimi  $s1,$acc09,8,16,23
604         lbzx    $acc14,$Tbl2,$acc14
605         rlwimi  $s2,$acc10,8,16,23
606         lbzx    $acc15,$Tbl2,$acc15
607         rlwimi  $s3,$acc11,8,16,23
608         or      $s0,$s0,$acc12
609         or      $s1,$s1,$acc13
610         or      $s2,$s2,$acc14
611         or      $s3,$s3,$acc15
612         xor     $s0,$s0,$t0
613         xor     $s1,$s1,$t1
614         xor     $s2,$s2,$t2
615         xor     $s3,$s3,$t3
616         blr
617         .long   0
618         .byte   0,12,0x14,0,0,0,0,0
619
620 .align  4
621 Lppc_AES_encrypt_compact:
622         lwz     $acc00,240($key)
623         addi    $Tbl1,$Tbl0,2048
624         lwz     $t0,0($key)
625         lis     $mask80,0x8080
626         lwz     $t1,4($key)
627         lis     $mask1b,0x1b1b
628         lwz     $t2,8($key)
629         ori     $mask80,$mask80,0x8080
630         lwz     $t3,12($key)
631         ori     $mask1b,$mask1b,0x1b1b
632         addi    $key,$key,16
633         mtctr   $acc00
634 .align  4
635 Lenc_compact_loop:
636         xor     $s0,$s0,$t0
637         xor     $s1,$s1,$t1
638         rlwinm  $acc00,$s0,`32-24`,24,31
639         xor     $s2,$s2,$t2
640         rlwinm  $acc01,$s1,`32-24`,24,31
641         xor     $s3,$s3,$t3
642         rlwinm  $acc02,$s2,`32-24`,24,31
643         rlwinm  $acc03,$s3,`32-24`,24,31
644         rlwinm  $acc04,$s1,`32-16`,24,31
645         rlwinm  $acc05,$s2,`32-16`,24,31
646         rlwinm  $acc06,$s3,`32-16`,24,31
647         rlwinm  $acc07,$s0,`32-16`,24,31
648         lbzx    $acc00,$Tbl1,$acc00
649         rlwinm  $acc08,$s2,`32-8`,24,31
650         lbzx    $acc01,$Tbl1,$acc01
651         rlwinm  $acc09,$s3,`32-8`,24,31
652         lbzx    $acc02,$Tbl1,$acc02
653         rlwinm  $acc10,$s0,`32-8`,24,31
654         lbzx    $acc03,$Tbl1,$acc03
655         rlwinm  $acc11,$s1,`32-8`,24,31
656         lbzx    $acc04,$Tbl1,$acc04
657         rlwinm  $acc12,$s3,`0`,24,31
658         lbzx    $acc05,$Tbl1,$acc05
659         rlwinm  $acc13,$s0,`0`,24,31
660         lbzx    $acc06,$Tbl1,$acc06
661         rlwinm  $acc14,$s1,`0`,24,31
662         lbzx    $acc07,$Tbl1,$acc07
663         rlwinm  $acc15,$s2,`0`,24,31
664         lbzx    $acc08,$Tbl1,$acc08
665         rlwinm  $s0,$acc00,24,0,7
666         lbzx    $acc09,$Tbl1,$acc09
667         rlwinm  $s1,$acc01,24,0,7
668         lbzx    $acc10,$Tbl1,$acc10
669         rlwinm  $s2,$acc02,24,0,7
670         lbzx    $acc11,$Tbl1,$acc11
671         rlwinm  $s3,$acc03,24,0,7
672         lbzx    $acc12,$Tbl1,$acc12
673         rlwimi  $s0,$acc04,16,8,15
674         lbzx    $acc13,$Tbl1,$acc13
675         rlwimi  $s1,$acc05,16,8,15
676         lbzx    $acc14,$Tbl1,$acc14
677         rlwimi  $s2,$acc06,16,8,15
678         lbzx    $acc15,$Tbl1,$acc15
679         rlwimi  $s3,$acc07,16,8,15
680         rlwimi  $s0,$acc08,8,16,23
681         rlwimi  $s1,$acc09,8,16,23
682         rlwimi  $s2,$acc10,8,16,23
683         rlwimi  $s3,$acc11,8,16,23
684         lwz     $t0,0($key)
685         or      $s0,$s0,$acc12
686         lwz     $t1,4($key)
687         or      $s1,$s1,$acc13
688         lwz     $t2,8($key)
689         or      $s2,$s2,$acc14
690         lwz     $t3,12($key)
691         or      $s3,$s3,$acc15
692
693         addi    $key,$key,16
694         bdz     Lenc_compact_done
695
696         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
697         and     $acc01,$s1,$mask80
698         and     $acc02,$s2,$mask80
699         and     $acc03,$s3,$mask80
700         srwi    $acc04,$acc00,7         # r1>>7
701         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
702         srwi    $acc05,$acc01,7
703         andc    $acc09,$s1,$mask80
704         srwi    $acc06,$acc02,7
705         andc    $acc10,$s2,$mask80
706         srwi    $acc07,$acc03,7
707         andc    $acc11,$s3,$mask80
708         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
709         sub     $acc01,$acc01,$acc05
710         sub     $acc02,$acc02,$acc06
711         sub     $acc03,$acc03,$acc07
712         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
713         add     $acc09,$acc09,$acc09
714         add     $acc10,$acc10,$acc10
715         add     $acc11,$acc11,$acc11
716         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
717         and     $acc01,$acc01,$mask1b
718         and     $acc02,$acc02,$mask1b
719         and     $acc03,$acc03,$mask1b
720         xor     $acc00,$acc00,$acc08    # r2
721         xor     $acc01,$acc01,$acc09
722          rotlwi $acc12,$s0,16           # ROTATE(r0,16)
723         xor     $acc02,$acc02,$acc10
724          rotlwi $acc13,$s1,16
725         xor     $acc03,$acc03,$acc11
726          rotlwi $acc14,$s2,16
727
728         xor     $s0,$s0,$acc00          # r0^r2
729         rotlwi  $acc15,$s3,16
730         xor     $s1,$s1,$acc01
731         rotrwi  $s0,$s0,24              # ROTATE(r2^r0,24)
732         xor     $s2,$s2,$acc02
733         rotrwi  $s1,$s1,24
734         xor     $s3,$s3,$acc03
735         rotrwi  $s2,$s2,24
736         xor     $s0,$s0,$acc00          # ROTATE(r2^r0,24)^r2
737         rotrwi  $s3,$s3,24
738         xor     $s1,$s1,$acc01
739         xor     $s2,$s2,$acc02
740         xor     $s3,$s3,$acc03
741         rotlwi  $acc08,$acc12,8         # ROTATE(r0,24)
742         xor     $s0,$s0,$acc12          #
743         rotlwi  $acc09,$acc13,8
744         xor     $s1,$s1,$acc13
745         rotlwi  $acc10,$acc14,8
746         xor     $s2,$s2,$acc14
747         rotlwi  $acc11,$acc15,8
748         xor     $s3,$s3,$acc15
749         xor     $s0,$s0,$acc08          #
750         xor     $s1,$s1,$acc09
751         xor     $s2,$s2,$acc10
752         xor     $s3,$s3,$acc11
753
754         b       Lenc_compact_loop
755 .align  4
756 Lenc_compact_done:
757         xor     $s0,$s0,$t0
758         xor     $s1,$s1,$t1
759         xor     $s2,$s2,$t2
760         xor     $s3,$s3,$t3
761         blr
762         .long   0
763         .byte   0,12,0x14,0,0,0,0,0
764
# .AES_decrypt: exported single-block decryption entry point.
#
# Allocates a FRAME-byte stack frame, spills the out pointer, r14-r31 and
# LR, reads 16 bytes from inp into s0-s3, calls LAES_Td and then
# Lppc_AES_decrypt_compact, and finally writes s0-s3 to out.
# NOTE(review): LAES_Td is defined outside this section; presumably it
# sets up the table pointer (Tbl0) used by the round code - confirm.
#
# Word accesses (lwz/stw) are used when inp and out are both 4-byte
# aligned, and also for misaligned pointers unless the 4096-boundary test
# at Ldec_unaligned diverts to the byte-wise Ldec_xpage path.
765 .globl  .AES_decrypt
766 .align  7
767 .AES_decrypt:
768         $STU    $sp,-$FRAME($sp)
769         mflr    r0
770
            # out is spilled as well: it is reloaded from this slot after
            # the decryption call, before the result is stored
771         $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
772         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
773         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
774         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
775         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
776         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
777         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
778         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
779         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
780         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
781         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
782         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
783         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
784         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
785         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
786         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
787         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
788         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
789         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
790         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
791
            # non-zero if either pointer has one of its two low bits set,
            # i.e. inp or out is not 4-byte aligned
792         andi.   $t0,$inp,3
793         andi.   $t1,$out,3
794         or.     $t0,$t0,$t1
795         bne     Ldec_unaligned
796
797 Ldec_unaligned_ok:
            # word-at-a-time path: four 32-bit loads, decrypt, four stores
798         lwz     $s0,0($inp)
799         lwz     $s1,4($inp)
800         lwz     $s2,8($inp)
801         lwz     $s3,12($inp)
802         bl      LAES_Td
803         bl      Lppc_AES_decrypt_compact
804         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
805         stw     $s0,0($out)
806         stw     $s1,4($out)
807         stw     $s2,8($out)
808         stw     $s3,12($out)
809         b       Ldec_done
810
811 Ldec_unaligned:
            # t0/t1 = 4096 - pointer.  Masking with 4096-16 gives zero
            # exactly when fewer than 16 bytes lie between the pointer and
            # the next 4096-byte boundary.  In that case the byte-wise path
            # is taken; otherwise the word path above is used even though
            # the pointer is misaligned.
            # NOTE(review): presumably this keeps misaligned word accesses
            # from straddling a 4096-byte page - confirm.
812         subfic  $t0,$inp,4096
813         subfic  $t1,$out,4096
814         andi.   $t0,$t0,4096-16
815         beq     Ldec_xpage
816         andi.   $t1,$t1,4096-16
817         bne     Ldec_unaligned_ok
818
819 Ldec_xpage:
            # byte-wise load.  For each word the byte at offset 3 is loaded
            # straight into the destination (low 8 bits) and the bytes at
            # offsets 0..2 are inserted at bit positions 0, 8 and 16 (bit 0
            # being the most significant), so the first byte in memory ends
            # up as the top byte of the word.
820         lbz     $acc00,0($inp)
821         lbz     $acc01,1($inp)
822         lbz     $acc02,2($inp)
823         lbz     $s0,3($inp)
824         lbz     $acc04,4($inp)
825         lbz     $acc05,5($inp)
826         lbz     $acc06,6($inp)
827         lbz     $s1,7($inp)
828         lbz     $acc08,8($inp)
829         lbz     $acc09,9($inp)
830         lbz     $acc10,10($inp)
831         insrwi  $s0,$acc00,8,0
832         lbz     $s2,11($inp)
833         insrwi  $s1,$acc04,8,0
834         lbz     $acc12,12($inp)
835         insrwi  $s0,$acc01,8,8
836         lbz     $acc13,13($inp)
837         insrwi  $s1,$acc05,8,8
838         lbz     $acc14,14($inp)
839         insrwi  $s0,$acc02,8,16
840         lbz     $s3,15($inp)
841         insrwi  $s1,$acc06,8,16
842         insrwi  $s2,$acc08,8,0
843         insrwi  $s3,$acc12,8,0
844         insrwi  $s2,$acc09,8,8
845         insrwi  $s3,$acc13,8,8
846         insrwi  $s2,$acc10,8,16
847         insrwi  $s3,$acc14,8,16
848
849         bl      LAES_Td
850         bl      Lppc_AES_decrypt_compact
851         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
852
            # byte-wise store, mirror image of the load above: extrwi peels
            # off the three upper bytes of each word, and stb of the whole
            # register writes its low byte at offsets 3, 7, 11 and 15
853         extrwi  $acc00,$s0,8,0
854         extrwi  $acc01,$s0,8,8
855         stb     $acc00,0($out)
856         extrwi  $acc02,$s0,8,16
857         stb     $acc01,1($out)
858         stb     $acc02,2($out)
859         extrwi  $acc04,$s1,8,0
860         stb     $s0,3($out)
861         extrwi  $acc05,$s1,8,8
862         stb     $acc04,4($out)
863         extrwi  $acc06,$s1,8,16
864         stb     $acc05,5($out)
865         stb     $acc06,6($out)
866         extrwi  $acc08,$s2,8,0
867         stb     $s1,7($out)
868         extrwi  $acc09,$s2,8,8
869         stb     $acc08,8($out)
870         extrwi  $acc10,$s2,8,16
871         stb     $acc09,9($out)
872         stb     $acc10,10($out)
873         extrwi  $acc12,$s3,8,0
874         stb     $s2,11($out)
875         extrwi  $acc13,$s3,8,8
876         stb     $acc12,12($out)
877         extrwi  $acc14,$s3,8,16
878         stb     $acc13,13($out)
879         stb     $acc14,14($out)
880         stb     $s3,15($out)
881
882 Ldec_done:
            # epilogue: reload LR and r14-r31 from the slots written in the
            # prologue, release the frame and return
883         $POP    r0,`$FRAME+$LRSAVE`($sp)
884         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
885         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
886         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
887         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
888         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
889         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
890         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
891         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
892         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
893         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
894         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
895         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
896         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
897         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
898         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
899         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
900         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
901         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
902         mtlr    r0
903         addi    $sp,$sp,$FRAME
904         blr
            # NOTE(review): the data words after blr look like a traceback
            # table for debuggers/unwinders - confirm against the ABI.
905         .long   0
906         .byte   0,12,4,1,0x80,18,3,0
907         .long   0
908 .size   .AES_decrypt,.-.AES_decrypt
909
# Lppc_AES_decrypt: table-lookup block decryption (file-local routine).
#
# In:      s0-s3 = the four block words; key = key schedule, with the
#          round count stored at byte offset 240; Tbl0 = table base.
# Out:     s0-s3 = result; key has been advanced by 16 per round key read.
# Scratch: t0-t3, acc00-acc15, Tbl1-Tbl3, CTR.
#
# Within this part of the file .AES_decrypt calls Lppc_AES_decrypt_compact
# rather than this routine.
910 .align  5
911 Lppc_AES_decrypt:
            # Tbl1..Tbl3 are Tbl0+3, +2 and +1: byte-offset views into the
            # same table.
            # NOTE(review): presumably the table entries are laid out so
            # that these offsets yield byte-rotated words - confirm against
            # the table definition.
912         lwz     $acc00,240($key)
913         addi    $Tbl1,$Tbl0,3
914         lwz     $t0,0($key)
915         addi    $Tbl2,$Tbl0,2
916         lwz     $t1,4($key)
917         addi    $Tbl3,$Tbl0,1
918         lwz     $t2,8($key)
919         addi    $acc00,$acc00,-1
920         lwz     $t3,12($key)
921         addi    $key,$key,16
            # initial key addition; CTR = round count - 1, the last round
            # is handled separately after the loop
922         xor     $s0,$s0,$t0
923         xor     $s1,$s1,$t1
924         xor     $s2,$s2,$t2
925         xor     $s3,$s3,$t3
926         mtctr   $acc00
927 .align  4
928 Ldec_loop:
            # Each rlwinm with mask 21,28 isolates one byte of a state word
            # already multiplied by 8, ready to be used as a byte offset for
            # lwzx.  Every output word is the XOR of the round key word with
            # one lookup through each of Tbl0, Tbl1, Tbl2 and Tbl3.
929         rlwinm  $acc00,$s0,`32-24+3`,21,28
930         rlwinm  $acc01,$s1,`32-24+3`,21,28
931         rlwinm  $acc02,$s2,`32-24+3`,21,28
932         rlwinm  $acc03,$s3,`32-24+3`,21,28
933         lwz     $t0,0($key)
934         rlwinm  $acc04,$s3,`32-16+3`,21,28
935         lwz     $t1,4($key)
936         rlwinm  $acc05,$s0,`32-16+3`,21,28
937         lwz     $t2,8($key)
938         rlwinm  $acc06,$s1,`32-16+3`,21,28
939         lwz     $t3,12($key)
940         rlwinm  $acc07,$s2,`32-16+3`,21,28
941         lwzx    $acc00,$Tbl0,$acc00
942         rlwinm  $acc08,$s2,`32-8+3`,21,28
943         lwzx    $acc01,$Tbl0,$acc01
944         rlwinm  $acc09,$s3,`32-8+3`,21,28
945         lwzx    $acc02,$Tbl0,$acc02
946         rlwinm  $acc10,$s0,`32-8+3`,21,28
947         lwzx    $acc03,$Tbl0,$acc03
948         rlwinm  $acc11,$s1,`32-8+3`,21,28
949         lwzx    $acc04,$Tbl1,$acc04
950         rlwinm  $acc12,$s1,`0+3`,21,28
951         lwzx    $acc05,$Tbl1,$acc05
952         rlwinm  $acc13,$s2,`0+3`,21,28
953         lwzx    $acc06,$Tbl1,$acc06
954         rlwinm  $acc14,$s3,`0+3`,21,28
955         lwzx    $acc07,$Tbl1,$acc07
956         rlwinm  $acc15,$s0,`0+3`,21,28
957         lwzx    $acc08,$Tbl2,$acc08
958         xor     $t0,$t0,$acc00
959         lwzx    $acc09,$Tbl2,$acc09
960         xor     $t1,$t1,$acc01
961         lwzx    $acc10,$Tbl2,$acc10
962         xor     $t2,$t2,$acc02
963         lwzx    $acc11,$Tbl2,$acc11
964         xor     $t3,$t3,$acc03
965         lwzx    $acc12,$Tbl3,$acc12
966         xor     $t0,$t0,$acc04
967         lwzx    $acc13,$Tbl3,$acc13
968         xor     $t1,$t1,$acc05
969         lwzx    $acc14,$Tbl3,$acc14
970         xor     $t2,$t2,$acc06
971         lwzx    $acc15,$Tbl3,$acc15
972         xor     $t3,$t3,$acc07
973         xor     $t0,$t0,$acc08
974         xor     $t1,$t1,$acc09
975         xor     $t2,$t2,$acc10
976         xor     $t3,$t3,$acc11
977         xor     $s0,$t0,$acc12
978         xor     $s1,$t1,$acc13
979         xor     $s2,$t2,$acc14
980         xor     $s3,$t3,$acc15
981         addi    $key,$key,16
982         bdnz-   Ldec_loop
983
            # Last round: Tbl2 is repointed 2048 bytes past Tbl0 and indexed
            # per byte with lbzx (unscaled 8-bit indices, mask 24,31).
            # The eight lwz results placed in acc08-acc15 are overwritten by
            # the following rlwinm instructions before being read, so those
            # loads only touch the table memory (see the prefetch remark).
            # The substituted bytes are reassembled into words with
            # rlwinm/rlwimi/or and XORed with the final round key.
984         addi    $Tbl2,$Tbl0,2048
985         nop
986         lwz     $t0,0($key)
987         rlwinm  $acc00,$s0,`32-24`,24,31
988         lwz     $t1,4($key)
989         rlwinm  $acc01,$s1,`32-24`,24,31
990         lwz     $t2,8($key)
991         rlwinm  $acc02,$s2,`32-24`,24,31
992         lwz     $t3,12($key)
993         rlwinm  $acc03,$s3,`32-24`,24,31
994         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Td4
995         rlwinm  $acc04,$s3,`32-16`,24,31
996         lwz     $acc09,`2048+32`($Tbl0)
997         rlwinm  $acc05,$s0,`32-16`,24,31
998         lwz     $acc10,`2048+64`($Tbl0)
999         lbzx    $acc00,$Tbl2,$acc00
1000         lwz     $acc11,`2048+96`($Tbl0)
1001         lbzx    $acc01,$Tbl2,$acc01
1002         lwz     $acc12,`2048+128`($Tbl0)
1003         rlwinm  $acc06,$s1,`32-16`,24,31
1004         lwz     $acc13,`2048+160`($Tbl0)
1005         rlwinm  $acc07,$s2,`32-16`,24,31
1006         lwz     $acc14,`2048+192`($Tbl0)
1007         rlwinm  $acc08,$s2,`32-8`,24,31
1008         lwz     $acc15,`2048+224`($Tbl0)
1009         rlwinm  $acc09,$s3,`32-8`,24,31
1010         lbzx    $acc02,$Tbl2,$acc02
1011         rlwinm  $acc10,$s0,`32-8`,24,31
1012         lbzx    $acc03,$Tbl2,$acc03
1013         rlwinm  $acc11,$s1,`32-8`,24,31
1014         lbzx    $acc04,$Tbl2,$acc04
1015         rlwinm  $acc12,$s1,`0`,24,31
1016         lbzx    $acc05,$Tbl2,$acc05
1017         rlwinm  $acc13,$s2,`0`,24,31
1018         lbzx    $acc06,$Tbl2,$acc06
1019         rlwinm  $acc14,$s3,`0`,24,31
1020         lbzx    $acc07,$Tbl2,$acc07
1021         rlwinm  $acc15,$s0,`0`,24,31
1022         lbzx    $acc08,$Tbl2,$acc08
1023         rlwinm  $s0,$acc00,24,0,7
1024         lbzx    $acc09,$Tbl2,$acc09
1025         rlwinm  $s1,$acc01,24,0,7
1026         lbzx    $acc10,$Tbl2,$acc10
1027         rlwinm  $s2,$acc02,24,0,7
1028         lbzx    $acc11,$Tbl2,$acc11
1029         rlwinm  $s3,$acc03,24,0,7
1030         lbzx    $acc12,$Tbl2,$acc12
1031         rlwimi  $s0,$acc04,16,8,15
1032         lbzx    $acc13,$Tbl2,$acc13
1033         rlwimi  $s1,$acc05,16,8,15
1034         lbzx    $acc14,$Tbl2,$acc14
1035         rlwimi  $s2,$acc06,16,8,15
1036         lbzx    $acc15,$Tbl2,$acc15
1037         rlwimi  $s3,$acc07,16,8,15
1038         rlwimi  $s0,$acc08,8,16,23
1039         rlwimi  $s1,$acc09,8,16,23
1040         rlwimi  $s2,$acc10,8,16,23
1041         rlwimi  $s3,$acc11,8,16,23
1042         or      $s0,$s0,$acc12
1043         or      $s1,$s1,$acc13
1044         or      $s2,$s2,$acc14
1045         or      $s3,$s3,$acc15
1046         xor     $s0,$s0,$t0
1047         xor     $s1,$s1,$t1
1048         xor     $s2,$s2,$t2
1049         xor     $s3,$s3,$t3
1050         blr
1051         .long   0
1052         .byte   0,12,0x14,0,0,0,0,0
1053
# Lppc_AES_decrypt_compact: block decryption that uses only the byte
# table located 2048 bytes past Tbl0, plus arithmetic on packed bytes
# (file-local routine).
#
# In:      s0-s3 = the four block words; key = key schedule, with the
#          round count stored at byte offset 240; Tbl0 = table base.
# Out:     s0-s3 = result; key has been advanced by 16 per round key read.
# Scratch: t0-t3, acc00-acc15, Tbl1, mask80, mask1b, CTR.
#
# mask80 holds 0x80808080 and mask1b holds 0x1b1b1b1b; when SIZE_T is 8
# both are replicated into the upper 32 bits as well.
1054 .align  4
1055 Lppc_AES_decrypt_compact:
1056         lwz     $acc00,240($key)
1057         addi    $Tbl1,$Tbl0,2048
1058         lwz     $t0,0($key)
1059         lis     $mask80,0x8080
1060         lwz     $t1,4($key)
1061         lis     $mask1b,0x1b1b
1062         lwz     $t2,8($key)
1063         ori     $mask80,$mask80,0x8080
1064         lwz     $t3,12($key)
1065         ori     $mask1b,$mask1b,0x1b1b
1066         addi    $key,$key,16
1067 ___
1068 $code.=<<___ if ($SIZE_T==8);
             # copy the low word of each mask into its own upper half
1069         insrdi  $mask80,$mask80,32,0
1070         insrdi  $mask1b,$mask1b,32,0
1071 ___
1072 $code.=<<___;
             # CTR = full round count; the loop is left through the bdz
             # below, right after the last byte substitution
1073         mtctr   $acc00
1074 .align  4
1075 Ldec_compact_loop:
             # add the current round key, then pass every state byte
             # through the byte table at Tbl1 and reassemble the words
1076         xor     $s0,$s0,$t0
1077         xor     $s1,$s1,$t1
1078         rlwinm  $acc00,$s0,`32-24`,24,31
1079         xor     $s2,$s2,$t2
1080         rlwinm  $acc01,$s1,`32-24`,24,31
1081         xor     $s3,$s3,$t3
1082         rlwinm  $acc02,$s2,`32-24`,24,31
1083         rlwinm  $acc03,$s3,`32-24`,24,31
1084         rlwinm  $acc04,$s3,`32-16`,24,31
1085         rlwinm  $acc05,$s0,`32-16`,24,31
1086         rlwinm  $acc06,$s1,`32-16`,24,31
1087         rlwinm  $acc07,$s2,`32-16`,24,31
1088         lbzx    $acc00,$Tbl1,$acc00
1089         rlwinm  $acc08,$s2,`32-8`,24,31
1090         lbzx    $acc01,$Tbl1,$acc01
1091         rlwinm  $acc09,$s3,`32-8`,24,31
1092         lbzx    $acc02,$Tbl1,$acc02
1093         rlwinm  $acc10,$s0,`32-8`,24,31
1094         lbzx    $acc03,$Tbl1,$acc03
1095         rlwinm  $acc11,$s1,`32-8`,24,31
1096         lbzx    $acc04,$Tbl1,$acc04
1097         rlwinm  $acc12,$s1,`0`,24,31
1098         lbzx    $acc05,$Tbl1,$acc05
1099         rlwinm  $acc13,$s2,`0`,24,31
1100         lbzx    $acc06,$Tbl1,$acc06
1101         rlwinm  $acc14,$s3,`0`,24,31
1102         lbzx    $acc07,$Tbl1,$acc07
1103         rlwinm  $acc15,$s0,`0`,24,31
1104         lbzx    $acc08,$Tbl1,$acc08
1105         rlwinm  $s0,$acc00,24,0,7
1106         lbzx    $acc09,$Tbl1,$acc09
1107         rlwinm  $s1,$acc01,24,0,7
1108         lbzx    $acc10,$Tbl1,$acc10
1109         rlwinm  $s2,$acc02,24,0,7
1110         lbzx    $acc11,$Tbl1,$acc11
1111         rlwinm  $s3,$acc03,24,0,7
1112         lbzx    $acc12,$Tbl1,$acc12
1113         rlwimi  $s0,$acc04,16,8,15
1114         lbzx    $acc13,$Tbl1,$acc13
1115         rlwimi  $s1,$acc05,16,8,15
1116         lbzx    $acc14,$Tbl1,$acc14
1117         rlwimi  $s2,$acc06,16,8,15
1118         lbzx    $acc15,$Tbl1,$acc15
1119         rlwimi  $s3,$acc07,16,8,15
1120         rlwimi  $s0,$acc08,8,16,23
1121         rlwimi  $s1,$acc09,8,16,23
1122         rlwimi  $s2,$acc10,8,16,23
1123         rlwimi  $s3,$acc11,8,16,23
             # t0-t3 = next round key; it is applied either at the top of
             # the next iteration or in Ldec_compact_done
1124         lwz     $t0,0($key)
1125         or      $s0,$s0,$acc12
1126         lwz     $t1,4($key)
1127         or      $s1,$s1,$acc13
1128         lwz     $t2,8($key)
1129         or      $s2,$s2,$acc14
1130         lwz     $t3,12($key)
1131         or      $s3,$s3,$acc15
1132
1133         addi    $key,$key,16
1134         bdz     Ldec_compact_done
1135 ___
1136 $code.=<<___ if ($SIZE_T==8);
1137         # vectorized permutation improves decrypt performance by 10%
             # s1 is packed into the upper half of s0 and s3 into the upper
             # half of s2, so each 64-bit operation below processes two
             # state words; only the even-numbered acc registers are used
             # until the extrdi instructions split the halves apart again
1138         insrdi  $s0,$s1,32,0
1139         insrdi  $s2,$s3,32,0
1140
1141         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1142         and     $acc02,$s2,$mask80
1143         srdi    $acc04,$acc00,7         # r1>>7
1144         srdi    $acc06,$acc02,7
1145         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1146         andc    $acc10,$s2,$mask80
1147         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1148         sub     $acc02,$acc02,$acc06
1149         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1150         add     $acc10,$acc10,$acc10
1151         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1152         and     $acc02,$acc02,$mask1b
1153         xor     $acc00,$acc00,$acc08    # r2
1154         xor     $acc02,$acc02,$acc10
1155
1156         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1157         and     $acc06,$acc02,$mask80
1158         srdi    $acc08,$acc04,7         # r1>>7
1159         srdi    $acc10,$acc06,7
1160         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1161         andc    $acc14,$acc02,$mask80
1162         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1163         sub     $acc06,$acc06,$acc10
1164         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1165         add     $acc14,$acc14,$acc14
1166         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1167         and     $acc06,$acc06,$mask1b
1168         xor     $acc04,$acc04,$acc12    # r4
1169         xor     $acc06,$acc06,$acc14
1170
1171         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1172         and     $acc10,$acc06,$mask80
1173         srdi    $acc12,$acc08,7         # r1>>7
1174         srdi    $acc14,$acc10,7
1175         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1176         sub     $acc10,$acc10,$acc14
1177         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1178         andc    $acc14,$acc06,$mask80
1179         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1180         add     $acc14,$acc14,$acc14
1181         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1182         and     $acc10,$acc10,$mask1b
1183         xor     $acc08,$acc08,$acc12    # r8
1184         xor     $acc10,$acc10,$acc14
1185
1186         xor     $acc00,$acc00,$s0       # r2^r0
1187         xor     $acc02,$acc02,$s2
1188         xor     $acc04,$acc04,$s0       # r4^r0
1189         xor     $acc06,$acc06,$s2
1190
             # move the upper 32 bits of each packed result into the
             # odd-numbered register so the shared 32-bit code that follows
             # sees one word per register, as in the 32-bit variant
1191         extrdi  $acc01,$acc00,32,0
1192         extrdi  $acc03,$acc02,32,0
1193         extrdi  $acc05,$acc04,32,0
1194         extrdi  $acc07,$acc06,32,0
1195         extrdi  $acc09,$acc08,32,0
1196         extrdi  $acc11,$acc10,32,0
1197 ___
1198 $code.=<<___ if ($SIZE_T==4);
             # 32-bit variant: the same r2/r4/r8 computation, one state
             # word per register (acc00-03 = r2, acc04-07 = r4,
             # acc08-11 = r8)
1199         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1200         and     $acc01,$s1,$mask80
1201         and     $acc02,$s2,$mask80
1202         and     $acc03,$s3,$mask80
1203         srwi    $acc04,$acc00,7         # r1>>7
1204         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1205         srwi    $acc05,$acc01,7
1206         andc    $acc09,$s1,$mask80
1207         srwi    $acc06,$acc02,7
1208         andc    $acc10,$s2,$mask80
1209         srwi    $acc07,$acc03,7
1210         andc    $acc11,$s3,$mask80
1211         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1212         sub     $acc01,$acc01,$acc05
1213         sub     $acc02,$acc02,$acc06
1214         sub     $acc03,$acc03,$acc07
1215         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1216         add     $acc09,$acc09,$acc09
1217         add     $acc10,$acc10,$acc10
1218         add     $acc11,$acc11,$acc11
1219         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1220         and     $acc01,$acc01,$mask1b
1221         and     $acc02,$acc02,$mask1b
1222         and     $acc03,$acc03,$mask1b
1223         xor     $acc00,$acc00,$acc08    # r2
1224         xor     $acc01,$acc01,$acc09
1225         xor     $acc02,$acc02,$acc10
1226         xor     $acc03,$acc03,$acc11
1227
1228         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1229         and     $acc05,$acc01,$mask80
1230         and     $acc06,$acc02,$mask80
1231         and     $acc07,$acc03,$mask80
1232         srwi    $acc08,$acc04,7         # r1>>7
1233         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1234         srwi    $acc09,$acc05,7
1235         andc    $acc13,$acc01,$mask80
1236         srwi    $acc10,$acc06,7
1237         andc    $acc14,$acc02,$mask80
1238         srwi    $acc11,$acc07,7
1239         andc    $acc15,$acc03,$mask80
1240         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1241         sub     $acc05,$acc05,$acc09
1242         sub     $acc06,$acc06,$acc10
1243         sub     $acc07,$acc07,$acc11
1244         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1245         add     $acc13,$acc13,$acc13
1246         add     $acc14,$acc14,$acc14
1247         add     $acc15,$acc15,$acc15
1248         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1249         and     $acc05,$acc05,$mask1b
1250         and     $acc06,$acc06,$mask1b
1251         and     $acc07,$acc07,$mask1b
1252         xor     $acc04,$acc04,$acc12    # r4
1253         xor     $acc05,$acc05,$acc13
1254         xor     $acc06,$acc06,$acc14
1255         xor     $acc07,$acc07,$acc15
1256
1257         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1258         and     $acc09,$acc05,$mask80
1259         srwi    $acc12,$acc08,7         # r1>>7
1260         and     $acc10,$acc06,$mask80
1261         srwi    $acc13,$acc09,7
1262         and     $acc11,$acc07,$mask80
1263         srwi    $acc14,$acc10,7
1264         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1265         srwi    $acc15,$acc11,7
1266         sub     $acc09,$acc09,$acc13
1267         sub     $acc10,$acc10,$acc14
1268         sub     $acc11,$acc11,$acc15
1269         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1270         andc    $acc13,$acc05,$mask80
1271         andc    $acc14,$acc06,$mask80
1272         andc    $acc15,$acc07,$mask80
1273         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1274         add     $acc13,$acc13,$acc13
1275         add     $acc14,$acc14,$acc14
1276         add     $acc15,$acc15,$acc15
1277         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1278         and     $acc09,$acc09,$mask1b
1279         and     $acc10,$acc10,$mask1b
1280         and     $acc11,$acc11,$mask1b
1281         xor     $acc08,$acc08,$acc12    # r8
1282         xor     $acc09,$acc09,$acc13
1283         xor     $acc10,$acc10,$acc14
1284         xor     $acc11,$acc11,$acc15
1285
1286         xor     $acc00,$acc00,$s0       # r2^r0
1287         xor     $acc01,$acc01,$s1
1288         xor     $acc02,$acc02,$s2
1289         xor     $acc03,$acc03,$s3
1290         xor     $acc04,$acc04,$s0       # r4^r0
1291         xor     $acc05,$acc05,$s1
1292         xor     $acc06,$acc06,$s2
1293         xor     $acc07,$acc07,$s3
1294 ___
1295 $code.=<<___;
             # Shared tail for both word sizes.  On entry acc00-03 = r2^r0,
             # acc04-07 = r4^r0 and acc08-11 = r8.  Each state word becomes
             # the XOR of the rotated terms named in the comments below.
             # NOTE(review): presumably this is the AES inverse column
             # mixing step - confirm against the specification.
1296         rotrwi  $s0,$s0,8               # = ROTATE(r0,8)
1297         rotrwi  $s1,$s1,8
1298         xor     $s0,$s0,$acc00          # ^= r2^r0
1299         rotrwi  $s2,$s2,8
1300         xor     $s1,$s1,$acc01
1301         rotrwi  $s3,$s3,8
1302         xor     $s2,$s2,$acc02
1303         xor     $s3,$s3,$acc03
1304         xor     $acc00,$acc00,$acc08
1305         xor     $acc01,$acc01,$acc09
1306         xor     $acc02,$acc02,$acc10
1307         xor     $acc03,$acc03,$acc11
1308         xor     $s0,$s0,$acc04          # ^= r4^r0
1309         rotrwi  $acc00,$acc00,24
1310         xor     $s1,$s1,$acc05
1311         rotrwi  $acc01,$acc01,24
1312         xor     $s2,$s2,$acc06
1313         rotrwi  $acc02,$acc02,24
1314         xor     $s3,$s3,$acc07
1315         rotrwi  $acc03,$acc03,24
1316         xor     $acc04,$acc04,$acc08
1317         xor     $acc05,$acc05,$acc09
1318         xor     $acc06,$acc06,$acc10
1319         xor     $acc07,$acc07,$acc11
1320         xor     $s0,$s0,$acc08          # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1321         rotrwi  $acc04,$acc04,16
1322         xor     $s1,$s1,$acc09
1323         rotrwi  $acc05,$acc05,16
1324         xor     $s2,$s2,$acc10
1325         rotrwi  $acc06,$acc06,16
1326         xor     $s3,$s3,$acc11
1327         rotrwi  $acc07,$acc07,16
1328         xor     $s0,$s0,$acc00          # ^= ROTATE(r8^r2^r0,24)
1329         rotrwi  $acc08,$acc08,8
1330         xor     $s1,$s1,$acc01
1331         rotrwi  $acc09,$acc09,8
1332         xor     $s2,$s2,$acc02
1333         rotrwi  $acc10,$acc10,8
1334         xor     $s3,$s3,$acc03
1335         rotrwi  $acc11,$acc11,8
1336         xor     $s0,$s0,$acc04          # ^= ROTATE(r8^r4^r0,16)
1337         xor     $s1,$s1,$acc05
1338         xor     $s2,$s2,$acc06
1339         xor     $s3,$s3,$acc07
1340         xor     $s0,$s0,$acc08          # ^= ROTATE(r8,8)       
1341         xor     $s1,$s1,$acc09  
1342         xor     $s2,$s2,$acc10  
1343         xor     $s3,$s3,$acc11  
1344
1345         b       Ldec_compact_loop
1346 .align  4
1347 Ldec_compact_done:
             # last round: only the final round key remains to be added
1348         xor     $s0,$s0,$t0
1349         xor     $s1,$s1,$t1
1350         xor     $s2,$s2,$t2
1351         xor     $s3,$s3,$t3
1352         blr
1353         .long   0
1354         .byte   0,12,0x14,0,0,0,0,0
1355
1356 .asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1357 .align  7
1358 ___
1359
# Post-process and emit the generated assembly.
# Every `...` span in the accumulated text is replaced by the result of
# evaluating its contents as Perl (this resolves the frame offsets and
# shift counts written as arithmetic expressions in the templates above).
1360 $code =~ s/\`([^\`]*)\`/eval $1/gem;
1361 print $code;
# Output is buffered, so a write failure may only surface when the handle
# is closed.  Fail loudly instead of silently leaving truncated assembly.
1362 close STDOUT or die "error closing STDOUT: $!";