Minor x86_64 perlasm update.
[openssl.git] / crypto / aes / asm / aes-ppc.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # Needs more work: key setup, CBC routine...
11 #
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17 # at 1/3 of ppc_AES_decrypt.
18
19 # February 2010
20 #
21 # Rescheduling instructions to favour Power6 pipeline gave 10%
22 # performance improvement on the platform in question (and marginal
23 # improvement even on others). It should be noted that Power6 fails
24 # to process byte in 18 cycles, only in 23, because it fails to issue
25 # 4 load instructions in two cycles, only in 3. As a result, non-compact
26 # block subroutines are 25% slower than one would expect. Compact
27 # functions scale better, because they have pure computational part,
28 # which scales perfectly with clock frequency. To be specific
29 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
# The first command-line argument names the target flavour.  It selects the
# machine word size and the matching load/store mnemonics used by the
# prologue/epilogue code generated further down:
#   $SIZE_T - size of a saved register slot in bytes
#   $LRSAVE - offset of the link-register save slot, used as $FRAME+$LRSAVE
#   $STU    - store-with-update mnemonic used to allocate the stack frame
#   $POP    - load mnemonic used to restore saved registers
#   $PUSH   - store mnemonic used to save registers
$flavour = shift;

{
    # One row per supported ABI, tried in order (64-bit first):
    # [ flavour pattern, SIZE_T, LRSAVE in SIZE_T units, STU, POP, PUSH ]
    my @abi_rows = (
        [ qr/64/, 8, 2, "stdu", "ld",  "std" ],
        [ qr/32/, 4, 1, "stwu", "lwz", "stw" ],
    );

    my ($row) = grep { $flavour =~ $_->[0] } @abi_rows;
    die "nonsense $flavour" unless $row;

    my $lr_slots;
    (undef, $SIZE_T, $lr_slots, $STU, $POP, $PUSH) = @$row;
    $LRSAVE = $lr_slots*$SIZE_T;
}
47
# Locate the ppc-xlate.pl translator: first in the directory this script
# lives in, then in the shared perlasm directory two levels up.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Redirect STDOUT into the translator; the next command-line argument is
# passed through to it after the flavour.
# The call is parenthesised and checked with low-precedence "or": written as
# '"...".shift || die', the "||" applies to the (always true) command string,
# so a failure of open() itself would never be reported.
open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";

# Stack frame size: 32 register-sized slots.  The generated prologues save
# registers at offsets $FRAME-$SIZE_T*20 .. $FRAME-$SIZE_T*1.
$FRAME=32*$SIZE_T;
56
# Append lookup-table data to the assembly text accumulated in $code.
#
# Every argument is written as one ".long" directive that carries the value
# twice, so each table entry occupies 8 bytes in the output.  Processing
# stops at the first undefined argument; a value of 0 is a regular entry.
#
# The sub used to be declared with an empty prototype "()" although it
# consumes arguments, which forced callers to write &_data_word(...) to
# bypass the prototype check.  It is declared without a prototype now, so
# both &_data_word(...) and _data_word(...) work.
sub _data_word
{
    foreach my $word (@_) {
        last unless defined($word);
        $code.=sprintf("\t.long\t0x%08x,0x%08x\n",$word,$word);
    }
    return;
}
61
# Symbolic names for the PowerPC registers referenced by the generated code.

# Stack pointer, TOC pointer and the three routine arguments: r1..r5.
($sp,$toc,$inp,$out,$key) = map { "r$_" } 1..5;

# Lookup-table base pointers.  $Tbl0 shares r3 with $inp, and $Tbl3 starts
# out on r2 (the same register as $toc) until the fixup below.
($Tbl0,$Tbl1,$Tbl2,$Tbl3) = ("r3","r6","r7","r2");

# The four 32-bit cipher state words: r8..r11.
($s0,$s1,$s2,$s3) = map { "r$_" } 8..11;

# Temporaries: r12..r15 (adjusted by the fixup below).
($t0,$t1,$t2,$t3) = map { "r$_" } 12..15;

# Sixteen scratch accumulators: r16..r31.
($acc00,$acc01,$acc02,$acc03,
 $acc04,$acc05,$acc06,$acc07,
 $acc08,$acc09,$acc10,$acc11,
 $acc12,$acc13,$acc14,$acc15) = map { "r$_" } 16..31;

# stay away from TLS pointer
if ($SIZE_T!=8) {
        # Non-64-bit build: take $Tbl3 off r2 by giving it $t0's register
        # and move $t0 to r0.
        die if ($Tbl3 ne "r2");
        ($Tbl3,$t0) = ($t0,"r0");
} else {
        # 64-bit build: take $t1 off r13 and use r0 instead.
        die if ($t1 ne "r13");
        $t1="r0";
}

# The mask constants of the compact routines reuse two table-pointer
# registers.
($mask80,$mask1b) = ($Tbl2,$Tbl3);
108
# Emit the assembler preamble and two small address helpers, LAES_Te and
# LAES_Td.  Each helper saves LR in r0, executes "bcl 20,31,$+4" (a
# branch-and-link to the very next instruction) so that LR holds the address
# of the following mflr, copies that address into $Tbl0, adds a constant
# distance to reach table data, restores LR and returns.
#
# Layout arithmetic behind the constants:
#   - each helper is 9 words (6 instructions, one .long, 8 .byte) = 36 bytes;
#     .space pads LAES_Te to 64 bytes and LAES_Td to a further 64 bytes, so
#     the table data starts 128 bytes after LAES_Te;
#   - the address captured by bcl is helper+8, hence 128-8 for LAES_Te;
#   - LAES_Td sits 64 bytes in, hence 128-64-8, plus 2048+256 to skip the
#     2048 bytes of words and the 256 bytes of .byte data emitted right after
#     this heredoc.
# NOTE(review): the backticked expressions are left as text by this heredoc;
# presumably they are evaluated when $code is post-processed later in the
# file (not visible here) -- confirm.
109 $code.=<<___;
110 .machine        "any"
111 .text
112
113 .align  7
114 LAES_Te:
115         mflr    r0
116         bcl     20,31,\$+4
117         mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
118         addi    $Tbl0,$Tbl0,`128-8`
119         mtlr    r0
120         blr
121         .long   0
122         .byte   0,12,0x14,0,0,0,0,0
123         .space  `64-9*4`
124 LAES_Td:
125         mflr    r0
126         bcl     20,31,\$+4
127         mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
128         addi    $Tbl0,$Tbl0,`128-64-8+2048+256`
129         mtlr    r0
130         blr
131         .long   0
132         .byte   0,12,0x14,0,0,0,0,0
133         .space  `128-64-9*4`
134 ___
# Te: 256 32-bit words of encryption table data.  _data_word() writes every
# value twice, so this call contributes 256*8 = 2048 bytes, placed directly
# after the LAES_Te/LAES_Td helpers; LAES_Te returns the address of the
# first entry.
135 &_data_word(
136         0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
137         0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
138         0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
139         0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
140         0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
141         0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
142         0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
143         0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
144         0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
145         0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
146         0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
147         0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
148         0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
149         0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
150         0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
151         0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
152         0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
153         0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
154         0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
155         0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
156         0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
157         0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
158         0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
159         0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
160         0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
161         0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
162         0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
163         0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
164         0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
165         0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
166         0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
167         0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
168         0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
169         0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
170         0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
171         0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
172         0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
173         0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
174         0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
175         0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
176         0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
177         0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
178         0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
179         0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
180         0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
181         0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
182         0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
183         0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
184         0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
185         0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
186         0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
187         0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
188         0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
189         0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
190         0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
191         0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
192         0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
193         0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
194         0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
195         0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
196         0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
197         0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
198         0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
199         0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# Te4: 256 single-byte entries (the values are the AES S-box), emitted right
# after the 2048 bytes of Te.  The encrypt routines reach this table at
# $Tbl0+2048 and index it with byte loads.
200 $code.=<<___;
201 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
202 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
203 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
204 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
205 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
206 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
207 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
208 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
209 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
210 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
211 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
212 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
213 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
214 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
215 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
216 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
217 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
218 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
219 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
220 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
221 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
222 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
223 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
224 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
225 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
226 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
227 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
228 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
229 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
230 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
231 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
232 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
233 ___
# Td: 256 32-bit words of table data, written by _data_word() in the same
# doubled form as Te (8 bytes per entry, 2048 bytes in total).  The table
# begins 2048+256 bytes past the first Te entry, which is the address the
# LAES_Td helper computes.
# NOTE(review): presumably consumed by the decrypt routines, which are not
# fully visible in this part of the file -- confirm.
234 &_data_word(
235         0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
236         0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
237         0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
238         0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
239         0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
240         0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
241         0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
242         0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
243         0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
244         0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
245         0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
246         0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
247         0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
248         0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
249         0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
250         0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
251         0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
252         0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
253         0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
254         0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
255         0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
256         0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
257         0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
258         0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
259         0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
260         0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
261         0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
262         0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
263         0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
264         0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
265         0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
266         0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
267         0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
268         0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
269         0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
270         0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
271         0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
272         0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
273         0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
274         0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
275         0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
276         0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
277         0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
278         0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
279         0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
280         0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
281         0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
282         0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
283         0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
284         0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
285         0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
286         0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
287         0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
288         0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
289         0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
290         0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
291         0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
292         0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
293         0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
294         0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
295         0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
296         0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
297         0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
298         0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
299 $code.=<<___;
300 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
301 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
302 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
303 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
304 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
305 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
306 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
307 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
308 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
309 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
310 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
311 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
312 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
313 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
314 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
315 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
316 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
317 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
318 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
319 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
320 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
321 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
322 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
323 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
324 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
325 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
326 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
327 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
328 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
329 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
330 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
331 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
332
333
334 .globl  .AES_encrypt
335 .align  7
336 .AES_encrypt:
337         $STU    $sp,-$FRAME($sp)
338         mflr    r0
339
340         $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
341         $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
342         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
343         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
344         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
345         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
346         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
347         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
348         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
349         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
350         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
351         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
352         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
353         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
354         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
355         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
356         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
357         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
358         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
359         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
360         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
361
362         andi.   $t0,$inp,3
363         andi.   $t1,$out,3
364         or.     $t0,$t0,$t1
365         bne     Lenc_unaligned
366
367 Lenc_unaligned_ok:
368         lwz     $s0,0($inp)
369         lwz     $s1,4($inp)
370         lwz     $s2,8($inp)
371         lwz     $s3,12($inp)
372         bl      LAES_Te
373         bl      Lppc_AES_encrypt_compact
374         stw     $s0,0($out)
375         stw     $s1,4($out)
376         stw     $s2,8($out)
377         stw     $s3,12($out)
378         b       Lenc_done
379
380 Lenc_unaligned:
381         subfic  $t0,$inp,4096
382         subfic  $t1,$out,4096
383         andi.   $t0,$t0,4096-16
384         beq     Lenc_xpage
385         andi.   $t1,$t1,4096-16
386         bne     Lenc_unaligned_ok
387
388 Lenc_xpage:
389         lbz     $acc00,0($inp)
390         lbz     $acc01,1($inp)
391         lbz     $acc02,2($inp)
392         lbz     $s0,3($inp)
393         lbz     $acc04,4($inp)
394         lbz     $acc05,5($inp)
395         lbz     $acc06,6($inp)
396         lbz     $s1,7($inp)
397         lbz     $acc08,8($inp)
398         lbz     $acc09,9($inp)
399         lbz     $acc10,10($inp)
400         lbz     $s2,11($inp)
401         lbz     $acc12,12($inp)
402         lbz     $acc13,13($inp)
403         lbz     $acc14,14($inp)
404         lbz     $s3,15($inp)
405         insrwi  $s0,$acc00,8,0
406         insrwi  $s1,$acc04,8,0
407         insrwi  $s0,$acc01,8,8
408         insrwi  $s1,$acc05,8,8
409         insrwi  $s0,$acc02,8,16
410         insrwi  $s1,$acc06,8,16
411         insrwi  $s2,$acc08,8,0
412         insrwi  $s3,$acc12,8,0
413         insrwi  $s2,$acc09,8,8
414         insrwi  $s3,$acc13,8,8
415         insrwi  $s2,$acc10,8,16
416         insrwi  $s3,$acc14,8,16
417
418         bl      LAES_Te
419         bl      Lppc_AES_encrypt_compact
420
421         extrwi  $acc00,$s0,8,0
422         extrwi  $acc01,$s0,8,8
423         stb     $acc00,0($out)
424         extrwi  $acc02,$s0,8,16
425         stb     $acc01,1($out)
426         stb     $acc02,2($out)
427         extrwi  $acc04,$s1,8,0
428         stb     $s0,3($out)
429         extrwi  $acc05,$s1,8,8
430         stb     $acc04,4($out)
431         extrwi  $acc06,$s1,8,16
432         stb     $acc05,5($out)
433         stb     $acc06,6($out)
434         extrwi  $acc08,$s2,8,0
435         stb     $s1,7($out)
436         extrwi  $acc09,$s2,8,8
437         stb     $acc08,8($out)
438         extrwi  $acc10,$s2,8,16
439         stb     $acc09,9($out)
440         stb     $acc10,10($out)
441         extrwi  $acc12,$s3,8,0
442         stb     $s2,11($out)
443         extrwi  $acc13,$s3,8,8
444         stb     $acc12,12($out)
445         extrwi  $acc14,$s3,8,16
446         stb     $acc13,13($out)
447         stb     $acc14,14($out)
448         stb     $s3,15($out)
449
450 Lenc_done:
451         $POP    r0,`$FRAME+$LRSAVE`($sp)
452         $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
453         $POP    r13,`$FRAME-$SIZE_T*19`($sp)
454         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
455         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
456         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
457         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
458         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
459         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
460         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
461         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
462         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
463         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
464         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
465         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
466         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
467         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
468         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
469         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
470         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
471         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
472         mtlr    r0
473         addi    $sp,$sp,$FRAME
474         blr
475         .long   0
476         .byte   0,12,4,1,0x80,18,3,0
477         .long   0
478
479 .align  5
480 Lppc_AES_encrypt:
481         lwz     $acc00,240($key)
482         lwz     $t0,0($key)
483         lwz     $t1,4($key)
484         lwz     $t2,8($key)
485         lwz     $t3,12($key)
486         addi    $Tbl1,$Tbl0,3
487         addi    $Tbl2,$Tbl0,2
488         addi    $Tbl3,$Tbl0,1
489         addi    $acc00,$acc00,-1
490         addi    $key,$key,16
491         xor     $s0,$s0,$t0
492         xor     $s1,$s1,$t1
493         xor     $s2,$s2,$t2
494         xor     $s3,$s3,$t3
495         mtctr   $acc00
496 .align  4
497 Lenc_loop:
498         rlwinm  $acc00,$s0,`32-24+3`,21,28
499         rlwinm  $acc01,$s1,`32-24+3`,21,28
500         rlwinm  $acc02,$s2,`32-24+3`,21,28
501         rlwinm  $acc03,$s3,`32-24+3`,21,28
502         lwz     $t0,0($key)
503         lwz     $t1,4($key)
504         rlwinm  $acc04,$s1,`32-16+3`,21,28
505         rlwinm  $acc05,$s2,`32-16+3`,21,28
506         lwz     $t2,8($key)
507         lwz     $t3,12($key)
508         rlwinm  $acc06,$s3,`32-16+3`,21,28
509         rlwinm  $acc07,$s0,`32-16+3`,21,28
510         lwzx    $acc00,$Tbl0,$acc00
511         lwzx    $acc01,$Tbl0,$acc01
512         rlwinm  $acc08,$s2,`32-8+3`,21,28
513         rlwinm  $acc09,$s3,`32-8+3`,21,28
514         lwzx    $acc02,$Tbl0,$acc02
515         lwzx    $acc03,$Tbl0,$acc03
516         rlwinm  $acc10,$s0,`32-8+3`,21,28
517         rlwinm  $acc11,$s1,`32-8+3`,21,28
518         lwzx    $acc04,$Tbl1,$acc04
519         lwzx    $acc05,$Tbl1,$acc05
520         rlwinm  $acc12,$s3,`0+3`,21,28
521         rlwinm  $acc13,$s0,`0+3`,21,28
522         lwzx    $acc06,$Tbl1,$acc06
523         lwzx    $acc07,$Tbl1,$acc07
524         rlwinm  $acc14,$s1,`0+3`,21,28
525         rlwinm  $acc15,$s2,`0+3`,21,28
526         lwzx    $acc08,$Tbl2,$acc08
527         lwzx    $acc09,$Tbl2,$acc09
528         xor     $t0,$t0,$acc00
529         xor     $t1,$t1,$acc01
530         lwzx    $acc10,$Tbl2,$acc10
531         lwzx    $acc11,$Tbl2,$acc11
532         xor     $t2,$t2,$acc02
533         xor     $t3,$t3,$acc03
534         lwzx    $acc12,$Tbl3,$acc12
535         lwzx    $acc13,$Tbl3,$acc13
536         xor     $t0,$t0,$acc04
537         xor     $t1,$t1,$acc05
538         lwzx    $acc14,$Tbl3,$acc14
539         lwzx    $acc15,$Tbl3,$acc15
540         xor     $t2,$t2,$acc06
541         xor     $t3,$t3,$acc07
542         xor     $t0,$t0,$acc08
543         xor     $t1,$t1,$acc09
544         xor     $t2,$t2,$acc10
545         xor     $t3,$t3,$acc11
546         xor     $s0,$t0,$acc12
547         xor     $s1,$t1,$acc13
548         xor     $s2,$t2,$acc14
549         xor     $s3,$t3,$acc15
550         addi    $key,$key,16
551         bdnz-   Lenc_loop
552
553         addi    $Tbl2,$Tbl0,2048
554         nop
555         lwz     $t0,0($key)
556         lwz     $t1,4($key)
557         rlwinm  $acc00,$s0,`32-24`,24,31
558         rlwinm  $acc01,$s1,`32-24`,24,31
559         lwz     $t2,8($key)
560         lwz     $t3,12($key)
561         rlwinm  $acc02,$s2,`32-24`,24,31
562         rlwinm  $acc03,$s3,`32-24`,24,31
563         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Te4
564         lwz     $acc09,`2048+32`($Tbl0)
565         rlwinm  $acc04,$s1,`32-16`,24,31
566         rlwinm  $acc05,$s2,`32-16`,24,31
567         lwz     $acc10,`2048+64`($Tbl0)
568         lwz     $acc11,`2048+96`($Tbl0)
569         rlwinm  $acc06,$s3,`32-16`,24,31
570         rlwinm  $acc07,$s0,`32-16`,24,31
571         lwz     $acc12,`2048+128`($Tbl0)
572         lwz     $acc13,`2048+160`($Tbl0)
573         rlwinm  $acc08,$s2,`32-8`,24,31
574         rlwinm  $acc09,$s3,`32-8`,24,31
575         lwz     $acc14,`2048+192`($Tbl0)
576         lwz     $acc15,`2048+224`($Tbl0)
577         rlwinm  $acc10,$s0,`32-8`,24,31
578         rlwinm  $acc11,$s1,`32-8`,24,31
579         lbzx    $acc00,$Tbl2,$acc00
580         lbzx    $acc01,$Tbl2,$acc01
581         rlwinm  $acc12,$s3,`0`,24,31
582         rlwinm  $acc13,$s0,`0`,24,31
583         lbzx    $acc02,$Tbl2,$acc02
584         lbzx    $acc03,$Tbl2,$acc03
585         rlwinm  $acc14,$s1,`0`,24,31
586         rlwinm  $acc15,$s2,`0`,24,31
587         lbzx    $acc04,$Tbl2,$acc04
588         lbzx    $acc05,$Tbl2,$acc05
589         rlwinm  $s0,$acc00,24,0,7
590         rlwinm  $s1,$acc01,24,0,7
591         lbzx    $acc06,$Tbl2,$acc06
592         lbzx    $acc07,$Tbl2,$acc07
593         rlwinm  $s2,$acc02,24,0,7
594         rlwinm  $s3,$acc03,24,0,7
595         lbzx    $acc08,$Tbl2,$acc08
596         lbzx    $acc09,$Tbl2,$acc09
597         rlwimi  $s0,$acc04,16,8,15
598         rlwimi  $s1,$acc05,16,8,15
599         lbzx    $acc10,$Tbl2,$acc10
600         lbzx    $acc11,$Tbl2,$acc11
601         rlwimi  $s2,$acc06,16,8,15
602         rlwimi  $s3,$acc07,16,8,15
603         lbzx    $acc12,$Tbl2,$acc12
604         lbzx    $acc13,$Tbl2,$acc13
605         rlwimi  $s0,$acc08,8,16,23
606         rlwimi  $s1,$acc09,8,16,23
607         lbzx    $acc14,$Tbl2,$acc14
608         lbzx    $acc15,$Tbl2,$acc15
609         rlwimi  $s2,$acc10,8,16,23
610         rlwimi  $s3,$acc11,8,16,23
611         or      $s0,$s0,$acc12
612         or      $s1,$s1,$acc13
613         or      $s2,$s2,$acc14
614         or      $s3,$s3,$acc15
615         xor     $s0,$s0,$t0
616         xor     $s1,$s1,$t1
617         xor     $s2,$s2,$t2
618         xor     $s3,$s3,$t3
619         blr
620         .long   0
621         .byte   0,12,0x14,0,0,0,0,0
622
623 .align  4
624 Lppc_AES_encrypt_compact:
625         lwz     $acc00,240($key)
626         lwz     $t0,0($key)
627         lwz     $t1,4($key)
628         lwz     $t2,8($key)
629         lwz     $t3,12($key)
630         addi    $Tbl1,$Tbl0,2048
631         lis     $mask80,0x8080
632         lis     $mask1b,0x1b1b
633         addi    $key,$key,16
634         ori     $mask80,$mask80,0x8080
635         ori     $mask1b,$mask1b,0x1b1b
636         mtctr   $acc00
637 .align  4
638 Lenc_compact_loop:
639         xor     $s0,$s0,$t0
640         xor     $s1,$s1,$t1
641         xor     $s2,$s2,$t2
642         xor     $s3,$s3,$t3
643         rlwinm  $acc00,$s0,`32-24`,24,31
644         rlwinm  $acc01,$s1,`32-24`,24,31
645         rlwinm  $acc02,$s2,`32-24`,24,31
646         rlwinm  $acc03,$s3,`32-24`,24,31
647         rlwinm  $acc04,$s1,`32-16`,24,31
648         rlwinm  $acc05,$s2,`32-16`,24,31
649         rlwinm  $acc06,$s3,`32-16`,24,31
650         rlwinm  $acc07,$s0,`32-16`,24,31
651         lbzx    $acc00,$Tbl1,$acc00
652         lbzx    $acc01,$Tbl1,$acc01
653         rlwinm  $acc08,$s2,`32-8`,24,31
654         rlwinm  $acc09,$s3,`32-8`,24,31
655         lbzx    $acc02,$Tbl1,$acc02
656         lbzx    $acc03,$Tbl1,$acc03
657         rlwinm  $acc10,$s0,`32-8`,24,31
658         rlwinm  $acc11,$s1,`32-8`,24,31
659         lbzx    $acc04,$Tbl1,$acc04
660         lbzx    $acc05,$Tbl1,$acc05
661         rlwinm  $acc12,$s3,`0`,24,31
662         rlwinm  $acc13,$s0,`0`,24,31
663         lbzx    $acc06,$Tbl1,$acc06
664         lbzx    $acc07,$Tbl1,$acc07
665         rlwinm  $acc14,$s1,`0`,24,31
666         rlwinm  $acc15,$s2,`0`,24,31
667         lbzx    $acc08,$Tbl1,$acc08
668         lbzx    $acc09,$Tbl1,$acc09
669         rlwinm  $s0,$acc00,24,0,7
670         rlwinm  $s1,$acc01,24,0,7
671         lbzx    $acc10,$Tbl1,$acc10
672         lbzx    $acc11,$Tbl1,$acc11
673         rlwinm  $s2,$acc02,24,0,7
674         rlwinm  $s3,$acc03,24,0,7
675         lbzx    $acc12,$Tbl1,$acc12
676         lbzx    $acc13,$Tbl1,$acc13
677         rlwimi  $s0,$acc04,16,8,15
678         rlwimi  $s1,$acc05,16,8,15
679         lbzx    $acc14,$Tbl1,$acc14
680         lbzx    $acc15,$Tbl1,$acc15
681         rlwimi  $s2,$acc06,16,8,15
682         rlwimi  $s3,$acc07,16,8,15
683         rlwimi  $s0,$acc08,8,16,23
684         rlwimi  $s1,$acc09,8,16,23
685         rlwimi  $s2,$acc10,8,16,23
686         rlwimi  $s3,$acc11,8,16,23
687         lwz     $t0,0($key)
688         lwz     $t1,4($key)
689         or      $s0,$s0,$acc12
690         or      $s1,$s1,$acc13
691         lwz     $t2,8($key)
692         lwz     $t3,12($key)
693         or      $s2,$s2,$acc14
694         or      $s3,$s3,$acc15
695
696         addi    $key,$key,16
697         bdz     Lenc_compact_done
698
699         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
700         and     $acc01,$s1,$mask80
701         and     $acc02,$s2,$mask80
702         and     $acc03,$s3,$mask80
703         srwi    $acc04,$acc00,7         # r1>>7
704         srwi    $acc05,$acc01,7
705         srwi    $acc06,$acc02,7
706         srwi    $acc07,$acc03,7
707         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
708         andc    $acc09,$s1,$mask80
709         andc    $acc10,$s2,$mask80
710         andc    $acc11,$s3,$mask80
711         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
712         sub     $acc01,$acc01,$acc05
713         sub     $acc02,$acc02,$acc06
714         sub     $acc03,$acc03,$acc07
715         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
716         add     $acc09,$acc09,$acc09
717         add     $acc10,$acc10,$acc10
718         add     $acc11,$acc11,$acc11
719         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
720         and     $acc01,$acc01,$mask1b
721         and     $acc02,$acc02,$mask1b
722         and     $acc03,$acc03,$mask1b
723         xor     $acc00,$acc00,$acc08    # r2
724         xor     $acc01,$acc01,$acc09
725         xor     $acc02,$acc02,$acc10
726         xor     $acc03,$acc03,$acc11
727
728         rotlwi  $acc12,$s0,16           # ROTATE(r0,16)
729         rotlwi  $acc13,$s1,16
730         rotlwi  $acc14,$s2,16
731         rotlwi  $acc15,$s3,16
732         xor     $s0,$s0,$acc00          # r0^r2
733         xor     $s1,$s1,$acc01
734         xor     $s2,$s2,$acc02
735         xor     $s3,$s3,$acc03
736         rotrwi  $s0,$s0,24              # ROTATE(r2^r0,24)
737         rotrwi  $s1,$s1,24
738         rotrwi  $s2,$s2,24
739         rotrwi  $s3,$s3,24
740         xor     $s0,$s0,$acc00          # ROTATE(r2^r0,24)^r2
741         xor     $s1,$s1,$acc01
742         xor     $s2,$s2,$acc02
743         xor     $s3,$s3,$acc03
744         rotlwi  $acc08,$acc12,8         # ROTATE(r0,24)
745         rotlwi  $acc09,$acc13,8
746         rotlwi  $acc10,$acc14,8
747         rotlwi  $acc11,$acc15,8
748         xor     $s0,$s0,$acc12          #
749         xor     $s1,$s1,$acc13
750         xor     $s2,$s2,$acc14
751         xor     $s3,$s3,$acc15
752         xor     $s0,$s0,$acc08          #
753         xor     $s1,$s1,$acc09
754         xor     $s2,$s2,$acc10
755         xor     $s3,$s3,$acc11
756
757         b       Lenc_compact_loop
758 .align  4
759 Lenc_compact_done:
760         xor     $s0,$s0,$t0
761         xor     $s1,$s1,$t1
762         xor     $s2,$s2,$t2
763         xor     $s3,$s3,$t3
764         blr
765         .long   0
766         .byte   0,12,0x14,0,0,0,0,0
767
# AES_decrypt: exported entry point that decrypts one 16-byte block.
# It reads 16 bytes at inp, hands them to Lppc_AES_decrypt_compact in
# s0..s3, and writes the 16 result bytes to out.  The key schedule is
# reached through key by the callee.
# NOTE(review): inp, out and key are register aliases defined earlier in
# this file, outside this excerpt - confirm they correspond to the three
# arguments of the C prototype.
768 .globl  .AES_decrypt
769 .align  7
770 .AES_decrypt:
# Allocate a FRAME-byte stack frame and pick up the return address.
771         $STU    $sp,-$FRAME($sp)
772         mflr    r0
773
# Save the TOC register and r13-r31 in the top twenty SIZE_T-sized slots
# of the new frame.  The return address (in r0) goes to offset
# FRAME+LRSAVE, i.e. LRSAVE bytes above the stack pointer value that was
# in effect on entry.
774         $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
775         $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
776         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
777         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
778         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
779         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
780         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
781         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
782         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
783         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
784         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
785         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
786         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
787         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
788         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
789         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
790         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
791         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
792         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
793         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
794         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
795
# Isolate the low two bits of both pointers.  A non-zero OR means at
# least one of them is not 4-byte aligned, which needs the extra check
# at Ldec_unaligned before word loads and stores may be used.
796         andi.   $t0,$inp,3
797         andi.   $t1,$out,3
798         or.     $t0,$t0,$t1
799         bne     Ldec_unaligned
800
# Word-at-a-time path: four 32-bit loads, decrypt, four 32-bit stores.
# NOTE(review): LAES_Td is defined outside this excerpt; presumably it
# leaves the address of the decryption tables in Tbl0 - confirm.
801 Ldec_unaligned_ok:
802         lwz     $s0,0($inp)
803         lwz     $s1,4($inp)
804         lwz     $s2,8($inp)
805         lwz     $s3,12($inp)
806         bl      LAES_Td
807         bl      Lppc_AES_decrypt_compact
808         stw     $s0,0($out)
809         stw     $s1,4($out)
810         stw     $s2,8($out)
811         stw     $s3,12($out)
812         b       Ldec_done
813
# At least one pointer is misaligned.  For each pointer compute
# (4096 - address) and keep bits 4..11 (mask 4096-16).  The masked value
# is zero when the address is exactly on a 4096-byte boundary or fewer
# than 16 bytes below the next one, so a 16-byte access starting there
# could run across that boundary.  If that holds for either pointer,
# take the byte-wise path below; otherwise go back to the word-wise path.
814 Ldec_unaligned:
815         subfic  $t0,$inp,4096
816         subfic  $t1,$out,4096
817         andi.   $t0,$t0,4096-16
818         beq     Ldec_xpage
819         andi.   $t1,$t1,4096-16
820         bne     Ldec_unaligned_ok
821
# Byte-wise path.  Load the 16 input bytes one at a time; the last byte
# of each group of four is loaded straight into s0..s3 as the low byte.
822 Ldec_xpage:
823         lbz     $acc00,0($inp)
824         lbz     $acc01,1($inp)
825         lbz     $acc02,2($inp)
826         lbz     $s0,3($inp)
827         lbz     $acc04,4($inp)
828         lbz     $acc05,5($inp)
829         lbz     $acc06,6($inp)
830         lbz     $s1,7($inp)
831         lbz     $acc08,8($inp)
832         lbz     $acc09,9($inp)
833         lbz     $acc10,10($inp)
834         lbz     $s2,11($inp)
835         lbz     $acc12,12($inp)
836         lbz     $acc13,13($inp)
837         lbz     $acc14,14($inp)
838         lbz     $s3,15($inp)
# Insert the remaining three bytes of each group at bit offsets 0, 8 and
# 16 (counted from the most significant bit), so the byte at the lowest
# address ends up most significant - the same layout lwz produces above.
839         insrwi  $s0,$acc00,8,0
840         insrwi  $s1,$acc04,8,0
841         insrwi  $s0,$acc01,8,8
842         insrwi  $s1,$acc05,8,8
843         insrwi  $s0,$acc02,8,16
844         insrwi  $s1,$acc06,8,16
845         insrwi  $s2,$acc08,8,0
846         insrwi  $s3,$acc12,8,0
847         insrwi  $s2,$acc09,8,8
848         insrwi  $s3,$acc13,8,8
849         insrwi  $s2,$acc10,8,16
850         insrwi  $s3,$acc14,8,16
851
852         bl      LAES_Td
853         bl      Lppc_AES_decrypt_compact
854
# Split each result word back into bytes (extrwi pulls out the three
# upper bytes, stb of the word itself stores the low byte) and write
# them out one byte at a time.
855         extrwi  $acc00,$s0,8,0
856         extrwi  $acc01,$s0,8,8
857         stb     $acc00,0($out)
858         extrwi  $acc02,$s0,8,16
859         stb     $acc01,1($out)
860         stb     $acc02,2($out)
861         extrwi  $acc04,$s1,8,0
862         stb     $s0,3($out)
863         extrwi  $acc05,$s1,8,8
864         stb     $acc04,4($out)
865         extrwi  $acc06,$s1,8,16
866         stb     $acc05,5($out)
867         stb     $acc06,6($out)
868         extrwi  $acc08,$s2,8,0
869         stb     $s1,7($out)
870         extrwi  $acc09,$s2,8,8
871         stb     $acc08,8($out)
872         extrwi  $acc10,$s2,8,16
873         stb     $acc09,9($out)
874         stb     $acc10,10($out)
875         extrwi  $acc12,$s3,8,0
876         stb     $s2,11($out)
877         extrwi  $acc13,$s3,8,8
878         stb     $acc12,12($out)
879         extrwi  $acc14,$s3,8,16
880         stb     $acc13,13($out)
881         stb     $acc14,14($out)
882         stb     $s3,15($out)
883
# Common exit: reload the return address and every register saved in the
# prologue from the same slots, release the frame and return.
884 Ldec_done:
885         $POP    r0,`$FRAME+$LRSAVE`($sp)
886         $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
887         $POP    r13,`$FRAME-$SIZE_T*19`($sp)
888         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
889         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
890         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
891         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
892         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
893         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
894         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
895         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
896         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
897         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
898         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
899         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
900         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
901         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
902         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
903         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
904         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
905         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
906         mtlr    r0
907         addi    $sp,$sp,$FRAME
908         blr
# NOTE(review): the data words after blr look like a traceback-table
# style tag describing the function - confirm against the ABI documents.
909         .long   0
910         .byte   0,12,4,1,0x80,18,3,0
911         .long   0
912
# Lppc_AES_decrypt: table-driven decryption of the block held in s0..s3.
# In:   s0..s3 = input block, key = key schedule, Tbl0 = table base.
# Out:  s0..s3 = result.
# Also overwrites t0..t3, acc00..acc15, Tbl1..Tbl3, key and the count
# register.  The AES_decrypt entry point visible in this part of the file
# calls the compact variant rather than this routine.
913 .align  5
914 Lppc_AES_decrypt:
# Word at offset 240 of the key structure supplies the loop count
# (presumably the number of rounds - confirm against the key setup code).
915         lwz     $acc00,240($key)
916         lwz     $t0,0($key)
917         lwz     $t1,4($key)
918         lwz     $t2,8($key)
919         lwz     $t3,12($key)
# Tbl1..Tbl3 address the same table shifted by 3, 2 and 1 bytes.
# NOTE(review): presumably each 8-byte entry stores its word twice so the
# shifted loads return byte-rotated copies - the table itself is outside
# this excerpt, confirm there.
920         addi    $Tbl1,$Tbl0,3
921         addi    $Tbl2,$Tbl0,2
922         addi    $Tbl3,$Tbl0,1
# One iteration fewer than the loaded count: the last round is done
# separately after the loop.
923         addi    $acc00,$acc00,-1
924         addi    $key,$key,16
# XOR the first round key into the state.
925         xor     $s0,$s0,$t0
926         xor     $s1,$s1,$t1
927         xor     $s2,$s2,$t2
928         xor     $s3,$s3,$t3
929         mtctr   $acc00
930 .align  4
# Main round.  Every rlwinm extracts one state byte already multiplied by
# 8 (mask bits 21..28), which is the byte offset of an 8-byte table entry.
# Byte sources per output word:
#   top byte      from s0,s1,s2,s3  -> lookup through Tbl0
#   second byte   from s3,s0,s1,s2  -> lookup through Tbl1
#   third byte    from s2,s3,s0,s1  -> lookup through Tbl2
#   low byte      from s1,s2,s3,s0  -> lookup through Tbl3
# The four lookups of a column are XORed together with the round key
# words t0..t3 that are loaded in between.
931 Ldec_loop:
932         rlwinm  $acc00,$s0,`32-24+3`,21,28
933         rlwinm  $acc01,$s1,`32-24+3`,21,28
934         rlwinm  $acc02,$s2,`32-24+3`,21,28
935         rlwinm  $acc03,$s3,`32-24+3`,21,28
936         lwz     $t0,0($key)
937         lwz     $t1,4($key)
938         rlwinm  $acc04,$s3,`32-16+3`,21,28
939         rlwinm  $acc05,$s0,`32-16+3`,21,28
940         lwz     $t2,8($key)
941         lwz     $t3,12($key)
942         rlwinm  $acc06,$s1,`32-16+3`,21,28
943         rlwinm  $acc07,$s2,`32-16+3`,21,28
944         lwzx    $acc00,$Tbl0,$acc00
945         lwzx    $acc01,$Tbl0,$acc01
946         rlwinm  $acc08,$s2,`32-8+3`,21,28
947         rlwinm  $acc09,$s3,`32-8+3`,21,28
948         lwzx    $acc02,$Tbl0,$acc02
949         lwzx    $acc03,$Tbl0,$acc03
950         rlwinm  $acc10,$s0,`32-8+3`,21,28
951         rlwinm  $acc11,$s1,`32-8+3`,21,28
952         lwzx    $acc04,$Tbl1,$acc04
953         lwzx    $acc05,$Tbl1,$acc05
954         rlwinm  $acc12,$s1,`0+3`,21,28
955         rlwinm  $acc13,$s2,`0+3`,21,28
956         lwzx    $acc06,$Tbl1,$acc06
957         lwzx    $acc07,$Tbl1,$acc07
958         rlwinm  $acc14,$s3,`0+3`,21,28
959         rlwinm  $acc15,$s0,`0+3`,21,28
960         lwzx    $acc08,$Tbl2,$acc08
961         lwzx    $acc09,$Tbl2,$acc09
962         xor     $t0,$t0,$acc00
963         xor     $t1,$t1,$acc01
964         lwzx    $acc10,$Tbl2,$acc10
965         lwzx    $acc11,$Tbl2,$acc11
966         xor     $t2,$t2,$acc02
967         xor     $t3,$t3,$acc03
968         lwzx    $acc12,$Tbl3,$acc12
969         lwzx    $acc13,$Tbl3,$acc13
970         xor     $t0,$t0,$acc04
971         xor     $t1,$t1,$acc05
972         lwzx    $acc14,$Tbl3,$acc14
973         lwzx    $acc15,$Tbl3,$acc15
974         xor     $t2,$t2,$acc06
975         xor     $t3,$t3,$acc07
976         xor     $t0,$t0,$acc08
977         xor     $t1,$t1,$acc09
978         xor     $t2,$t2,$acc10
979         xor     $t3,$t3,$acc11
980         xor     $s0,$t0,$acc12
981         xor     $s1,$t1,$acc13
982         xor     $s2,$t2,$acc14
983         xor     $s3,$t3,$acc15
984         addi    $key,$key,16
985         bdnz-   Ldec_loop
986
# Last round.  Tbl2 is re-pointed at the byte table 2048 bytes past Tbl0.
# The eight lwz loads from 2048+0 .. 2048+224 touch that 256-byte table
# at 32-byte intervals; their results in acc08..acc15 are overwritten
# below, so they serve only as the prefetch named in the inline comment.
# Each state byte is then extracted unscaled (mask bits 24..31), replaced
# through a byte lookup, and the bytes are reassembled into words with
# the same byte-to-word selection as in the loop.
987         addi    $Tbl2,$Tbl0,2048
988         nop
989         lwz     $t0,0($key)
990         lwz     $t1,4($key)
991         rlwinm  $acc00,$s0,`32-24`,24,31
992         rlwinm  $acc01,$s1,`32-24`,24,31
993         lwz     $t2,8($key)
994         lwz     $t3,12($key)
995         rlwinm  $acc02,$s2,`32-24`,24,31
996         rlwinm  $acc03,$s3,`32-24`,24,31
997         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Td4
998         lwz     $acc09,`2048+32`($Tbl0)
999         rlwinm  $acc04,$s3,`32-16`,24,31
1000         rlwinm  $acc05,$s0,`32-16`,24,31
1001         lwz     $acc10,`2048+64`($Tbl0)
1002         lwz     $acc11,`2048+96`($Tbl0)
1003         lbzx    $acc00,$Tbl2,$acc00
1004         lbzx    $acc01,$Tbl2,$acc01
1005         lwz     $acc12,`2048+128`($Tbl0)
1006         lwz     $acc13,`2048+160`($Tbl0)
1007         rlwinm  $acc06,$s1,`32-16`,24,31
1008         rlwinm  $acc07,$s2,`32-16`,24,31
1009         lwz     $acc14,`2048+192`($Tbl0)
1010         lwz     $acc15,`2048+224`($Tbl0)
1011         rlwinm  $acc08,$s2,`32-8`,24,31
1012         rlwinm  $acc09,$s3,`32-8`,24,31
1013         lbzx    $acc02,$Tbl2,$acc02
1014         lbzx    $acc03,$Tbl2,$acc03
1015         rlwinm  $acc10,$s0,`32-8`,24,31
1016         rlwinm  $acc11,$s1,`32-8`,24,31
1017         lbzx    $acc04,$Tbl2,$acc04
1018         lbzx    $acc05,$Tbl2,$acc05
1019         rlwinm  $acc12,$s1,`0`,24,31
1020         rlwinm  $acc13,$s2,`0`,24,31
1021         lbzx    $acc06,$Tbl2,$acc06
1022         lbzx    $acc07,$Tbl2,$acc07
1023         rlwinm  $acc14,$s3,`0`,24,31
1024         rlwinm  $acc15,$s0,`0`,24,31
1025         lbzx    $acc08,$Tbl2,$acc08
1026         lbzx    $acc09,$Tbl2,$acc09
1027         rlwinm  $s0,$acc00,24,0,7
1028         rlwinm  $s1,$acc01,24,0,7
1029         lbzx    $acc10,$Tbl2,$acc10
1030         lbzx    $acc11,$Tbl2,$acc11
1031         rlwinm  $s2,$acc02,24,0,7
1032         rlwinm  $s3,$acc03,24,0,7
1033         lbzx    $acc12,$Tbl2,$acc12
1034         lbzx    $acc13,$Tbl2,$acc13
1035         rlwimi  $s0,$acc04,16,8,15
1036         rlwimi  $s1,$acc05,16,8,15
1037         lbzx    $acc14,$Tbl2,$acc14
1038         lbzx    $acc15,$Tbl2,$acc15
1039         rlwimi  $s2,$acc06,16,8,15
1040         rlwimi  $s3,$acc07,16,8,15
1041         rlwimi  $s0,$acc08,8,16,23
1042         rlwimi  $s1,$acc09,8,16,23
1043         rlwimi  $s2,$acc10,8,16,23
1044         rlwimi  $s3,$acc11,8,16,23
1045         or      $s0,$s0,$acc12
1046         or      $s1,$s1,$acc13
1047         or      $s2,$s2,$acc14
1048         or      $s3,$s3,$acc15
# XOR in the final round key loaded at the start of this section.
1049         xor     $s0,$s0,$t0
1050         xor     $s1,$s1,$t1
1051         xor     $s2,$s2,$t2
1052         xor     $s3,$s3,$t3
1053         blr
1054         .long   0
1055         .byte   0,12,0x14,0,0,0,0,0
1056
# Lppc_AES_decrypt_compact: decryption of the block in s0..s3 that uses
# only the 256-byte table located 2048 bytes past Tbl0 and computes the
# column mixing arithmetically instead of through word tables.
# In:   s0..s3 = input block, key = key schedule, Tbl0 = table base.
# Out:  s0..s3 = result.
# Also overwrites t0..t3, acc00..acc15, Tbl1, mask80, mask1b, key and the
# count register.  This is the routine called by AES_decrypt.
1057 .align  4
1058 Lppc_AES_decrypt_compact:
# Loop count comes from offset 240 of the key structure (presumably the
# number of rounds - confirm against the key setup code).  Unlike the
# table-driven routine it is used undecremented, because the loop is left
# from its middle through bdz.
1059         lwz     $acc00,240($key)
1060         lwz     $t0,0($key)
1061         lwz     $t1,4($key)
1062         lwz     $t2,8($key)
1063         lwz     $t3,12($key)
1064         addi    $Tbl1,$Tbl0,2048
# Build the constants 0x80808080 and 0x1b1b1b1b (lis sets the upper
# halfword, ori the lower one).
1065         lis     $mask80,0x8080
1066         lis     $mask1b,0x1b1b
1067         addi    $key,$key,16
1068         ori     $mask80,$mask80,0x8080
1069         ori     $mask1b,$mask1b,0x1b1b
1070 ___
# 64-bit flavour only: copy the low word of each mask into its high word
# so one instruction can process two packed state words.
1071 $code.=<<___ if ($SIZE_T==8);
1072         insrdi  $mask80,$mask80,32,0
1073         insrdi  $mask1b,$mask1b,32,0
1074 ___
1075 $code.=<<___;
1076         mtctr   $acc00
1077 .align  4
# Top of the round: XOR the current round key into the state, then
# replace every state byte through the byte table at Tbl1.  Bytes are
# extracted unscaled (mask bits 24..31) and regrouped while doing so:
#   top byte      from s0,s1,s2,s3
#   second byte   from s3,s0,s1,s2
#   third byte    from s2,s3,s0,s1
#   low byte      from s1,s2,s3,s0
1078 Ldec_compact_loop:
1079         xor     $s0,$s0,$t0
1080         xor     $s1,$s1,$t1
1081         xor     $s2,$s2,$t2
1082         xor     $s3,$s3,$t3
1083         rlwinm  $acc00,$s0,`32-24`,24,31
1084         rlwinm  $acc01,$s1,`32-24`,24,31
1085         rlwinm  $acc02,$s2,`32-24`,24,31
1086         rlwinm  $acc03,$s3,`32-24`,24,31
1087         rlwinm  $acc04,$s3,`32-16`,24,31
1088         rlwinm  $acc05,$s0,`32-16`,24,31
1089         rlwinm  $acc06,$s1,`32-16`,24,31
1090         rlwinm  $acc07,$s2,`32-16`,24,31
1091         lbzx    $acc00,$Tbl1,$acc00
1092         lbzx    $acc01,$Tbl1,$acc01
1093         rlwinm  $acc08,$s2,`32-8`,24,31
1094         rlwinm  $acc09,$s3,`32-8`,24,31
1095         lbzx    $acc02,$Tbl1,$acc02
1096         lbzx    $acc03,$Tbl1,$acc03
1097         rlwinm  $acc10,$s0,`32-8`,24,31
1098         rlwinm  $acc11,$s1,`32-8`,24,31
1099         lbzx    $acc04,$Tbl1,$acc04
1100         lbzx    $acc05,$Tbl1,$acc05
1101         rlwinm  $acc12,$s1,`0`,24,31
1102         rlwinm  $acc13,$s2,`0`,24,31
1103         lbzx    $acc06,$Tbl1,$acc06
1104         lbzx    $acc07,$Tbl1,$acc07
1105         rlwinm  $acc14,$s3,`0`,24,31
1106         rlwinm  $acc15,$s0,`0`,24,31
1107         lbzx    $acc08,$Tbl1,$acc08
1108         lbzx    $acc09,$Tbl1,$acc09
1109         rlwinm  $s0,$acc00,24,0,7
1110         rlwinm  $s1,$acc01,24,0,7
1111         lbzx    $acc10,$Tbl1,$acc10
1112         lbzx    $acc11,$Tbl1,$acc11
1113         rlwinm  $s2,$acc02,24,0,7
1114         rlwinm  $s3,$acc03,24,0,7
1115         lbzx    $acc12,$Tbl1,$acc12
1116         lbzx    $acc13,$Tbl1,$acc13
1117         rlwimi  $s0,$acc04,16,8,15
1118         rlwimi  $s1,$acc05,16,8,15
1119         lbzx    $acc14,$Tbl1,$acc14
1120         lbzx    $acc15,$Tbl1,$acc15
1121         rlwimi  $s2,$acc06,16,8,15
1122         rlwimi  $s3,$acc07,16,8,15
1123         rlwimi  $s0,$acc08,8,16,23
1124         rlwimi  $s1,$acc09,8,16,23
1125         rlwimi  $s2,$acc10,8,16,23
1126         rlwimi  $s3,$acc11,8,16,23
# Fetch the next round key while the words are being completed.  It is
# XORed in at the top of the next iteration, or at Ldec_compact_done
# once the counter has run out.
1127         lwz     $t0,0($key)
1128         lwz     $t1,4($key)
1129         or      $s0,$s0,$acc12
1130         or      $s1,$s1,$acc13
1131         lwz     $t2,8($key)
1132         lwz     $t3,12($key)
1133         or      $s2,$s2,$acc14
1134         or      $s3,$s3,$acc15
1135
# Decrement the counter and leave the loop when it reaches zero, which
# skips the column-mixing arithmetic below for the last round.
1136         addi    $key,$key,16
1137         bdz     Ldec_compact_done
1138 ___
1139 $code.=<<___ if ($SIZE_T==8);
1140         # vectorized permutation improves decrypt performance by 10%
# 64-bit flavour: put s1 into the upper half of s0 and s3 into the upper
# half of s2, so each instruction below handles two state words.  Three
# rounds of the packed per-byte doubling spelled out in the line comments
# give r2, r4 and r8 from r0; 0x1b is XORed into every byte whose top bit
# was set before the shift.
1141         insrdi  $s0,$s1,32,0
1142         insrdi  $s2,$s3,32,0
1143
1144         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1145         and     $acc02,$s2,$mask80
1146         srdi    $acc04,$acc00,7         # r1>>7
1147         srdi    $acc06,$acc02,7
1148         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1149         andc    $acc10,$s2,$mask80
1150         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1151         sub     $acc02,$acc02,$acc06
1152         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1153         add     $acc10,$acc10,$acc10
1154         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1155         and     $acc02,$acc02,$mask1b
1156         xor     $acc00,$acc00,$acc08    # r2
1157         xor     $acc02,$acc02,$acc10
1158
1159         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1160         and     $acc06,$acc02,$mask80
1161         srdi    $acc08,$acc04,7         # r1>>7
1162         srdi    $acc10,$acc06,7
1163         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1164         andc    $acc14,$acc02,$mask80
1165         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1166         sub     $acc06,$acc06,$acc10
1167         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1168         add     $acc14,$acc14,$acc14
1169         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1170         and     $acc06,$acc06,$mask1b
1171         xor     $acc04,$acc04,$acc12    # r4
1172         xor     $acc06,$acc06,$acc14
1173
1174         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1175         and     $acc10,$acc06,$mask80
1176         srdi    $acc12,$acc08,7         # r1>>7
1177         srdi    $acc14,$acc10,7
1178         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1179         sub     $acc10,$acc10,$acc14
1180         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1181         andc    $acc14,$acc06,$mask80
1182         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1183         add     $acc14,$acc14,$acc14
1184         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1185         and     $acc10,$acc10,$mask1b
1186         xor     $acc08,$acc08,$acc12    # r8
1187         xor     $acc10,$acc10,$acc14
1188
1189         xor     $acc00,$acc00,$s0       # r2^r0
1190         xor     $acc02,$acc02,$s2
1191         xor     $acc04,$acc04,$s0       # r4^r0
1192         xor     $acc06,$acc06,$s2
1193
# Unpack: copy the upper halves into the odd-numbered registers so the
# shared code after this section finds one 32-bit word per register,
# exactly as the 32-bit flavour leaves them.
1194         extrdi  $acc01,$acc00,32,0
1195         extrdi  $acc03,$acc02,32,0
1196         extrdi  $acc05,$acc04,32,0
1197         extrdi  $acc07,$acc06,32,0
1198         extrdi  $acc09,$acc08,32,0
1199         extrdi  $acc11,$acc10,32,0
1200 ___
1201 $code.=<<___ if ($SIZE_T==4);
# 32-bit flavour: the same three doubling steps, one state word per
# register (acc00..03 end as r2^r0, acc04..07 as r4^r0, acc08..11 as r8).
1202         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1203         and     $acc01,$s1,$mask80
1204         and     $acc02,$s2,$mask80
1205         and     $acc03,$s3,$mask80
1206         srwi    $acc04,$acc00,7         # r1>>7
1207         srwi    $acc05,$acc01,7
1208         srwi    $acc06,$acc02,7
1209         srwi    $acc07,$acc03,7
1210         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1211         andc    $acc09,$s1,$mask80
1212         andc    $acc10,$s2,$mask80
1213         andc    $acc11,$s3,$mask80
1214         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1215         sub     $acc01,$acc01,$acc05
1216         sub     $acc02,$acc02,$acc06
1217         sub     $acc03,$acc03,$acc07
1218         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1219         add     $acc09,$acc09,$acc09
1220         add     $acc10,$acc10,$acc10
1221         add     $acc11,$acc11,$acc11
1222         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1223         and     $acc01,$acc01,$mask1b
1224         and     $acc02,$acc02,$mask1b
1225         and     $acc03,$acc03,$mask1b
1226         xor     $acc00,$acc00,$acc08    # r2
1227         xor     $acc01,$acc01,$acc09
1228         xor     $acc02,$acc02,$acc10
1229         xor     $acc03,$acc03,$acc11
1230
1231         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1232         and     $acc05,$acc01,$mask80
1233         and     $acc06,$acc02,$mask80
1234         and     $acc07,$acc03,$mask80
1235         srwi    $acc08,$acc04,7         # r1>>7
1236         srwi    $acc09,$acc05,7
1237         srwi    $acc10,$acc06,7
1238         srwi    $acc11,$acc07,7
1239         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1240         andc    $acc13,$acc01,$mask80
1241         andc    $acc14,$acc02,$mask80
1242         andc    $acc15,$acc03,$mask80
1243         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1244         sub     $acc05,$acc05,$acc09
1245         sub     $acc06,$acc06,$acc10
1246         sub     $acc07,$acc07,$acc11
1247         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1248         add     $acc13,$acc13,$acc13
1249         add     $acc14,$acc14,$acc14
1250         add     $acc15,$acc15,$acc15
1251         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1252         and     $acc05,$acc05,$mask1b
1253         and     $acc06,$acc06,$mask1b
1254         and     $acc07,$acc07,$mask1b
1255         xor     $acc04,$acc04,$acc12    # r4
1256         xor     $acc05,$acc05,$acc13
1257         xor     $acc06,$acc06,$acc14
1258         xor     $acc07,$acc07,$acc15
1259
1260         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1261         and     $acc09,$acc05,$mask80
1262         and     $acc10,$acc06,$mask80
1263         and     $acc11,$acc07,$mask80
1264         srwi    $acc12,$acc08,7         # r1>>7
1265         srwi    $acc13,$acc09,7
1266         srwi    $acc14,$acc10,7
1267         srwi    $acc15,$acc11,7
1268         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1269         sub     $acc09,$acc09,$acc13
1270         sub     $acc10,$acc10,$acc14
1271         sub     $acc11,$acc11,$acc15
1272         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1273         andc    $acc13,$acc05,$mask80
1274         andc    $acc14,$acc06,$mask80
1275         andc    $acc15,$acc07,$mask80
1276         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1277         add     $acc13,$acc13,$acc13
1278         add     $acc14,$acc14,$acc14
1279         add     $acc15,$acc15,$acc15
1280         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1281         and     $acc09,$acc09,$mask1b
1282         and     $acc10,$acc10,$mask1b
1283         and     $acc11,$acc11,$mask1b
1284         xor     $acc08,$acc08,$acc12    # r8
1285         xor     $acc09,$acc09,$acc13
1286         xor     $acc10,$acc10,$acc14
1287         xor     $acc11,$acc11,$acc15
1288
1289         xor     $acc00,$acc00,$s0       # r2^r0
1290         xor     $acc01,$acc01,$s1
1291         xor     $acc02,$acc02,$s2
1292         xor     $acc03,$acc03,$s3
1293         xor     $acc04,$acc04,$s0       # r4^r0
1294         xor     $acc05,$acc05,$s1
1295         xor     $acc06,$acc06,$s2
1296         xor     $acc07,$acc07,$s3
1297 ___
1298 $code.=<<___;
# Shared by both flavours.  On entry acc00..03 = r2^r0, acc04..07 = r4^r0
# and acc08..11 = r8.  Each state word becomes the XOR of the seven terms
# named in the line comments; the rotations of the acc registers are
# interleaved with the XORs that consume the previous results.
# NOTE(review): this appears to be the inverse column mixing of AES
# expressed through the doubled values - confirm against the AES
# specification.
1299         rotrwi  $s0,$s0,8               # = ROTATE(r0,8)
1300         rotrwi  $s1,$s1,8
1301         rotrwi  $s2,$s2,8
1302         rotrwi  $s3,$s3,8
1303         xor     $s0,$s0,$acc00          # ^= r2^r0
1304         xor     $s1,$s1,$acc01
1305         xor     $s2,$s2,$acc02
1306         xor     $s3,$s3,$acc03
1307         xor     $acc00,$acc00,$acc08
1308         xor     $acc01,$acc01,$acc09
1309         xor     $acc02,$acc02,$acc10
1310         xor     $acc03,$acc03,$acc11
1311         xor     $s0,$s0,$acc04          # ^= r4^r0
1312         xor     $s1,$s1,$acc05
1313         xor     $s2,$s2,$acc06
1314         xor     $s3,$s3,$acc07
1315         rotrwi  $acc00,$acc00,24
1316         rotrwi  $acc01,$acc01,24
1317         rotrwi  $acc02,$acc02,24
1318         rotrwi  $acc03,$acc03,24
1319         xor     $acc04,$acc04,$acc08
1320         xor     $acc05,$acc05,$acc09
1321         xor     $acc06,$acc06,$acc10
1322         xor     $acc07,$acc07,$acc11
1323         xor     $s0,$s0,$acc08          # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1324         xor     $s1,$s1,$acc09
1325         xor     $s2,$s2,$acc10
1326         xor     $s3,$s3,$acc11
1327         rotrwi  $acc04,$acc04,16
1328         rotrwi  $acc05,$acc05,16
1329         rotrwi  $acc06,$acc06,16
1330         rotrwi  $acc07,$acc07,16
1331         xor     $s0,$s0,$acc00          # ^= ROTATE(r8^r2^r0,24)
1332         xor     $s1,$s1,$acc01
1333         xor     $s2,$s2,$acc02
1334         xor     $s3,$s3,$acc03
1335         rotrwi  $acc08,$acc08,8
1336         rotrwi  $acc09,$acc09,8
1337         rotrwi  $acc10,$acc10,8
1338         rotrwi  $acc11,$acc11,8
1339         xor     $s0,$s0,$acc04          # ^= ROTATE(r8^r4^r0,16)
1340         xor     $s1,$s1,$acc05
1341         xor     $s2,$s2,$acc06
1342         xor     $s3,$s3,$acc07
1343         xor     $s0,$s0,$acc08          # ^= ROTATE(r8,8)       
1344         xor     $s1,$s1,$acc09  
1345         xor     $s2,$s2,$acc10  
1346         xor     $s3,$s3,$acc11  
1347
1348         b       Ldec_compact_loop
1349 .align  4
# Reached through bdz after the last byte substitution: XOR in the final
# round key that was loaded just before the branch, then return.
1350 Ldec_compact_done:
1351         xor     $s0,$s0,$t0
1352         xor     $s1,$s1,$t1
1353         xor     $s2,$s2,$t2
1354         xor     $s3,$s3,$t3
1355         blr
1356         .long   0
1357         .byte   0,12,0x14,0,0,0,0,0
1358
1359 .asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1360 .align  7
1361 ___
1362
# Post-process the accumulated assembly text: every backtick-quoted span
# (frame offsets, rotate counts and the like) is replaced by the value of
# evaluating it as a Perl expression.
1363 $code =~ s/\`([^\`]*)\`/eval $1/gem;
1364 print $code;
# Buffered write errors only surface when the handle is closed, so a
# failing close must stop the build instead of silently leaving truncated
# or missing assembler output behind.
1365 close STDOUT or die "error closing STDOUT: $!";