aes-ppc.pl, sha512-ppc.pl: comply even with Embedded ABI specification
[openssl.git] / crypto / aes / asm / aes-ppc.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # Needs more work: key setup, CBC routine...
11 #
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17 # at 1/3 of ppc_AES_decrypt.
18
19 # February 2010
20 #
21 # Rescheduling instructions to favour Power6 pipeline gave 10%
22 # performance improvement on the platform in question (and marginal
23 # improvement even on others). It should be noted that Power6 fails
24 # to process byte in 18 cycles, only in 23, because it fails to issue
25 # 4 load instructions in two cycles, only in 3. As a result, non-compact
26 # block subroutines are 25% slower than one would expect. Compact
27 # functions scale better, because they have pure computational part,
28 # which scales perfectly with clock frequency. To be specific
29 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
32 $flavour = shift;       # 1st argument: target flavour, must contain "64" or "32"
33 # Word size plus the matching store/load mnemonics for the chosen flavour.
34 if ($flavour =~ /64/) {
35         $SIZE_T =8;
36         $LRSAVE =2*$SIZE_T;     # LR is stored at $FRAME+$LRSAVE by the entry points
37         $STU    ="stdu";        # store-with-update, used to allocate the frame
38         $POP    ="ld";          # reloads a saved register
39         $PUSH   ="std";         # saves a register
40 } elsif ($flavour =~ /32/) {
41         $SIZE_T =4;
42         $LRSAVE =$SIZE_T;
43         $STU    ="stwu";
44         $POP    ="lwz";
45         $PUSH   ="stw";
46 } else { die "nonsense $flavour"; }
47 # Locate ppc-xlate.pl: next to this script first, then in ../../perlasm.
48 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
49 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
50 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
51 die "can't locate ppc-xlate.pl";
52 # Pipe STDOUT through ppc-xlate.pl. "or" (not "||") so that a failed open really dies.
53 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
54 # Stack frame size used by the entry points: 32 slots of $SIZE_T bytes.
55 $FRAME=32*$SIZE_T;
56
57 sub _data_word  # appends one ".long w,w" line to $code per argument, stopping at the first undef
58 { my $i;        # no "()" prototype: the sub takes a list, so it no longer depends on "&" calls
59     while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
60 }
61
62 $sp='r1';       # stack pointer, the frame is addressed through it
63 $toc='r2';
64 $inp='r3';      # input block pointer
65 $out='r4';      # output block pointer
66 $key='r5';      # key schedule pointer
67 # Table base registers; $Tbl0 takes over r3 once the input has been loaded.
68 $Tbl0='r3';
69 $Tbl1='r6';
70 $Tbl2='r7';
71 $Tbl3=$out;     # "r2" is avoided, so r4 is reused and $out is kept on the stack
72 # Four 32-bit words of cipher state.
73 $s0='r8';
74 $s1='r9';
75 $s2='r10';
76 $s3='r11';
77 # Temporaries that receive the round-key words.
78 $t0='r12';
79 $t1='r0';       # r0 is taken here because "r13" is avoided
80 $t2='r14';
81 $t3='r15';
82 # Sixteen scratch accumulators, r16 through r31.
83 $acc00='r16';
84 $acc01='r17';
85 $acc02='r18';
86 $acc03='r19';
87
88 $acc04='r20';
89 $acc05='r21';
90 $acc06='r22';
91 $acc07='r23';
92
93 $acc08='r24';
94 $acc09='r25';
95 $acc10='r26';
96 $acc11='r27';
97
98 $acc12='r28';
99 $acc13='r29';
100 $acc14='r30';
101 $acc15='r31';
102 # The compact routine keeps its 0x80808080 / 0x1b1b1b1b masks in two table registers.
103 $mask80=$Tbl2;
104 $mask1b=$Tbl3;
105 # Table locators: LAES_Te / LAES_Td return the address of their lookup table in $Tbl0,
106 $code.=<<___;   # computed from the stub's own address (bcl + mflr); LR is kept in r0 and restored
107 .machine        "any"
108 .text
109
110 .align  7
111 LAES_Te:
112         mflr    r0
113         bcl     20,31,\$+4
114         mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
115         addi    $Tbl0,$Tbl0,`128-8`
116         mtlr    r0
117         blr
118         .long   0
119         .byte   0,12,0x14,0,0,0,0,0
120         .space  `64-9*4`
121 LAES_Td:
122         mflr    r0
123         bcl     20,31,\$+4
124         mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
125         addi    $Tbl0,$Tbl0,`128-64-8+2048+256`
126         mtlr    r0
127         blr
128         .long   0
129         .byte   0,12,0x14,0,0,0,0,0
130         .space  `128-64-9*4`
131 ___
132 &_data_word(    # table addressed via LAES_Te: 256 words, each emitted twice (2048 bytes)
133         0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
134         0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
135         0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
136         0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
137         0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
138         0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
139         0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
140         0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
141         0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
142         0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
143         0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
144         0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
145         0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
146         0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
147         0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
148         0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
149         0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
150         0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
151         0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
152         0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
153         0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
154         0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
155         0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
156         0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
157         0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
158         0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
159         0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
160         0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
161         0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
162         0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
163         0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
164         0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
165         0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
166         0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
167         0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
168         0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
169         0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
170         0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
171         0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
172         0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
173         0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
174         0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
175         0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
176         0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
177         0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
178         0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
179         0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
180         0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
181         0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
182         0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
183         0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
184         0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
185         0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
186         0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
187         0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
188         0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
189         0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
190         0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
191         0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
192         0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
193         0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
194         0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
195         0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
196         0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
197 $code.=<<___;   # 256-byte table at offset 2048 from the LAES_Te base ("Te4" where Lppc_AES_encrypt prefetches it)
198 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
199 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
200 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
201 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
202 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
203 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
204 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
205 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
206 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
207 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
208 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
209 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
210 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
211 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
212 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
213 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
214 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
215 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
216 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
217 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
218 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
219 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
220 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
221 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
222 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
223 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
224 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
225 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
226 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
227 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
228 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
229 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
230 ___
231 &_data_word(    # table addressed via LAES_Td: 256 words, each emitted twice (2048 bytes)
232         0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
233         0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
234         0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
235         0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
236         0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
237         0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
238         0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
239         0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
240         0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
241         0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
242         0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
243         0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
244         0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
245         0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
246         0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
247         0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
248         0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
249         0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
250         0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
251         0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
252         0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
253         0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
254         0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
255         0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
256         0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
257         0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
258         0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
259         0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
260         0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
261         0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
262         0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
263         0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
264         0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
265         0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
266         0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
267         0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
268         0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
269         0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
270         0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
271         0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
272         0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
273         0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
274         0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
275         0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
276         0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
277         0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
278         0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
279         0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
280         0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
281         0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
282         0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
283         0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
284         0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
285         0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
286         0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
287         0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
288         0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
289         0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
290         0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
291         0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
292         0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
293         0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
294         0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
295         0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
296 $code.=<<___;
297 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
298 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
299 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
300 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
301 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
302 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
303 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
304 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
305 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
306 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
307 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
308 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
309 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
310 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
311 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
312 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
313 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
314 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
315 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
316 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
317 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
318 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
319 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
320 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
321 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
322 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
323 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
324 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
325 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
326 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
327 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
328 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
329
330
331 .globl  .AES_encrypt
332 .align  7
333 .AES_encrypt:
334         $STU    $sp,-$FRAME($sp)
335         mflr    r0
336
337         $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
338         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
339         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
340         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
341         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
342         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
343         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
344         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
345         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
346         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
347         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
348         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
349         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
350         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
351         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
352         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
353         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
354         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
355         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
356         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
357
358         andi.   $t0,$inp,3
359         andi.   $t1,$out,3
360         or.     $t0,$t0,$t1
361         bne     Lenc_unaligned
362
363 Lenc_unaligned_ok:
364         lwz     $s0,0($inp)
365         lwz     $s1,4($inp)
366         lwz     $s2,8($inp)
367         lwz     $s3,12($inp)
368         bl      LAES_Te
369         bl      Lppc_AES_encrypt_compact
370         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
371         stw     $s0,0($out)
372         stw     $s1,4($out)
373         stw     $s2,8($out)
374         stw     $s3,12($out)
375         b       Lenc_done
376
377 Lenc_unaligned:
378         subfic  $t0,$inp,4096
379         subfic  $t1,$out,4096
380         andi.   $t0,$t0,4096-16
381         beq     Lenc_xpage
382         andi.   $t1,$t1,4096-16
383         bne     Lenc_unaligned_ok
384
385 Lenc_xpage:
386         lbz     $acc00,0($inp)
387         lbz     $acc01,1($inp)
388         lbz     $acc02,2($inp)
389         lbz     $s0,3($inp)
390         lbz     $acc04,4($inp)
391         lbz     $acc05,5($inp)
392         lbz     $acc06,6($inp)
393         lbz     $s1,7($inp)
394         lbz     $acc08,8($inp)
395         lbz     $acc09,9($inp)
396         lbz     $acc10,10($inp)
397         insrwi  $s0,$acc00,8,0
398         lbz     $s2,11($inp)
399         insrwi  $s1,$acc04,8,0
400         lbz     $acc12,12($inp)
401         insrwi  $s0,$acc01,8,8
402         lbz     $acc13,13($inp)
403         insrwi  $s1,$acc05,8,8
404         lbz     $acc14,14($inp)
405         insrwi  $s0,$acc02,8,16
406         lbz     $s3,15($inp)
407         insrwi  $s1,$acc06,8,16
408         insrwi  $s2,$acc08,8,0
409         insrwi  $s3,$acc12,8,0
410         insrwi  $s2,$acc09,8,8
411         insrwi  $s3,$acc13,8,8
412         insrwi  $s2,$acc10,8,16
413         insrwi  $s3,$acc14,8,16
414
415         bl      LAES_Te
416         bl      Lppc_AES_encrypt_compact
417         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
418
419         extrwi  $acc00,$s0,8,0
420         extrwi  $acc01,$s0,8,8
421         stb     $acc00,0($out)
422         extrwi  $acc02,$s0,8,16
423         stb     $acc01,1($out)
424         stb     $acc02,2($out)
425         extrwi  $acc04,$s1,8,0
426         stb     $s0,3($out)
427         extrwi  $acc05,$s1,8,8
428         stb     $acc04,4($out)
429         extrwi  $acc06,$s1,8,16
430         stb     $acc05,5($out)
431         stb     $acc06,6($out)
432         extrwi  $acc08,$s2,8,0
433         stb     $s1,7($out)
434         extrwi  $acc09,$s2,8,8
435         stb     $acc08,8($out)
436         extrwi  $acc10,$s2,8,16
437         stb     $acc09,9($out)
438         stb     $acc10,10($out)
439         extrwi  $acc12,$s3,8,0
440         stb     $s2,11($out)
441         extrwi  $acc13,$s3,8,8
442         stb     $acc12,12($out)
443         extrwi  $acc14,$s3,8,16
444         stb     $acc13,13($out)
445         stb     $acc14,14($out)
446         stb     $s3,15($out)
447
448 Lenc_done:
449         $POP    r0,`$FRAME+$LRSAVE`($sp)
450         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
451         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
452         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
453         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
454         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
455         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
456         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
457         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
458         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
459         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
460         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
461         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
462         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
463         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
464         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
465         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
466         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
467         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
468         mtlr    r0
469         addi    $sp,$sp,$FRAME
470         blr
471         .long   0
472         .byte   0,12,4,1,0x80,18,3,0
473         .long   0
474
475 .align  5
476 Lppc_AES_encrypt:
477         lwz     $acc00,240($key)
478         addi    $Tbl1,$Tbl0,3
479         lwz     $t0,0($key)
480         addi    $Tbl2,$Tbl0,2
481         lwz     $t1,4($key)
482         addi    $Tbl3,$Tbl0,1
483         lwz     $t2,8($key)
484         addi    $acc00,$acc00,-1
485         lwz     $t3,12($key)
486         addi    $key,$key,16
487         xor     $s0,$s0,$t0
488         xor     $s1,$s1,$t1
489         xor     $s2,$s2,$t2
490         xor     $s3,$s3,$t3
491         mtctr   $acc00
492 .align  4
493 Lenc_loop:
494         rlwinm  $acc00,$s0,`32-24+3`,21,28
495         rlwinm  $acc01,$s1,`32-24+3`,21,28
496         rlwinm  $acc02,$s2,`32-24+3`,21,28
497         rlwinm  $acc03,$s3,`32-24+3`,21,28
498         lwz     $t0,0($key)
499         rlwinm  $acc04,$s1,`32-16+3`,21,28
500         lwz     $t1,4($key)
501         rlwinm  $acc05,$s2,`32-16+3`,21,28
502         lwz     $t2,8($key)
503         rlwinm  $acc06,$s3,`32-16+3`,21,28
504         lwz     $t3,12($key)
505         rlwinm  $acc07,$s0,`32-16+3`,21,28
506         lwzx    $acc00,$Tbl0,$acc00
507         rlwinm  $acc08,$s2,`32-8+3`,21,28
508         lwzx    $acc01,$Tbl0,$acc01
509         rlwinm  $acc09,$s3,`32-8+3`,21,28
510         lwzx    $acc02,$Tbl0,$acc02
511         rlwinm  $acc10,$s0,`32-8+3`,21,28
512         lwzx    $acc03,$Tbl0,$acc03
513         rlwinm  $acc11,$s1,`32-8+3`,21,28
514         lwzx    $acc04,$Tbl1,$acc04
515         rlwinm  $acc12,$s3,`0+3`,21,28
516         lwzx    $acc05,$Tbl1,$acc05
517         rlwinm  $acc13,$s0,`0+3`,21,28
518         lwzx    $acc06,$Tbl1,$acc06
519         rlwinm  $acc14,$s1,`0+3`,21,28
520         lwzx    $acc07,$Tbl1,$acc07
521         rlwinm  $acc15,$s2,`0+3`,21,28
522         lwzx    $acc08,$Tbl2,$acc08
523         xor     $t0,$t0,$acc00
524         lwzx    $acc09,$Tbl2,$acc09
525         xor     $t1,$t1,$acc01
526         lwzx    $acc10,$Tbl2,$acc10
527         xor     $t2,$t2,$acc02
528         lwzx    $acc11,$Tbl2,$acc11
529         xor     $t3,$t3,$acc03
530         lwzx    $acc12,$Tbl3,$acc12
531         xor     $t0,$t0,$acc04
532         lwzx    $acc13,$Tbl3,$acc13
533         xor     $t1,$t1,$acc05
534         lwzx    $acc14,$Tbl3,$acc14
535         xor     $t2,$t2,$acc06
536         lwzx    $acc15,$Tbl3,$acc15
537         xor     $t3,$t3,$acc07
538         xor     $t0,$t0,$acc08
539         xor     $t1,$t1,$acc09
540         xor     $t2,$t2,$acc10
541         xor     $t3,$t3,$acc11
542         xor     $s0,$t0,$acc12
543         xor     $s1,$t1,$acc13
544         xor     $s2,$t2,$acc14
545         xor     $s3,$t3,$acc15
546         addi    $key,$key,16
547         bdnz-   Lenc_loop
548
549         addi    $Tbl2,$Tbl0,2048
550         nop
551         lwz     $t0,0($key)
552         rlwinm  $acc00,$s0,`32-24`,24,31
553         lwz     $t1,4($key)
554         rlwinm  $acc01,$s1,`32-24`,24,31
555         lwz     $t2,8($key)
556         rlwinm  $acc02,$s2,`32-24`,24,31
557         lwz     $t3,12($key)
558         rlwinm  $acc03,$s3,`32-24`,24,31
559         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Te4
560         rlwinm  $acc04,$s1,`32-16`,24,31
561         lwz     $acc09,`2048+32`($Tbl0)
562         rlwinm  $acc05,$s2,`32-16`,24,31
563         lwz     $acc10,`2048+64`($Tbl0)
564         rlwinm  $acc06,$s3,`32-16`,24,31
565         lwz     $acc11,`2048+96`($Tbl0)
566         rlwinm  $acc07,$s0,`32-16`,24,31
567         lwz     $acc12,`2048+128`($Tbl0)
568         rlwinm  $acc08,$s2,`32-8`,24,31
569         lwz     $acc13,`2048+160`($Tbl0)
570         rlwinm  $acc09,$s3,`32-8`,24,31
571         lwz     $acc14,`2048+192`($Tbl0)
572         rlwinm  $acc10,$s0,`32-8`,24,31
573         lwz     $acc15,`2048+224`($Tbl0)
574         rlwinm  $acc11,$s1,`32-8`,24,31
575         lbzx    $acc00,$Tbl2,$acc00
576         rlwinm  $acc12,$s3,`0`,24,31
577         lbzx    $acc01,$Tbl2,$acc01
578         rlwinm  $acc13,$s0,`0`,24,31
579         lbzx    $acc02,$Tbl2,$acc02
580         rlwinm  $acc14,$s1,`0`,24,31
581         lbzx    $acc03,$Tbl2,$acc03
582         rlwinm  $acc15,$s2,`0`,24,31
583         lbzx    $acc04,$Tbl2,$acc04
584         rlwinm  $s0,$acc00,24,0,7
585         lbzx    $acc05,$Tbl2,$acc05
586         rlwinm  $s1,$acc01,24,0,7
587         lbzx    $acc06,$Tbl2,$acc06
588         rlwinm  $s2,$acc02,24,0,7
589         lbzx    $acc07,$Tbl2,$acc07
590         rlwinm  $s3,$acc03,24,0,7
591         lbzx    $acc08,$Tbl2,$acc08
592         rlwimi  $s0,$acc04,16,8,15
593         lbzx    $acc09,$Tbl2,$acc09
594         rlwimi  $s1,$acc05,16,8,15
595         lbzx    $acc10,$Tbl2,$acc10
596         rlwimi  $s2,$acc06,16,8,15
597         lbzx    $acc11,$Tbl2,$acc11
598         rlwimi  $s3,$acc07,16,8,15
599         lbzx    $acc12,$Tbl2,$acc12
600         rlwimi  $s0,$acc08,8,16,23
601         lbzx    $acc13,$Tbl2,$acc13
602         rlwimi  $s1,$acc09,8,16,23
603         lbzx    $acc14,$Tbl2,$acc14
604         rlwimi  $s2,$acc10,8,16,23
605         lbzx    $acc15,$Tbl2,$acc15
606         rlwimi  $s3,$acc11,8,16,23
607         or      $s0,$s0,$acc12
608         or      $s1,$s1,$acc13
609         or      $s2,$s2,$acc14
610         or      $s3,$s3,$acc15
611         xor     $s0,$s0,$t0
612         xor     $s1,$s1,$t1
613         xor     $s2,$s2,$t2
614         xor     $s3,$s3,$t3
615         blr
616         .long   0
617         .byte   0,12,0x14,0,0,0,0,0
618
619 .align  4
620 Lppc_AES_encrypt_compact:
621         lwz     $acc00,240($key)
622         addi    $Tbl1,$Tbl0,2048
623         lwz     $t0,0($key)
624         lis     $mask80,0x8080
625         lwz     $t1,4($key)
626         lis     $mask1b,0x1b1b
627         lwz     $t2,8($key)
628         ori     $mask80,$mask80,0x8080
629         lwz     $t3,12($key)
630         ori     $mask1b,$mask1b,0x1b1b
631         addi    $key,$key,16
632         mtctr   $acc00
633 .align  4
634 Lenc_compact_loop:
635         xor     $s0,$s0,$t0
636         xor     $s1,$s1,$t1
637         rlwinm  $acc00,$s0,`32-24`,24,31
638         xor     $s2,$s2,$t2
639         rlwinm  $acc01,$s1,`32-24`,24,31
640         xor     $s3,$s3,$t3
641         rlwinm  $acc02,$s2,`32-24`,24,31
642         rlwinm  $acc03,$s3,`32-24`,24,31
643         rlwinm  $acc04,$s1,`32-16`,24,31
644         rlwinm  $acc05,$s2,`32-16`,24,31
645         rlwinm  $acc06,$s3,`32-16`,24,31
646         rlwinm  $acc07,$s0,`32-16`,24,31
647         lbzx    $acc00,$Tbl1,$acc00
648         rlwinm  $acc08,$s2,`32-8`,24,31
649         lbzx    $acc01,$Tbl1,$acc01
650         rlwinm  $acc09,$s3,`32-8`,24,31
651         lbzx    $acc02,$Tbl1,$acc02
652         rlwinm  $acc10,$s0,`32-8`,24,31
653         lbzx    $acc03,$Tbl1,$acc03
654         rlwinm  $acc11,$s1,`32-8`,24,31
655         lbzx    $acc04,$Tbl1,$acc04
656         rlwinm  $acc12,$s3,`0`,24,31
657         lbzx    $acc05,$Tbl1,$acc05
658         rlwinm  $acc13,$s0,`0`,24,31
659         lbzx    $acc06,$Tbl1,$acc06
660         rlwinm  $acc14,$s1,`0`,24,31
661         lbzx    $acc07,$Tbl1,$acc07
662         rlwinm  $acc15,$s2,`0`,24,31
663         lbzx    $acc08,$Tbl1,$acc08
664         rlwinm  $s0,$acc00,24,0,7
665         lbzx    $acc09,$Tbl1,$acc09
666         rlwinm  $s1,$acc01,24,0,7
667         lbzx    $acc10,$Tbl1,$acc10
668         rlwinm  $s2,$acc02,24,0,7
669         lbzx    $acc11,$Tbl1,$acc11
670         rlwinm  $s3,$acc03,24,0,7
671         lbzx    $acc12,$Tbl1,$acc12
672         rlwimi  $s0,$acc04,16,8,15
673         lbzx    $acc13,$Tbl1,$acc13
674         rlwimi  $s1,$acc05,16,8,15
675         lbzx    $acc14,$Tbl1,$acc14
676         rlwimi  $s2,$acc06,16,8,15
677         lbzx    $acc15,$Tbl1,$acc15
678         rlwimi  $s3,$acc07,16,8,15
679         rlwimi  $s0,$acc08,8,16,23
680         rlwimi  $s1,$acc09,8,16,23
681         rlwimi  $s2,$acc10,8,16,23
682         rlwimi  $s3,$acc11,8,16,23
683         lwz     $t0,0($key)
684         or      $s0,$s0,$acc12
685         lwz     $t1,4($key)
686         or      $s1,$s1,$acc13
687         lwz     $t2,8($key)
688         or      $s2,$s2,$acc14
689         lwz     $t3,12($key)
690         or      $s3,$s3,$acc15
691
692         addi    $key,$key,16
693         bdz     Lenc_compact_done
694
695         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
696         and     $acc01,$s1,$mask80
697         and     $acc02,$s2,$mask80
698         and     $acc03,$s3,$mask80
699         srwi    $acc04,$acc00,7         # r1>>7
700         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
701         srwi    $acc05,$acc01,7
702         andc    $acc09,$s1,$mask80
703         srwi    $acc06,$acc02,7
704         andc    $acc10,$s2,$mask80
705         srwi    $acc07,$acc03,7
706         andc    $acc11,$s3,$mask80
707         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
708         sub     $acc01,$acc01,$acc05
709         sub     $acc02,$acc02,$acc06
710         sub     $acc03,$acc03,$acc07
711         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
712         add     $acc09,$acc09,$acc09
713         add     $acc10,$acc10,$acc10
714         add     $acc11,$acc11,$acc11
715         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
716         and     $acc01,$acc01,$mask1b
717         and     $acc02,$acc02,$mask1b
718         and     $acc03,$acc03,$mask1b
719         xor     $acc00,$acc00,$acc08    # r2
720         xor     $acc01,$acc01,$acc09
721          rotlwi $acc12,$s0,16           # ROTATE(r0,16)
722         xor     $acc02,$acc02,$acc10
723          rotlwi $acc13,$s1,16
724         xor     $acc03,$acc03,$acc11
725          rotlwi $acc14,$s2,16
726
727         xor     $s0,$s0,$acc00          # r0^r2
728         rotlwi  $acc15,$s3,16
729         xor     $s1,$s1,$acc01
730         rotrwi  $s0,$s0,24              # ROTATE(r2^r0,24)
731         xor     $s2,$s2,$acc02
732         rotrwi  $s1,$s1,24
733         xor     $s3,$s3,$acc03
734         rotrwi  $s2,$s2,24
735         xor     $s0,$s0,$acc00          # ROTATE(r2^r0,24)^r2
736         rotrwi  $s3,$s3,24
737         xor     $s1,$s1,$acc01
738         xor     $s2,$s2,$acc02
739         xor     $s3,$s3,$acc03
740         rotlwi  $acc08,$acc12,8         # ROTATE(r0,24)
741         xor     $s0,$s0,$acc12          #
742         rotlwi  $acc09,$acc13,8
743         xor     $s1,$s1,$acc13
744         rotlwi  $acc10,$acc14,8
745         xor     $s2,$s2,$acc14
746         rotlwi  $acc11,$acc15,8
747         xor     $s3,$s3,$acc15
748         xor     $s0,$s0,$acc08          #
749         xor     $s1,$s1,$acc09
750         xor     $s2,$s2,$acc10
751         xor     $s3,$s3,$acc11
752
753         b       Lenc_compact_loop
754 .align  4
755 Lenc_compact_done:
756         xor     $s0,$s0,$t0
757         xor     $s1,$s1,$t1
758         xor     $s2,$s2,$t2
759         xor     $s3,$s3,$t3
760         blr
761         .long   0
762         .byte   0,12,0x14,0,0,0,0,0
763
# .AES_decrypt: exported entry point that decrypts a single 16-byte block.
#
# The input block is read from $inp and the result is written to $out; the
# state travels in $s0-$s3 across the call to Lppc_AES_decrypt_compact.
# A $FRAME-byte stack frame holds $out, r14-r31 and the return address
# for the duration of the call.
# NOTE(review): LAES_Td is defined outside this excerpt; presumably it
# loads the decryption table address into $Tbl0 - confirm there.
764 .globl  .AES_decrypt
765 .align  7
766 .AES_decrypt:
        # Allocate the frame and pick up the return address from LR.
767         $STU    $sp,-$FRAME($sp)
768         mflr    r0
769
        # Spill $out (it is reloaded after the cipher call), r14-r31 and
        # finally the return address fetched into r0 above.
770         $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
771         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
772         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
773         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
774         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
775         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
776         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
777         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
778         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
779         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
780         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
781         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
782         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
783         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
784         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
785         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
786         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
787         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
788         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
789         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
790
        # Take the word-access path directly only when both pointers have
        # their two low bits clear, i.e. are 4-byte aligned.
791         andi.   $t0,$inp,3
792         andi.   $t1,$out,3
793         or.     $t0,$t0,$t1
794         bne     Ldec_unaligned
795
796 Ldec_unaligned_ok:
        # Word path: load the four state words, run the cipher, reload
        # $out from the frame and store the four result words.
797         lwz     $s0,0($inp)
798         lwz     $s1,4($inp)
799         lwz     $s2,8($inp)
800         lwz     $s3,12($inp)
801         bl      LAES_Td
802         bl      Lppc_AES_decrypt_compact
803         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
804         stw     $s0,0($out)
805         stw     $s1,4($out)
806         stw     $s2,8($out)
807         stw     $s3,12($out)
808         b       Ldec_done
809
810 Ldec_unaligned:
        # At least one pointer is misaligned. Compute 4096-ptr for each and
        # mask it with 4096-16: a zero result means the pointer sits less
        # than 16 bytes below a 4096-byte boundary (or exactly on one), and
        # such a block is handled bytewise at Ldec_xpage. If neither test
        # yields zero the word path is used despite the misalignment.
        # NOTE(review): presumably this keeps misaligned word accesses from
        # straddling a page boundary - confirm the intended page size.
811         subfic  $t0,$inp,4096
812         subfic  $t1,$out,4096
813         andi.   $t0,$t0,4096-16
814         beq     Ldec_xpage
815         andi.   $t1,$t1,4096-16
816         bne     Ldec_unaligned_ok
817
818 Ldec_xpage:
        # Byte path: gather each state word from four single-byte loads.
        # The last byte of every group is loaded straight into the state
        # register; insrwi then places the first three bytes at bit
        # offsets 0, 8 and 16, so the lowest-addressed byte becomes the
        # most significant one.
819         lbz     $acc00,0($inp)
820         lbz     $acc01,1($inp)
821         lbz     $acc02,2($inp)
822         lbz     $s0,3($inp)
823         lbz     $acc04,4($inp)
824         lbz     $acc05,5($inp)
825         lbz     $acc06,6($inp)
826         lbz     $s1,7($inp)
827         lbz     $acc08,8($inp)
828         lbz     $acc09,9($inp)
829         lbz     $acc10,10($inp)
830         insrwi  $s0,$acc00,8,0
831         lbz     $s2,11($inp)
832         insrwi  $s1,$acc04,8,0
833         lbz     $acc12,12($inp)
834         insrwi  $s0,$acc01,8,8
835         lbz     $acc13,13($inp)
836         insrwi  $s1,$acc05,8,8
837         lbz     $acc14,14($inp)
838         insrwi  $s0,$acc02,8,16
839         lbz     $s3,15($inp)
840         insrwi  $s1,$acc06,8,16
841         insrwi  $s2,$acc08,8,0
842         insrwi  $s3,$acc12,8,0
843         insrwi  $s2,$acc09,8,8
844         insrwi  $s3,$acc13,8,8
845         insrwi  $s2,$acc10,8,16
846         insrwi  $s3,$acc14,8,16
847
848         bl      LAES_Td
849         bl      Lppc_AES_decrypt_compact
850         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
851
        # Scatter the result bytewise: extrwi pulls out the three upper
        # bytes of each word, and stb of the state register itself stores
        # its low byte.
852         extrwi  $acc00,$s0,8,0
853         extrwi  $acc01,$s0,8,8
854         stb     $acc00,0($out)
855         extrwi  $acc02,$s0,8,16
856         stb     $acc01,1($out)
857         stb     $acc02,2($out)
858         extrwi  $acc04,$s1,8,0
859         stb     $s0,3($out)
860         extrwi  $acc05,$s1,8,8
861         stb     $acc04,4($out)
862         extrwi  $acc06,$s1,8,16
863         stb     $acc05,5($out)
864         stb     $acc06,6($out)
865         extrwi  $acc08,$s2,8,0
866         stb     $s1,7($out)
867         extrwi  $acc09,$s2,8,8
868         stb     $acc08,8($out)
869         extrwi  $acc10,$s2,8,16
870         stb     $acc09,9($out)
871         stb     $acc10,10($out)
872         extrwi  $acc12,$s3,8,0
873         stb     $s2,11($out)
874         extrwi  $acc13,$s3,8,8
875         stb     $acc12,12($out)
876         extrwi  $acc14,$s3,8,16
877         stb     $acc13,13($out)
878         stb     $acc14,14($out)
879         stb     $s3,15($out)
880
881 Ldec_done:
        # Epilogue: reload the return address and r14-r31, move the return
        # address back into LR, release the frame and return.
882         $POP    r0,`$FRAME+$LRSAVE`($sp)
883         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
884         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
885         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
886         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
887         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
888         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
889         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
890         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
891         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
892         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
893         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
894         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
895         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
896         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
897         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
898         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
899         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
900         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
901         mtlr    r0
902         addi    $sp,$sp,$FRAME
903         blr
        # NOTE(review): the data words after blr look like a traceback
        # table describing this function - confirm the field values
        # against the ABI documentation.
904         .long   0
905         .byte   0,12,4,1,0x80,18,3,0
906         .long   0
907
# Lppc_AES_decrypt: table-driven decryption of the state held in $s0-$s3.
#
# Reads the round keys through $key and the lookup table through $Tbl0,
# and leaves the result in $s0-$s3. Overwrites $key, $t0-$t3,
# $acc00-$acc15, $Tbl1-$Tbl3 and CTR.
# NOTE(review): the header comments of this file state that the compact
# variant is the one actually used; no caller of this routine is visible
# in this excerpt.
908 .align  5
909 Lppc_AES_decrypt:
        # The word at offset 240 of the key structure, minus one, becomes
        # the loop count (presumably the number of rounds - confirm against
        # the key setup code). $Tbl1-$Tbl3 address the same table at byte
        # offsets 3, 2 and 1. The first 16 key bytes are mixed into the
        # state before the loop starts.
        # NOTE(review): the byte-offset table pointers presumably select
        # rotated views of the 8-byte-spaced table entries - confirm
        # against the table layout at LAES_Td.
910         lwz     $acc00,240($key)
911         addi    $Tbl1,$Tbl0,3
912         lwz     $t0,0($key)
913         addi    $Tbl2,$Tbl0,2
914         lwz     $t1,4($key)
915         addi    $Tbl3,$Tbl0,1
916         lwz     $t2,8($key)
917         addi    $acc00,$acc00,-1
918         lwz     $t3,12($key)
919         addi    $key,$key,16
920         xor     $s0,$s0,$t0
921         xor     $s1,$s1,$t1
922         xor     $s2,$s2,$t2
923         xor     $s3,$s3,$t3
924         mtctr   $acc00
925 .align  4
926 Ldec_loop:
        # One round per iteration. Each rlwinm isolates one byte of a state
        # word and scales it by 8 to form a table offset; for every output
        # word the four bytes are taken from four different state words.
        # The four looked-up words are folded into the round key loaded
        # into $t0-$t3, and the sums become the new state.
927         rlwinm  $acc00,$s0,`32-24+3`,21,28
928         rlwinm  $acc01,$s1,`32-24+3`,21,28
929         rlwinm  $acc02,$s2,`32-24+3`,21,28
930         rlwinm  $acc03,$s3,`32-24+3`,21,28
931         lwz     $t0,0($key)
932         rlwinm  $acc04,$s3,`32-16+3`,21,28
933         lwz     $t1,4($key)
934         rlwinm  $acc05,$s0,`32-16+3`,21,28
935         lwz     $t2,8($key)
936         rlwinm  $acc06,$s1,`32-16+3`,21,28
937         lwz     $t3,12($key)
938         rlwinm  $acc07,$s2,`32-16+3`,21,28
939         lwzx    $acc00,$Tbl0,$acc00
940         rlwinm  $acc08,$s2,`32-8+3`,21,28
941         lwzx    $acc01,$Tbl0,$acc01
942         rlwinm  $acc09,$s3,`32-8+3`,21,28
943         lwzx    $acc02,$Tbl0,$acc02
944         rlwinm  $acc10,$s0,`32-8+3`,21,28
945         lwzx    $acc03,$Tbl0,$acc03
946         rlwinm  $acc11,$s1,`32-8+3`,21,28
947         lwzx    $acc04,$Tbl1,$acc04
948         rlwinm  $acc12,$s1,`0+3`,21,28
949         lwzx    $acc05,$Tbl1,$acc05
950         rlwinm  $acc13,$s2,`0+3`,21,28
951         lwzx    $acc06,$Tbl1,$acc06
952         rlwinm  $acc14,$s3,`0+3`,21,28
953         lwzx    $acc07,$Tbl1,$acc07
954         rlwinm  $acc15,$s0,`0+3`,21,28
955         lwzx    $acc08,$Tbl2,$acc08
956         xor     $t0,$t0,$acc00
957         lwzx    $acc09,$Tbl2,$acc09
958         xor     $t1,$t1,$acc01
959         lwzx    $acc10,$Tbl2,$acc10
960         xor     $t2,$t2,$acc02
961         lwzx    $acc11,$Tbl2,$acc11
962         xor     $t3,$t3,$acc03
963         lwzx    $acc12,$Tbl3,$acc12
964         xor     $t0,$t0,$acc04
965         lwzx    $acc13,$Tbl3,$acc13
966         xor     $t1,$t1,$acc05
967         lwzx    $acc14,$Tbl3,$acc14
968         xor     $t2,$t2,$acc06
969         lwzx    $acc15,$Tbl3,$acc15
970         xor     $t3,$t3,$acc07
971         xor     $t0,$t0,$acc08
972         xor     $t1,$t1,$acc09
973         xor     $t2,$t2,$acc10
974         xor     $t3,$t3,$acc11
975         xor     $s0,$t0,$acc12
976         xor     $s1,$t1,$acc13
977         xor     $s2,$t2,$acc14
978         xor     $s3,$t3,$acc15
979         addi    $key,$key,16
980         bdnz-   Ldec_loop
981
        # Last round: $Tbl2 is repointed at the byte table 2048 bytes into
        # the table area and single bytes are substituted instead of words.
        # The eight lwz from 2048($Tbl0) onward, 32 bytes apart, only touch
        # that byte table (see the existing prefetch remark); every target
        # register is overwritten by a later rlwinm before it is read.
982         addi    $Tbl2,$Tbl0,2048
983         nop
984         lwz     $t0,0($key)
985         rlwinm  $acc00,$s0,`32-24`,24,31
986         lwz     $t1,4($key)
987         rlwinm  $acc01,$s1,`32-24`,24,31
988         lwz     $t2,8($key)
989         rlwinm  $acc02,$s2,`32-24`,24,31
990         lwz     $t3,12($key)
991         rlwinm  $acc03,$s3,`32-24`,24,31
992         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Td4
993         rlwinm  $acc04,$s3,`32-16`,24,31
994         lwz     $acc09,`2048+32`($Tbl0)
995         rlwinm  $acc05,$s0,`32-16`,24,31
996         lwz     $acc10,`2048+64`($Tbl0)
997         lbzx    $acc00,$Tbl2,$acc00
998         lwz     $acc11,`2048+96`($Tbl0)
999         lbzx    $acc01,$Tbl2,$acc01
1000         lwz     $acc12,`2048+128`($Tbl0)
1001         rlwinm  $acc06,$s1,`32-16`,24,31
1002         lwz     $acc13,`2048+160`($Tbl0)
1003         rlwinm  $acc07,$s2,`32-16`,24,31
1004         lwz     $acc14,`2048+192`($Tbl0)
1005         rlwinm  $acc08,$s2,`32-8`,24,31
1006         lwz     $acc15,`2048+224`($Tbl0)
1007         rlwinm  $acc09,$s3,`32-8`,24,31
1008         lbzx    $acc02,$Tbl2,$acc02
1009         rlwinm  $acc10,$s0,`32-8`,24,31
1010         lbzx    $acc03,$Tbl2,$acc03
1011         rlwinm  $acc11,$s1,`32-8`,24,31
1012         lbzx    $acc04,$Tbl2,$acc04
1013         rlwinm  $acc12,$s1,`0`,24,31
1014         lbzx    $acc05,$Tbl2,$acc05
1015         rlwinm  $acc13,$s2,`0`,24,31
1016         lbzx    $acc06,$Tbl2,$acc06
1017         rlwinm  $acc14,$s3,`0`,24,31
1018         lbzx    $acc07,$Tbl2,$acc07
1019         rlwinm  $acc15,$s0,`0`,24,31
1020         lbzx    $acc08,$Tbl2,$acc08
1021         rlwinm  $s0,$acc00,24,0,7
1022         lbzx    $acc09,$Tbl2,$acc09
1023         rlwinm  $s1,$acc01,24,0,7
1024         lbzx    $acc10,$Tbl2,$acc10
1025         rlwinm  $s2,$acc02,24,0,7
1026         lbzx    $acc11,$Tbl2,$acc11
1027         rlwinm  $s3,$acc03,24,0,7
1028         lbzx    $acc12,$Tbl2,$acc12
1029         rlwimi  $s0,$acc04,16,8,15
1030         lbzx    $acc13,$Tbl2,$acc13
1031         rlwimi  $s1,$acc05,16,8,15
1032         lbzx    $acc14,$Tbl2,$acc14
1033         rlwimi  $s2,$acc06,16,8,15
1034         lbzx    $acc15,$Tbl2,$acc15
1035         rlwimi  $s3,$acc07,16,8,15
        # Finish reassembling the substituted bytes into words and mix in
        # the final round key held in $t0-$t3.
1036         rlwimi  $s0,$acc08,8,16,23
1037         rlwimi  $s1,$acc09,8,16,23
1038         rlwimi  $s2,$acc10,8,16,23
1039         rlwimi  $s3,$acc11,8,16,23
1040         or      $s0,$s0,$acc12
1041         or      $s1,$s1,$acc13
1042         or      $s2,$s2,$acc14
1043         or      $s3,$s3,$acc15
1044         xor     $s0,$s0,$t0
1045         xor     $s1,$s1,$t1
1046         xor     $s2,$s2,$t2
1047         xor     $s3,$s3,$t3
1048         blr
        # NOTE(review): the data words after blr look like a traceback
        # table for this leaf routine - confirm against the ABI
        # documentation.
1049         .long   0
1050         .byte   0,12,0x14,0,0,0,0,0
1051
# Lppc_AES_decrypt_compact: decryption of the state held in $s0-$s3 that
# substitutes bytes through the byte table 2048 bytes past $Tbl0 and
# computes the column mixing arithmetically with the 0x80 and 0x1b byte
# masks instead of word tables. Called from .AES_decrypt.
#
# Reads the round keys through $key and leaves the result in $s0-$s3.
# Overwrites $key, $t0-$t3, $acc00-$acc15, $Tbl1, $mask80, $mask1b
# and CTR.
1052 .align  4
1053 Lppc_AES_decrypt_compact:
        # The word at offset 240 of the key structure becomes the loop
        # count (presumably the number of rounds - confirm against the key
        # setup code). $mask80 and $mask1b are built as 0x80808080 and
        # 0x1b1b1b1b, and the first round key is loaded into $t0-$t3.
1054         lwz     $acc00,240($key)
1055         addi    $Tbl1,$Tbl0,2048
1056         lwz     $t0,0($key)
1057         lis     $mask80,0x8080
1058         lwz     $t1,4($key)
1059         lis     $mask1b,0x1b1b
1060         lwz     $t2,8($key)
1061         ori     $mask80,$mask80,0x8080
1062         lwz     $t3,12($key)
1063         ori     $mask1b,$mask1b,0x1b1b
1064         addi    $key,$key,16
1065 ___
# 64-bit build only: copy the low word of each mask into its upper half so
# the masks cover a full 64-bit register.
1066 $code.=<<___ if ($SIZE_T==8);
1067         insrdi  $mask80,$mask80,32,0
1068         insrdi  $mask1b,$mask1b,32,0
1069 ___
1070 $code.=<<___;
1071         mtctr   $acc00
1072 .align  4
1073 Ldec_compact_loop:
        # Each iteration first adds the round key held in $t0-$t3, then
        # substitutes every state byte through the table at $Tbl1 while
        # regrouping bytes across the four words, and finally loads the
        # next round key.
1074         xor     $s0,$s0,$t0
1075         xor     $s1,$s1,$t1
1076         rlwinm  $acc00,$s0,`32-24`,24,31
1077         xor     $s2,$s2,$t2
1078         rlwinm  $acc01,$s1,`32-24`,24,31
1079         xor     $s3,$s3,$t3
1080         rlwinm  $acc02,$s2,`32-24`,24,31
1081         rlwinm  $acc03,$s3,`32-24`,24,31
1082         rlwinm  $acc04,$s3,`32-16`,24,31
1083         rlwinm  $acc05,$s0,`32-16`,24,31
1084         rlwinm  $acc06,$s1,`32-16`,24,31
1085         rlwinm  $acc07,$s2,`32-16`,24,31
1086         lbzx    $acc00,$Tbl1,$acc00
1087         rlwinm  $acc08,$s2,`32-8`,24,31
1088         lbzx    $acc01,$Tbl1,$acc01
1089         rlwinm  $acc09,$s3,`32-8`,24,31
1090         lbzx    $acc02,$Tbl1,$acc02
1091         rlwinm  $acc10,$s0,`32-8`,24,31
1092         lbzx    $acc03,$Tbl1,$acc03
1093         rlwinm  $acc11,$s1,`32-8`,24,31
1094         lbzx    $acc04,$Tbl1,$acc04
1095         rlwinm  $acc12,$s1,`0`,24,31
1096         lbzx    $acc05,$Tbl1,$acc05
1097         rlwinm  $acc13,$s2,`0`,24,31
1098         lbzx    $acc06,$Tbl1,$acc06
1099         rlwinm  $acc14,$s3,`0`,24,31
1100         lbzx    $acc07,$Tbl1,$acc07
1101         rlwinm  $acc15,$s0,`0`,24,31
1102         lbzx    $acc08,$Tbl1,$acc08
1103         rlwinm  $s0,$acc00,24,0,7
1104         lbzx    $acc09,$Tbl1,$acc09
1105         rlwinm  $s1,$acc01,24,0,7
1106         lbzx    $acc10,$Tbl1,$acc10
1107         rlwinm  $s2,$acc02,24,0,7
1108         lbzx    $acc11,$Tbl1,$acc11
1109         rlwinm  $s3,$acc03,24,0,7
1110         lbzx    $acc12,$Tbl1,$acc12
1111         rlwimi  $s0,$acc04,16,8,15
1112         lbzx    $acc13,$Tbl1,$acc13
1113         rlwimi  $s1,$acc05,16,8,15
1114         lbzx    $acc14,$Tbl1,$acc14
1115         rlwimi  $s2,$acc06,16,8,15
1116         lbzx    $acc15,$Tbl1,$acc15
1117         rlwimi  $s3,$acc07,16,8,15
1118         rlwimi  $s0,$acc08,8,16,23
1119         rlwimi  $s1,$acc09,8,16,23
1120         rlwimi  $s2,$acc10,8,16,23
1121         rlwimi  $s3,$acc11,8,16,23
1122         lwz     $t0,0($key)
1123         or      $s0,$s0,$acc12
1124         lwz     $t1,4($key)
1125         or      $s1,$s1,$acc13
1126         lwz     $t2,8($key)
1127         or      $s2,$s2,$acc14
1128         lwz     $t3,12($key)
1129         or      $s3,$s3,$acc15
1130
        # CTR reaching zero marks the last round, which skips the column
        # mixing below and goes straight to the final key addition.
1131         addi    $key,$key,16
1132         bdz     Ldec_compact_done
1133 ___
# 64-bit build: pack two state words into each 64-bit register so that every
# doubling step handles two columns at once. r2, r4 and r8 are successive
# per-byte doublings of r0; the top bit of each byte, isolated with the 0x80
# mask, selects the 0x1b correction. The extrdi lines at the end copy the
# upper halves into the odd-numbered acc registers used by the common tail.
1134 $code.=<<___ if ($SIZE_T==8);
1135         # vectorized permutation improves decrypt performance by 10%
1136         insrdi  $s0,$s1,32,0
1137         insrdi  $s2,$s3,32,0
1138
1139         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1140         and     $acc02,$s2,$mask80
1141         srdi    $acc04,$acc00,7         # r1>>7
1142         srdi    $acc06,$acc02,7
1143         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1144         andc    $acc10,$s2,$mask80
1145         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1146         sub     $acc02,$acc02,$acc06
1147         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1148         add     $acc10,$acc10,$acc10
1149         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1150         and     $acc02,$acc02,$mask1b
1151         xor     $acc00,$acc00,$acc08    # r2
1152         xor     $acc02,$acc02,$acc10
1153
1154         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1155         and     $acc06,$acc02,$mask80
1156         srdi    $acc08,$acc04,7         # r1>>7
1157         srdi    $acc10,$acc06,7
1158         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1159         andc    $acc14,$acc02,$mask80
1160         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1161         sub     $acc06,$acc06,$acc10
1162         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1163         add     $acc14,$acc14,$acc14
1164         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1165         and     $acc06,$acc06,$mask1b
1166         xor     $acc04,$acc04,$acc12    # r4
1167         xor     $acc06,$acc06,$acc14
1168
1169         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1170         and     $acc10,$acc06,$mask80
1171         srdi    $acc12,$acc08,7         # r1>>7
1172         srdi    $acc14,$acc10,7
1173         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1174         sub     $acc10,$acc10,$acc14
1175         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1176         andc    $acc14,$acc06,$mask80
1177         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1178         add     $acc14,$acc14,$acc14
1179         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1180         and     $acc10,$acc10,$mask1b
1181         xor     $acc08,$acc08,$acc12    # r8
1182         xor     $acc10,$acc10,$acc14
1183
1184         xor     $acc00,$acc00,$s0       # r2^r0
1185         xor     $acc02,$acc02,$s2
1186         xor     $acc04,$acc04,$s0       # r4^r0
1187         xor     $acc06,$acc06,$s2
1188
1189         extrdi  $acc01,$acc00,32,0
1190         extrdi  $acc03,$acc02,32,0
1191         extrdi  $acc05,$acc04,32,0
1192         extrdi  $acc07,$acc06,32,0
1193         extrdi  $acc09,$acc08,32,0
1194         extrdi  $acc11,$acc10,32,0
1195 ___
# 32-bit build: the same three doubling steps (r2, r4, r8), carried out with
# one state word per register.
1196 $code.=<<___ if ($SIZE_T==4);
1197         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1198         and     $acc01,$s1,$mask80
1199         and     $acc02,$s2,$mask80
1200         and     $acc03,$s3,$mask80
1201         srwi    $acc04,$acc00,7         # r1>>7
1202         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1203         srwi    $acc05,$acc01,7
1204         andc    $acc09,$s1,$mask80
1205         srwi    $acc06,$acc02,7
1206         andc    $acc10,$s2,$mask80
1207         srwi    $acc07,$acc03,7
1208         andc    $acc11,$s3,$mask80
1209         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1210         sub     $acc01,$acc01,$acc05
1211         sub     $acc02,$acc02,$acc06
1212         sub     $acc03,$acc03,$acc07
1213         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1214         add     $acc09,$acc09,$acc09
1215         add     $acc10,$acc10,$acc10
1216         add     $acc11,$acc11,$acc11
1217         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1218         and     $acc01,$acc01,$mask1b
1219         and     $acc02,$acc02,$mask1b
1220         and     $acc03,$acc03,$mask1b
1221         xor     $acc00,$acc00,$acc08    # r2
1222         xor     $acc01,$acc01,$acc09
1223         xor     $acc02,$acc02,$acc10
1224         xor     $acc03,$acc03,$acc11
1225
1226         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1227         and     $acc05,$acc01,$mask80
1228         and     $acc06,$acc02,$mask80
1229         and     $acc07,$acc03,$mask80
1230         srwi    $acc08,$acc04,7         # r1>>7
1231         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1232         srwi    $acc09,$acc05,7
1233         andc    $acc13,$acc01,$mask80
1234         srwi    $acc10,$acc06,7
1235         andc    $acc14,$acc02,$mask80
1236         srwi    $acc11,$acc07,7
1237         andc    $acc15,$acc03,$mask80
1238         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1239         sub     $acc05,$acc05,$acc09
1240         sub     $acc06,$acc06,$acc10
1241         sub     $acc07,$acc07,$acc11
1242         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1243         add     $acc13,$acc13,$acc13
1244         add     $acc14,$acc14,$acc14
1245         add     $acc15,$acc15,$acc15
1246         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1247         and     $acc05,$acc05,$mask1b
1248         and     $acc06,$acc06,$mask1b
1249         and     $acc07,$acc07,$mask1b
1250         xor     $acc04,$acc04,$acc12    # r4
1251         xor     $acc05,$acc05,$acc13
1252         xor     $acc06,$acc06,$acc14
1253         xor     $acc07,$acc07,$acc15
1254
1255         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1256         and     $acc09,$acc05,$mask80
1257         srwi    $acc12,$acc08,7         # r1>>7
1258         and     $acc10,$acc06,$mask80
1259         srwi    $acc13,$acc09,7
1260         and     $acc11,$acc07,$mask80
1261         srwi    $acc14,$acc10,7
1262         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1263         srwi    $acc15,$acc11,7
1264         sub     $acc09,$acc09,$acc13
1265         sub     $acc10,$acc10,$acc14
1266         sub     $acc11,$acc11,$acc15
1267         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1268         andc    $acc13,$acc05,$mask80
1269         andc    $acc14,$acc06,$mask80
1270         andc    $acc15,$acc07,$mask80
1271         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1272         add     $acc13,$acc13,$acc13
1273         add     $acc14,$acc14,$acc14
1274         add     $acc15,$acc15,$acc15
1275         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1276         and     $acc09,$acc09,$mask1b
1277         and     $acc10,$acc10,$mask1b
1278         and     $acc11,$acc11,$mask1b
1279         xor     $acc08,$acc08,$acc12    # r8
1280         xor     $acc09,$acc09,$acc13
1281         xor     $acc10,$acc10,$acc14
1282         xor     $acc11,$acc11,$acc15
1283
1284         xor     $acc00,$acc00,$s0       # r2^r0
1285         xor     $acc01,$acc01,$s1
1286         xor     $acc02,$acc02,$s2
1287         xor     $acc03,$acc03,$s3
1288         xor     $acc04,$acc04,$s0       # r4^r0
1289         xor     $acc05,$acc05,$s1
1290         xor     $acc06,$acc06,$s2
1291         xor     $acc07,$acc07,$s3
1292 ___
# Common tail for both builds: combine r0 with the r2^r0, r4^r0 and r8 terms
# under byte rotations to form the new state words, as spelled out by the
# per-line remarks, then start the next round.
1293 $code.=<<___;
1294         rotrwi  $s0,$s0,8               # = ROTATE(r0,8)
1295         rotrwi  $s1,$s1,8
1296         xor     $s0,$s0,$acc00          # ^= r2^r0
1297         rotrwi  $s2,$s2,8
1298         xor     $s1,$s1,$acc01
1299         rotrwi  $s3,$s3,8
1300         xor     $s2,$s2,$acc02
1301         xor     $s3,$s3,$acc03
1302         xor     $acc00,$acc00,$acc08
1303         xor     $acc01,$acc01,$acc09
1304         xor     $acc02,$acc02,$acc10
1305         xor     $acc03,$acc03,$acc11
1306         xor     $s0,$s0,$acc04          # ^= r4^r0
1307         rotrwi  $acc00,$acc00,24
1308         xor     $s1,$s1,$acc05
1309         rotrwi  $acc01,$acc01,24
1310         xor     $s2,$s2,$acc06
1311         rotrwi  $acc02,$acc02,24
1312         xor     $s3,$s3,$acc07
1313         rotrwi  $acc03,$acc03,24
1314         xor     $acc04,$acc04,$acc08
1315         xor     $acc05,$acc05,$acc09
1316         xor     $acc06,$acc06,$acc10
1317         xor     $acc07,$acc07,$acc11
1318         xor     $s0,$s0,$acc08          # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1319         rotrwi  $acc04,$acc04,16
1320         xor     $s1,$s1,$acc09
1321         rotrwi  $acc05,$acc05,16
1322         xor     $s2,$s2,$acc10
1323         rotrwi  $acc06,$acc06,16
1324         xor     $s3,$s3,$acc11
1325         rotrwi  $acc07,$acc07,16
1326         xor     $s0,$s0,$acc00          # ^= ROTATE(r8^r2^r0,24)
1327         rotrwi  $acc08,$acc08,8
1328         xor     $s1,$s1,$acc01
1329         rotrwi  $acc09,$acc09,8
1330         xor     $s2,$s2,$acc02
1331         rotrwi  $acc10,$acc10,8
1332         xor     $s3,$s3,$acc03
1333         rotrwi  $acc11,$acc11,8
1334         xor     $s0,$s0,$acc04          # ^= ROTATE(r8^r4^r0,16)
1335         xor     $s1,$s1,$acc05
1336         xor     $s2,$s2,$acc06
1337         xor     $s3,$s3,$acc07
1338         xor     $s0,$s0,$acc08          # ^= ROTATE(r8,8)       
1339         xor     $s1,$s1,$acc09  
1340         xor     $s2,$s2,$acc10  
1341         xor     $s3,$s3,$acc11  
1342
1343         b       Ldec_compact_loop
1344 .align  4
1345 Ldec_compact_done:
        # Last round: only the final round key in $t0-$t3 is added.
1346         xor     $s0,$s0,$t0
1347         xor     $s1,$s1,$t1
1348         xor     $s2,$s2,$t2
1349         xor     $s3,$s3,$t3
1350         blr
        # NOTE(review): the data words after blr look like a traceback
        # table for this leaf routine - confirm against the ABI
        # documentation.
1351         .long   0
1352         .byte   0,12,0x14,0,0,0,0,0
1353
1354 .asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1355 .align  7
1356 ___
1357
# Evaluate every backtick-quoted arithmetic expression embedded in the
# generated text (for example the frame offsets used by the save/restore
# sequences) and splice the result in its place, then emit the finished
# assembly on STDOUT.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# Buffered write errors are only reported when the handle is closed, and a
# close on a piped handle also reports a failing child process, so a failed
# close must abort the run instead of silently leaving truncated output.
close STDOUT or die "error closing STDOUT: $!";