aes/asm/aes-ppc.pl: add little-endian support.
[openssl.git] / crypto / aes / asm / aes-ppc.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # Needs more work: key setup, CBC routine...
11 #
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17 # at 1/3 of ppc_AES_decrypt.
18
19 # February 2010
20 #
21 # Rescheduling instructions to favour Power6 pipeline gave 10%
22 # performance improvement on the platform in question (and marginal
23 # improvement even on others). It should be noted that Power6 fails
24 # to process byte in 18 cycles, only in 23, because it fails to issue
25 # 4 load instructions in two cycles, only in 3. As a result, non-compact
26 # block subroutines are 25% slower than one would expect. Compact
27 # functions scale better, because they have pure computational part,
28 # which scales perfectly with clock frequency. To be specific
29 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
# The first command-line argument selects the target "flavour".  It is
# matched against 64/32 below and is also forwarded to ppc-xlate.pl.
$flavour = shift;

# Pick the pointer size, the offset at which the link register is saved
# relative to the caller's frame, and the mnemonics used to allocate the
# frame and to save/restore registers in it.
if ($flavour =~ /64/) {
        $SIZE_T =8;
        $LRSAVE =2*$SIZE_T;
        $STU    ="stdu";
        $POP    ="ld";
        $PUSH   ="std";
} elsif ($flavour =~ /32/) {
        $SIZE_T =4;
        $LRSAVE =$SIZE_T;
        $STU    ="stwu";
        $POP    ="lwz";
        $PUSH   ="stw";
} else { die "nonsense $flavour"; }

# A flavour ending in "le" requests little-endian code, which is accepted
# for 64-bit builds only.
$LITTLE_ENDIAN=0;
if ($flavour =~ /le$/) {
        die "little-endian is 64-bit only: $flavour" if ($SIZE_T == 4);
        $LITTLE_ENDIAN=1;
}

# Locate ppc-xlate.pl: first next to this script, then in ../../perlasm/
# relative to it.  $dir is explicitly empty when $0 has no directory part,
# instead of relying on whatever $1 happens to hold after a failed match.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe everything printed to STDOUT through the translator; the second
# command-line argument is appended to its command line as before.
# Low-precedence "or" is required here: with "||" the die was attached to
# the (always true) command string rather than to open(), so a failed
# open() was never reported.
$output = shift;
open STDOUT,"| $^X $xlate $flavour ".(defined($output) ? $output : "")
        or die "can't call $xlate: $!";

# Size in bytes of the stack frame allocated by the generated functions.
$FRAME=32*$SIZE_T;
62
# Append one ".long" directive per argument to the global $code buffer.
# Each 32-bit value is emitted twice on the same line, formatted as
# 0x%08x.  Processing stops at the first undefined argument.
#
# Declared without a prototype: the former empty prototype "()" claimed
# the sub takes no arguments and only worked because callers used the
# "&" call form, which bypasses prototype checking.
sub _data_word
{
    foreach my $word (@_) {
        last if (!defined($word));
        $code.=sprintf("\t.long\t0x%08x,0x%08x\n",$word,$word);
    }
}
67
# Symbolic names for the general-purpose registers used by the generated
# code.  $sp is the stack pointer register, $toc names r2, and
# $inp/$out/$key are r3/r4/r5.
($sp,$toc,$inp,$out,$key) = map { "r$_" } (1..5);

# Table pointers.  $Tbl0 shares r3 with $inp.  $Tbl3 reuses $out in order
# to stay away from "r2"; $out is offloaded to stack.
($Tbl0,$Tbl1,$Tbl2,$Tbl3) = ("r3","r6","r7",$out);

# The four 32-bit words of the block being processed.
($s0,$s1,$s2,$s3) = map { "r$_" } (8..11);

# Temporaries.  $t1 is "r0" in order to stay away from "r13".
($t0,$t1,$t2,$t3) = ("r12","r0","r14","r15");

# Sixteen scratch registers, r16 through r31.
($acc00,$acc01,$acc02,$acc03,
 $acc04,$acc05,$acc06,$acc07,
 $acc08,$acc09,$acc10,$acc11,
 $acc12,$acc13,$acc14,$acc15) = map { "r$_" } (16..31);

# The mask registers reuse two of the table-pointer registers.
($mask80,$mask1b) = ($Tbl2,$Tbl3);
111
112 $code.=<<___;
113 .machine        "any"
114 .text
115
116 .align  7
117 LAES_Te:
118         mflr    r0
119         bcl     20,31,\$+4
120         mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
121         addi    $Tbl0,$Tbl0,`128-8`
122         mtlr    r0
123         blr
124         .long   0
125         .byte   0,12,0x14,0,0,0,0,0
126         .space  `64-9*4`
127 LAES_Td:
128         mflr    r0
129         bcl     20,31,\$+4
130         mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
131         addi    $Tbl0,$Tbl0,`128-64-8+2048+256`
132         mtlr    r0
133         blr
134         .long   0
135         .byte   0,12,0x14,0,0,0,0,0
136         .space  `128-64-9*4`
137 ___
138 &_data_word(
139         0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
140         0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
141         0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
142         0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
143         0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
144         0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
145         0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
146         0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
147         0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
148         0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
149         0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
150         0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
151         0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
152         0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
153         0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
154         0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
155         0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
156         0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
157         0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
158         0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
159         0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
160         0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
161         0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
162         0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
163         0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
164         0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
165         0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
166         0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
167         0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
168         0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
169         0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
170         0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
171         0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
172         0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
173         0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
174         0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
175         0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
176         0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
177         0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
178         0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
179         0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
180         0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
181         0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
182         0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
183         0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
184         0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
185         0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
186         0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
187         0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
188         0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
189         0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
190         0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
191         0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
192         0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
193         0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
194         0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
195         0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
196         0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
197         0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
198         0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
199         0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
200         0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
201         0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
202         0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
203 $code.=<<___;
204 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
205 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
206 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
207 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
208 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
209 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
210 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
211 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
212 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
213 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
214 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
215 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
216 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
217 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
218 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
219 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
220 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
221 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
222 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
223 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
224 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
225 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
226 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
227 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
228 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
229 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
230 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
231 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
232 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
233 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
234 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
235 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
236 ___
237 &_data_word(
238         0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
239         0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
240         0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
241         0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
242         0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
243         0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
244         0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
245         0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
246         0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
247         0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
248         0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
249         0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
250         0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
251         0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
252         0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
253         0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
254         0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
255         0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
256         0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
257         0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
258         0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
259         0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
260         0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
261         0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
262         0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
263         0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
264         0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
265         0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
266         0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
267         0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
268         0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
269         0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
270         0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
271         0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
272         0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
273         0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
274         0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
275         0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
276         0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
277         0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
278         0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
279         0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
280         0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
281         0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
282         0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
283         0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
284         0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
285         0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
286         0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
287         0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
288         0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
289         0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
290         0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
291         0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
292         0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
293         0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
294         0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
295         0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
296         0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
297         0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
298         0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
299         0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
300         0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
301         0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
302 $code.=<<___;
303 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
304 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
305 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
306 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
307 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
308 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
309 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
310 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
311 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
312 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
313 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
314 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
315 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
316 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
317 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
318 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
319 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
320 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
321 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
322 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
323 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
324 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
325 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
326 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
327 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
328 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
329 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
330 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
331 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
332 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
333 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
334 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
335
336
337 .globl  .AES_encrypt
338 .align  7
339 .AES_encrypt:
340         $STU    $sp,-$FRAME($sp)
341         mflr    r0
342
343         $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
344         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
345         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
346         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
347         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
348         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
349         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
350         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
351         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
352         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
353         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
354         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
355         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
356         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
357         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
358         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
359         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
360         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
361         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
362         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
363
364         andi.   $t0,$inp,3
365         andi.   $t1,$out,3
366         or.     $t0,$t0,$t1
367         bne     Lenc_unaligned
368
369 Lenc_unaligned_ok:
370 ___
371 $code.=<<___ if (!$LITTLE_ENDIAN);
372         lwz     $s0,0($inp)
373         lwz     $s1,4($inp)
374         lwz     $s2,8($inp)
375         lwz     $s3,12($inp)
376 ___
377 $code.=<<___ if ($LITTLE_ENDIAN);
378         lwz     $t0,0($inp)
379         lwz     $t1,4($inp)
380         lwz     $t2,8($inp)
381         lwz     $t3,12($inp)
382         rotlwi  $s0,$t0,8
383         rotlwi  $s1,$t1,8
384         rotlwi  $s2,$t2,8
385         rotlwi  $s3,$t3,8
386         rlwimi  $s0,$t0,24,0,7
387         rlwimi  $s1,$t1,24,0,7
388         rlwimi  $s2,$t2,24,0,7
389         rlwimi  $s3,$t3,24,0,7
390         rlwimi  $s0,$t0,24,16,23
391         rlwimi  $s1,$t1,24,16,23
392         rlwimi  $s2,$t2,24,16,23
393         rlwimi  $s3,$t3,24,16,23
394 ___
395 $code.=<<___;
396         bl      LAES_Te
397         bl      Lppc_AES_encrypt_compact
398         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
399 ___
400 $code.=<<___ if ($LITTLE_ENDIAN);
401         rotlwi  $t0,$s0,8
402         rotlwi  $t1,$s1,8
403         rotlwi  $t2,$s2,8
404         rotlwi  $t3,$s3,8
405         rlwimi  $t0,$s0,24,0,7
406         rlwimi  $t1,$s1,24,0,7
407         rlwimi  $t2,$s2,24,0,7
408         rlwimi  $t3,$s3,24,0,7
409         rlwimi  $t0,$s0,24,16,23
410         rlwimi  $t1,$s1,24,16,23
411         rlwimi  $t2,$s2,24,16,23
412         rlwimi  $t3,$s3,24,16,23
413         stw     $t0,0($out)
414         stw     $t1,4($out)
415         stw     $t2,8($out)
416         stw     $t3,12($out)
417 ___
418 $code.=<<___ if (!$LITTLE_ENDIAN);
419         stw     $s0,0($out)
420         stw     $s1,4($out)
421         stw     $s2,8($out)
422         stw     $s3,12($out)
423 ___
424 $code.=<<___;
425         b       Lenc_done
426
427 Lenc_unaligned:
428         subfic  $t0,$inp,4096
429         subfic  $t1,$out,4096
430         andi.   $t0,$t0,4096-16
431         beq     Lenc_xpage
432         andi.   $t1,$t1,4096-16
433         bne     Lenc_unaligned_ok
434
435 Lenc_xpage:
436         lbz     $acc00,0($inp)
437         lbz     $acc01,1($inp)
438         lbz     $acc02,2($inp)
439         lbz     $s0,3($inp)
440         lbz     $acc04,4($inp)
441         lbz     $acc05,5($inp)
442         lbz     $acc06,6($inp)
443         lbz     $s1,7($inp)
444         lbz     $acc08,8($inp)
445         lbz     $acc09,9($inp)
446         lbz     $acc10,10($inp)
447         insrwi  $s0,$acc00,8,0
448         lbz     $s2,11($inp)
449         insrwi  $s1,$acc04,8,0
450         lbz     $acc12,12($inp)
451         insrwi  $s0,$acc01,8,8
452         lbz     $acc13,13($inp)
453         insrwi  $s1,$acc05,8,8
454         lbz     $acc14,14($inp)
455         insrwi  $s0,$acc02,8,16
456         lbz     $s3,15($inp)
457         insrwi  $s1,$acc06,8,16
458         insrwi  $s2,$acc08,8,0
459         insrwi  $s3,$acc12,8,0
460         insrwi  $s2,$acc09,8,8
461         insrwi  $s3,$acc13,8,8
462         insrwi  $s2,$acc10,8,16
463         insrwi  $s3,$acc14,8,16
464
465         bl      LAES_Te
466         bl      Lppc_AES_encrypt_compact
467         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
468
469         extrwi  $acc00,$s0,8,0
470         extrwi  $acc01,$s0,8,8
471         stb     $acc00,0($out)
472         extrwi  $acc02,$s0,8,16
473         stb     $acc01,1($out)
474         stb     $acc02,2($out)
475         extrwi  $acc04,$s1,8,0
476         stb     $s0,3($out)
477         extrwi  $acc05,$s1,8,8
478         stb     $acc04,4($out)
479         extrwi  $acc06,$s1,8,16
480         stb     $acc05,5($out)
481         stb     $acc06,6($out)
482         extrwi  $acc08,$s2,8,0
483         stb     $s1,7($out)
484         extrwi  $acc09,$s2,8,8
485         stb     $acc08,8($out)
486         extrwi  $acc10,$s2,8,16
487         stb     $acc09,9($out)
488         stb     $acc10,10($out)
489         extrwi  $acc12,$s3,8,0
490         stb     $s2,11($out)
491         extrwi  $acc13,$s3,8,8
492         stb     $acc12,12($out)
493         extrwi  $acc14,$s3,8,16
494         stb     $acc13,13($out)
495         stb     $acc14,14($out)
496         stb     $s3,15($out)
497
498 Lenc_done:
499         $POP    r0,`$FRAME+$LRSAVE`($sp)
500         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
501         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
502         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
503         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
504         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
505         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
506         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
507         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
508         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
509         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
510         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
511         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
512         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
513         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
514         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
515         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
516         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
517         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
518         mtlr    r0
519         addi    $sp,$sp,$FRAME
520         blr
521         .long   0
522         .byte   0,12,4,1,0x80,18,3,0
523         .long   0
524
525 .align  5
526 Lppc_AES_encrypt:
527         lwz     $acc00,240($key)
528         addi    $Tbl1,$Tbl0,3
529         lwz     $t0,0($key)
530         addi    $Tbl2,$Tbl0,2
531         lwz     $t1,4($key)
532         addi    $Tbl3,$Tbl0,1
533         lwz     $t2,8($key)
534         addi    $acc00,$acc00,-1
535         lwz     $t3,12($key)
536         addi    $key,$key,16
537         xor     $s0,$s0,$t0
538         xor     $s1,$s1,$t1
539         xor     $s2,$s2,$t2
540         xor     $s3,$s3,$t3
541         mtctr   $acc00
542 .align  4
543 Lenc_loop:
544         rlwinm  $acc00,$s0,`32-24+3`,21,28
545         rlwinm  $acc01,$s1,`32-24+3`,21,28
546         rlwinm  $acc02,$s2,`32-24+3`,21,28
547         rlwinm  $acc03,$s3,`32-24+3`,21,28
548         lwz     $t0,0($key)
549         rlwinm  $acc04,$s1,`32-16+3`,21,28
550         lwz     $t1,4($key)
551         rlwinm  $acc05,$s2,`32-16+3`,21,28
552         lwz     $t2,8($key)
553         rlwinm  $acc06,$s3,`32-16+3`,21,28
554         lwz     $t3,12($key)
555         rlwinm  $acc07,$s0,`32-16+3`,21,28
556         lwzx    $acc00,$Tbl0,$acc00
557         rlwinm  $acc08,$s2,`32-8+3`,21,28
558         lwzx    $acc01,$Tbl0,$acc01
559         rlwinm  $acc09,$s3,`32-8+3`,21,28
560         lwzx    $acc02,$Tbl0,$acc02
561         rlwinm  $acc10,$s0,`32-8+3`,21,28
562         lwzx    $acc03,$Tbl0,$acc03
563         rlwinm  $acc11,$s1,`32-8+3`,21,28
564         lwzx    $acc04,$Tbl1,$acc04
565         rlwinm  $acc12,$s3,`0+3`,21,28
566         lwzx    $acc05,$Tbl1,$acc05
567         rlwinm  $acc13,$s0,`0+3`,21,28
568         lwzx    $acc06,$Tbl1,$acc06
569         rlwinm  $acc14,$s1,`0+3`,21,28
570         lwzx    $acc07,$Tbl1,$acc07
571         rlwinm  $acc15,$s2,`0+3`,21,28
572         lwzx    $acc08,$Tbl2,$acc08
573         xor     $t0,$t0,$acc00
574         lwzx    $acc09,$Tbl2,$acc09
575         xor     $t1,$t1,$acc01
576         lwzx    $acc10,$Tbl2,$acc10
577         xor     $t2,$t2,$acc02
578         lwzx    $acc11,$Tbl2,$acc11
579         xor     $t3,$t3,$acc03
580         lwzx    $acc12,$Tbl3,$acc12
581         xor     $t0,$t0,$acc04
582         lwzx    $acc13,$Tbl3,$acc13
583         xor     $t1,$t1,$acc05
584         lwzx    $acc14,$Tbl3,$acc14
585         xor     $t2,$t2,$acc06
586         lwzx    $acc15,$Tbl3,$acc15
587         xor     $t3,$t3,$acc07
588         xor     $t0,$t0,$acc08
589         xor     $t1,$t1,$acc09
590         xor     $t2,$t2,$acc10
591         xor     $t3,$t3,$acc11
592         xor     $s0,$t0,$acc12
593         xor     $s1,$t1,$acc13
594         xor     $s2,$t2,$acc14
595         xor     $s3,$t3,$acc15
596         addi    $key,$key,16
597         bdnz-   Lenc_loop
598
599         addi    $Tbl2,$Tbl0,2048
600         nop
601         lwz     $t0,0($key)
602         rlwinm  $acc00,$s0,`32-24`,24,31
603         lwz     $t1,4($key)
604         rlwinm  $acc01,$s1,`32-24`,24,31
605         lwz     $t2,8($key)
606         rlwinm  $acc02,$s2,`32-24`,24,31
607         lwz     $t3,12($key)
608         rlwinm  $acc03,$s3,`32-24`,24,31
609         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Te4
610         rlwinm  $acc04,$s1,`32-16`,24,31
611         lwz     $acc09,`2048+32`($Tbl0)
612         rlwinm  $acc05,$s2,`32-16`,24,31
613         lwz     $acc10,`2048+64`($Tbl0)
614         rlwinm  $acc06,$s3,`32-16`,24,31
615         lwz     $acc11,`2048+96`($Tbl0)
616         rlwinm  $acc07,$s0,`32-16`,24,31
617         lwz     $acc12,`2048+128`($Tbl0)
618         rlwinm  $acc08,$s2,`32-8`,24,31
619         lwz     $acc13,`2048+160`($Tbl0)
620         rlwinm  $acc09,$s3,`32-8`,24,31
621         lwz     $acc14,`2048+192`($Tbl0)
622         rlwinm  $acc10,$s0,`32-8`,24,31
623         lwz     $acc15,`2048+224`($Tbl0)
624         rlwinm  $acc11,$s1,`32-8`,24,31
625         lbzx    $acc00,$Tbl2,$acc00
626         rlwinm  $acc12,$s3,`0`,24,31
627         lbzx    $acc01,$Tbl2,$acc01
628         rlwinm  $acc13,$s0,`0`,24,31
629         lbzx    $acc02,$Tbl2,$acc02
630         rlwinm  $acc14,$s1,`0`,24,31
631         lbzx    $acc03,$Tbl2,$acc03
632         rlwinm  $acc15,$s2,`0`,24,31
633         lbzx    $acc04,$Tbl2,$acc04
634         rlwinm  $s0,$acc00,24,0,7
635         lbzx    $acc05,$Tbl2,$acc05
636         rlwinm  $s1,$acc01,24,0,7
637         lbzx    $acc06,$Tbl2,$acc06
638         rlwinm  $s2,$acc02,24,0,7
639         lbzx    $acc07,$Tbl2,$acc07
640         rlwinm  $s3,$acc03,24,0,7
641         lbzx    $acc08,$Tbl2,$acc08
642         rlwimi  $s0,$acc04,16,8,15
643         lbzx    $acc09,$Tbl2,$acc09
644         rlwimi  $s1,$acc05,16,8,15
645         lbzx    $acc10,$Tbl2,$acc10
646         rlwimi  $s2,$acc06,16,8,15
647         lbzx    $acc11,$Tbl2,$acc11
648         rlwimi  $s3,$acc07,16,8,15
649         lbzx    $acc12,$Tbl2,$acc12
650         rlwimi  $s0,$acc08,8,16,23
651         lbzx    $acc13,$Tbl2,$acc13
652         rlwimi  $s1,$acc09,8,16,23
653         lbzx    $acc14,$Tbl2,$acc14
654         rlwimi  $s2,$acc10,8,16,23
655         lbzx    $acc15,$Tbl2,$acc15
656         rlwimi  $s3,$acc11,8,16,23
657         or      $s0,$s0,$acc12
658         or      $s1,$s1,$acc13
659         or      $s2,$s2,$acc14
660         or      $s3,$s3,$acc15
661         xor     $s0,$s0,$t0
662         xor     $s1,$s1,$t1
663         xor     $s2,$s2,$t2
664         xor     $s3,$s3,$t3
665         blr
666         .long   0
667         .byte   0,12,0x14,0,0,0,0,0
668
669 .align  4
670 Lppc_AES_encrypt_compact:
671         lwz     $acc00,240($key)
672         addi    $Tbl1,$Tbl0,2048
673         lwz     $t0,0($key)
674         lis     $mask80,0x8080
675         lwz     $t1,4($key)
676         lis     $mask1b,0x1b1b
677         lwz     $t2,8($key)
678         ori     $mask80,$mask80,0x8080
679         lwz     $t3,12($key)
680         ori     $mask1b,$mask1b,0x1b1b
681         addi    $key,$key,16
682         mtctr   $acc00
683 .align  4
684 Lenc_compact_loop:
685         xor     $s0,$s0,$t0
686         xor     $s1,$s1,$t1
687         rlwinm  $acc00,$s0,`32-24`,24,31
688         xor     $s2,$s2,$t2
689         rlwinm  $acc01,$s1,`32-24`,24,31
690         xor     $s3,$s3,$t3
691         rlwinm  $acc02,$s2,`32-24`,24,31
692         rlwinm  $acc03,$s3,`32-24`,24,31
693         rlwinm  $acc04,$s1,`32-16`,24,31
694         rlwinm  $acc05,$s2,`32-16`,24,31
695         rlwinm  $acc06,$s3,`32-16`,24,31
696         rlwinm  $acc07,$s0,`32-16`,24,31
697         lbzx    $acc00,$Tbl1,$acc00
698         rlwinm  $acc08,$s2,`32-8`,24,31
699         lbzx    $acc01,$Tbl1,$acc01
700         rlwinm  $acc09,$s3,`32-8`,24,31
701         lbzx    $acc02,$Tbl1,$acc02
702         rlwinm  $acc10,$s0,`32-8`,24,31
703         lbzx    $acc03,$Tbl1,$acc03
704         rlwinm  $acc11,$s1,`32-8`,24,31
705         lbzx    $acc04,$Tbl1,$acc04
706         rlwinm  $acc12,$s3,`0`,24,31
707         lbzx    $acc05,$Tbl1,$acc05
708         rlwinm  $acc13,$s0,`0`,24,31
709         lbzx    $acc06,$Tbl1,$acc06
710         rlwinm  $acc14,$s1,`0`,24,31
711         lbzx    $acc07,$Tbl1,$acc07
712         rlwinm  $acc15,$s2,`0`,24,31
713         lbzx    $acc08,$Tbl1,$acc08
714         rlwinm  $s0,$acc00,24,0,7
715         lbzx    $acc09,$Tbl1,$acc09
716         rlwinm  $s1,$acc01,24,0,7
717         lbzx    $acc10,$Tbl1,$acc10
718         rlwinm  $s2,$acc02,24,0,7
719         lbzx    $acc11,$Tbl1,$acc11
720         rlwinm  $s3,$acc03,24,0,7
721         lbzx    $acc12,$Tbl1,$acc12
722         rlwimi  $s0,$acc04,16,8,15
723         lbzx    $acc13,$Tbl1,$acc13
724         rlwimi  $s1,$acc05,16,8,15
725         lbzx    $acc14,$Tbl1,$acc14
726         rlwimi  $s2,$acc06,16,8,15
727         lbzx    $acc15,$Tbl1,$acc15
728         rlwimi  $s3,$acc07,16,8,15
729         rlwimi  $s0,$acc08,8,16,23
730         rlwimi  $s1,$acc09,8,16,23
731         rlwimi  $s2,$acc10,8,16,23
732         rlwimi  $s3,$acc11,8,16,23
733         lwz     $t0,0($key)
734         or      $s0,$s0,$acc12
735         lwz     $t1,4($key)
736         or      $s1,$s1,$acc13
737         lwz     $t2,8($key)
738         or      $s2,$s2,$acc14
739         lwz     $t3,12($key)
740         or      $s3,$s3,$acc15
741
742         addi    $key,$key,16
743         bdz     Lenc_compact_done
744
745         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
746         and     $acc01,$s1,$mask80
747         and     $acc02,$s2,$mask80
748         and     $acc03,$s3,$mask80
749         srwi    $acc04,$acc00,7         # r1>>7
750         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
751         srwi    $acc05,$acc01,7
752         andc    $acc09,$s1,$mask80
753         srwi    $acc06,$acc02,7
754         andc    $acc10,$s2,$mask80
755         srwi    $acc07,$acc03,7
756         andc    $acc11,$s3,$mask80
757         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
758         sub     $acc01,$acc01,$acc05
759         sub     $acc02,$acc02,$acc06
760         sub     $acc03,$acc03,$acc07
761         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
762         add     $acc09,$acc09,$acc09
763         add     $acc10,$acc10,$acc10
764         add     $acc11,$acc11,$acc11
765         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
766         and     $acc01,$acc01,$mask1b
767         and     $acc02,$acc02,$mask1b
768         and     $acc03,$acc03,$mask1b
769         xor     $acc00,$acc00,$acc08    # r2
770         xor     $acc01,$acc01,$acc09
771          rotlwi $acc12,$s0,16           # ROTATE(r0,16)
772         xor     $acc02,$acc02,$acc10
773          rotlwi $acc13,$s1,16
774         xor     $acc03,$acc03,$acc11
775          rotlwi $acc14,$s2,16
776
777         xor     $s0,$s0,$acc00          # r0^r2
778         rotlwi  $acc15,$s3,16
779         xor     $s1,$s1,$acc01
780         rotrwi  $s0,$s0,24              # ROTATE(r2^r0,24)
781         xor     $s2,$s2,$acc02
782         rotrwi  $s1,$s1,24
783         xor     $s3,$s3,$acc03
784         rotrwi  $s2,$s2,24
785         xor     $s0,$s0,$acc00          # ROTATE(r2^r0,24)^r2
786         rotrwi  $s3,$s3,24
787         xor     $s1,$s1,$acc01
788         xor     $s2,$s2,$acc02
789         xor     $s3,$s3,$acc03
790         rotlwi  $acc08,$acc12,8         # ROTATE(r0,24)
791         xor     $s0,$s0,$acc12          #
792         rotlwi  $acc09,$acc13,8
793         xor     $s1,$s1,$acc13
794         rotlwi  $acc10,$acc14,8
795         xor     $s2,$s2,$acc14
796         rotlwi  $acc11,$acc15,8
797         xor     $s3,$s3,$acc15
798         xor     $s0,$s0,$acc08          #
799         xor     $s1,$s1,$acc09
800         xor     $s2,$s2,$acc10
801         xor     $s3,$s3,$acc11
802
803         b       Lenc_compact_loop
804 .align  4
805 Lenc_compact_done:
806         xor     $s0,$s0,$t0
807         xor     $s1,$s1,$t1
808         xor     $s2,$s2,$t2
809         xor     $s3,$s3,$t3
810         blr
811         .long   0
812         .byte   0,12,0x14,0,0,0,0,0
813 .size   .AES_encrypt,.-.AES_encrypt
814
815 .globl  .AES_decrypt        # exported entry point: decrypts one 16-byte block from inp to out
816 .align  7
817 .AES_decrypt:
818         $STU    $sp,-$FRAME($sp)        # allocate the stack frame
819         mflr    r0                      # return address goes to r0; it is stored in the frame below
820
821         $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)   # stash the output pointer; it is reloaded after the core call
822         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)    # save r14-r31; restored at Ldec_done
823         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
824         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
825         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
826         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
827         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
828         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
829         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
830         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
831         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
832         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
833         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
834         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
835         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
836         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
837         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
838         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
839         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
840         $PUSH   r0,`$FRAME+$LRSAVE`($sp)        # save the return address (the bl calls below overwrite LR)
841
842         andi.   $t0,$inp,3              # low two bits of the input pointer
843         andi.   $t1,$out,3              # low two bits of the output pointer
844         or.     $t0,$t0,$t1
845         bne     Ldec_unaligned          # at least one pointer is not 4-byte aligned
846
847 Ldec_unaligned_ok:      # word-sized loads and stores are used from here on
848 ___
849 $code.=<<___ if (!$LITTLE_ENDIAN);      # big-endian build: plain word loads
850         lwz     $s0,0($inp)
851         lwz     $s1,4($inp)
852         lwz     $s2,8($inp)
853         lwz     $s3,12($inp)
854 ___
855 $code.=<<___ if ($LITTLE_ENDIAN);       # little-endian build: load, then reverse the bytes of each word
856         lwz     $t0,0($inp)
857         lwz     $t1,4($inp)
858         lwz     $t2,8($inp)
859         lwz     $t3,12($inp)
860         rotlwi  $s0,$t0,8               # rotate left by 8: second and last bytes are already in reversed position
861         rotlwi  $s1,$t1,8
862         rotlwi  $s2,$t2,8
863         rotlwi  $s3,$t3,8
864         rlwimi  $s0,$t0,24,0,7          # patch the most significant byte
865         rlwimi  $s1,$t1,24,0,7
866         rlwimi  $s2,$t2,24,0,7
867         rlwimi  $s3,$t3,24,0,7
868         rlwimi  $s0,$t0,24,16,23        # patch the third byte; the word is now fully byte-reversed
869         rlwimi  $s1,$t1,24,16,23
870         rlwimi  $s2,$t2,24,16,23
871         rlwimi  $s3,$t3,24,16,23
872 ___
873 $code.=<<___;
874         bl      LAES_Td                 # defined outside this section; presumably sets up the Td table pointer - confirm
875         bl      Lppc_AES_decrypt_compact        # run the compact decrypt core on the four state words
876         $POP    $out,`$FRAME-$SIZE_T*19`($sp)   # reload the output pointer saved in the prologue
877 ___
878 $code.=<<___ if ($LITTLE_ENDIAN);       # little-endian build: reverse the bytes again before storing
879         rotlwi  $t0,$s0,8
880         rotlwi  $t1,$s1,8
881         rotlwi  $t2,$s2,8
882         rotlwi  $t3,$s3,8
883         rlwimi  $t0,$s0,24,0,7
884         rlwimi  $t1,$s1,24,0,7
885         rlwimi  $t2,$s2,24,0,7
886         rlwimi  $t3,$s3,24,0,7
887         rlwimi  $t0,$s0,24,16,23
888         rlwimi  $t1,$s1,24,16,23
889         rlwimi  $t2,$s2,24,16,23
890         rlwimi  $t3,$s3,24,16,23
891         stw     $t0,0($out)
892         stw     $t1,4($out)
893         stw     $t2,8($out)
894         stw     $t3,12($out)
895 ___
896 $code.=<<___ if (!$LITTLE_ENDIAN);      # big-endian build: plain word stores
897         stw     $s0,0($out)
898         stw     $s1,4($out)
899         stw     $s2,8($out)
900         stw     $s3,12($out)
901 ___
902 $code.=<<___;
903         b       Ldec_done
904
905 Ldec_unaligned:         # decide between unaligned word access and the bytewise path
906         subfic  $t0,$inp,4096           # 4096 minus the input address
907         subfic  $t1,$out,4096           # 4096 minus the output address
908         andi.   $t0,$t0,4096-16         # zero when the low 12 bits of that distance are below 16
909         beq     Ldec_xpage              # input block may reach a 4096-byte boundary: go bytewise
910         andi.   $t1,$t1,4096-16         # same test for the output pointer
911         bne     Ldec_unaligned_ok       # neither block is within 16 bytes of a boundary: use word access
912
913 Ldec_xpage:             # bytewise path: build each state word from four single-byte loads
914         lbz     $acc00,0($inp)
915         lbz     $acc01,1($inp)
916         lbz     $acc02,2($inp)
917         lbz     $s0,3($inp)             # byte 3 is loaded straight into the low byte of the first word
918         lbz     $acc04,4($inp)
919         lbz     $acc05,5($inp)
920         lbz     $acc06,6($inp)
921         lbz     $s1,7($inp)
922         lbz     $acc08,8($inp)
923         lbz     $acc09,9($inp)
924         lbz     $acc10,10($inp)
925         insrwi  $s0,$acc00,8,0          # byte 0 goes into the most significant byte
926         lbz     $s2,11($inp)
927         insrwi  $s1,$acc04,8,0
928         lbz     $acc12,12($inp)
929         insrwi  $s0,$acc01,8,8          # byte 1 goes into the second byte
930         lbz     $acc13,13($inp)
931         insrwi  $s1,$acc05,8,8
932         lbz     $acc14,14($inp)
933         insrwi  $s0,$acc02,8,16         # byte 2 goes into the third byte
934         lbz     $s3,15($inp)
935         insrwi  $s1,$acc06,8,16
936         insrwi  $s2,$acc08,8,0
937         insrwi  $s3,$acc12,8,0
938         insrwi  $s2,$acc09,8,8
939         insrwi  $s3,$acc13,8,8
940         insrwi  $s2,$acc10,8,16
941         insrwi  $s3,$acc14,8,16
942
943         bl      LAES_Td                 # same call sequence as the word-access path above
944         bl      Lppc_AES_decrypt_compact
945         $POP    $out,`$FRAME-$SIZE_T*19`($sp)   # reload the output pointer saved in the prologue
946
947         extrwi  $acc00,$s0,8,0          # split each word into bytes, most significant first
948         extrwi  $acc01,$s0,8,8
949         stb     $acc00,0($out)
950         extrwi  $acc02,$s0,8,16
951         stb     $acc01,1($out)
952         stb     $acc02,2($out)
953         extrwi  $acc04,$s1,8,0
954         stb     $s0,3($out)             # stb writes the low byte of the word, no extract needed
955         extrwi  $acc05,$s1,8,8
956         stb     $acc04,4($out)
957         extrwi  $acc06,$s1,8,16
958         stb     $acc05,5($out)
959         stb     $acc06,6($out)
960         extrwi  $acc08,$s2,8,0
961         stb     $s1,7($out)
962         extrwi  $acc09,$s2,8,8
963         stb     $acc08,8($out)
964         extrwi  $acc10,$s2,8,16
965         stb     $acc09,9($out)
966         stb     $acc10,10($out)
967         extrwi  $acc12,$s3,8,0
968         stb     $s2,11($out)
969         extrwi  $acc13,$s3,8,8
970         stb     $acc12,12($out)
971         extrwi  $acc14,$s3,8,16
972         stb     $acc13,13($out)
973         stb     $acc14,14($out)
974         stb     $s3,15($out)
975
976 Ldec_done:              # common epilogue for both paths
977         $POP    r0,`$FRAME+$LRSAVE`($sp)        # saved return address
978         $POP    r14,`$FRAME-$SIZE_T*18`($sp)    # restore r14-r31 from the slots used in the prologue
979         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
980         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
981         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
982         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
983         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
984         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
985         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
986         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
987         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
988         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
989         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
990         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
991         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
992         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
993         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
994         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
995         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
996         mtlr    r0
997         addi    $sp,$sp,$FRAME          # release the stack frame
998         blr
999         .long   0                       # NOTE(review): this and the next two lines look like a traceback table - confirm against the ABI
1000         .byte   0,12,4,1,0x80,18,3,0
1001         .long   0
1002
1003 .align  5
1004 Lppc_AES_decrypt:       # table-driven decrypt core; the state words, key pointer and Tbl0 are expected to be set by the caller
1005         lwz     $acc00,240($key)        # word at key+240 drives the loop count; presumably the number of rounds - confirm
1006         addi    $Tbl1,$Tbl0,3           # Tbl1..Tbl3 address the same table at byte offsets 3, 2 and 1
1007         lwz     $t0,0($key)             # first four key words
1008         addi    $Tbl2,$Tbl0,2
1009         lwz     $t1,4($key)
1010         addi    $Tbl3,$Tbl0,1
1011         lwz     $t2,8($key)
1012         addi    $acc00,$acc00,-1        # the loop below runs one time fewer than the loaded value
1013         lwz     $t3,12($key)
1014         addi    $key,$key,16
1015         xor     $s0,$s0,$t0             # xor the first four key words into the state
1016         xor     $s1,$s1,$t1
1017         xor     $s2,$s2,$t2
1018         xor     $s3,$s3,$t3
1019         mtctr   $acc00
1020 .align  4
1021 Ldec_loop:              # one word-table round per iteration
1022         rlwinm  $acc00,$s0,`32-24+3`,21,28      # top byte of each state word, scaled by 8 to form a table offset
1023         rlwinm  $acc01,$s1,`32-24+3`,21,28
1024         rlwinm  $acc02,$s2,`32-24+3`,21,28
1025         rlwinm  $acc03,$s3,`32-24+3`,21,28
1026         lwz     $t0,0($key)             # next four key words
1027         rlwinm  $acc04,$s3,`32-16+3`,21,28      # second byte, source words taken in the order s3,s0,s1,s2
1028         lwz     $t1,4($key)
1029         rlwinm  $acc05,$s0,`32-16+3`,21,28
1030         lwz     $t2,8($key)
1031         rlwinm  $acc06,$s1,`32-16+3`,21,28
1032         lwz     $t3,12($key)
1033         rlwinm  $acc07,$s2,`32-16+3`,21,28
1034         lwzx    $acc00,$Tbl0,$acc00     # word lookups through Tbl0 for the top-byte offsets
1035         rlwinm  $acc08,$s2,`32-8+3`,21,28       # third byte, order s2,s3,s0,s1
1036         lwzx    $acc01,$Tbl0,$acc01
1037         rlwinm  $acc09,$s3,`32-8+3`,21,28
1038         lwzx    $acc02,$Tbl0,$acc02
1039         rlwinm  $acc10,$s0,`32-8+3`,21,28
1040         lwzx    $acc03,$Tbl0,$acc03
1041         rlwinm  $acc11,$s1,`32-8+3`,21,28
1042         lwzx    $acc04,$Tbl1,$acc04     # second-byte lookups go through Tbl1
1043         rlwinm  $acc12,$s1,`0+3`,21,28  # low byte, order s1,s2,s3,s0
1044         lwzx    $acc05,$Tbl1,$acc05
1045         rlwinm  $acc13,$s2,`0+3`,21,28
1046         lwzx    $acc06,$Tbl1,$acc06
1047         rlwinm  $acc14,$s3,`0+3`,21,28
1048         lwzx    $acc07,$Tbl1,$acc07
1049         rlwinm  $acc15,$s0,`0+3`,21,28
1050         lwzx    $acc08,$Tbl2,$acc08     # third-byte lookups go through Tbl2
1051         xor     $t0,$t0,$acc00          # fold the four lookups of each column into the key word
1052         lwzx    $acc09,$Tbl2,$acc09
1053         xor     $t1,$t1,$acc01
1054         lwzx    $acc10,$Tbl2,$acc10
1055         xor     $t2,$t2,$acc02
1056         lwzx    $acc11,$Tbl2,$acc11
1057         xor     $t3,$t3,$acc03
1058         lwzx    $acc12,$Tbl3,$acc12     # low-byte lookups go through Tbl3
1059         xor     $t0,$t0,$acc04
1060         lwzx    $acc13,$Tbl3,$acc13
1061         xor     $t1,$t1,$acc05
1062         lwzx    $acc14,$Tbl3,$acc14
1063         xor     $t2,$t2,$acc06
1064         lwzx    $acc15,$Tbl3,$acc15
1065         xor     $t3,$t3,$acc07
1066         xor     $t0,$t0,$acc08
1067         xor     $t1,$t1,$acc09
1068         xor     $t2,$t2,$acc10
1069         xor     $t3,$t3,$acc11
1070         xor     $s0,$t0,$acc12          # the last xor writes the new state words
1071         xor     $s1,$t1,$acc13
1072         xor     $s2,$t2,$acc14
1073         xor     $s3,$t3,$acc15
1074         addi    $key,$key,16
1075         bdnz-   Ldec_loop
1076
1077         addi    $Tbl2,$Tbl0,2048        # final round: byte lookups use the table at Tbl0+2048
1078         nop
1079         lwz     $t0,0($key)             # last four key words
1080         rlwinm  $acc00,$s0,`32-24`,24,31        # plain byte indices this time (no scaling)
1081         lwz     $t1,4($key)
1082         rlwinm  $acc01,$s1,`32-24`,24,31
1083         lwz     $t2,8($key)
1084         rlwinm  $acc02,$s2,`32-24`,24,31
1085         lwz     $t3,12($key)
1086         rlwinm  $acc03,$s3,`32-24`,24,31
1087         lwz     $acc08,`2048+0`($Tbl0)  # prefetch Td4: these eight loads are overwritten below, they only touch the table
1088         rlwinm  $acc04,$s3,`32-16`,24,31
1089         lwz     $acc09,`2048+32`($Tbl0)
1090         rlwinm  $acc05,$s0,`32-16`,24,31
1091         lwz     $acc10,`2048+64`($Tbl0)
1092         lbzx    $acc00,$Tbl2,$acc00     # byte lookups
1093         lwz     $acc11,`2048+96`($Tbl0)
1094         lbzx    $acc01,$Tbl2,$acc01
1095         lwz     $acc12,`2048+128`($Tbl0)
1096         rlwinm  $acc06,$s1,`32-16`,24,31
1097         lwz     $acc13,`2048+160`($Tbl0)
1098         rlwinm  $acc07,$s2,`32-16`,24,31
1099         lwz     $acc14,`2048+192`($Tbl0)
1100         rlwinm  $acc08,$s2,`32-8`,24,31
1101         lwz     $acc15,`2048+224`($Tbl0)
1102         rlwinm  $acc09,$s3,`32-8`,24,31
1103         lbzx    $acc02,$Tbl2,$acc02
1104         rlwinm  $acc10,$s0,`32-8`,24,31
1105         lbzx    $acc03,$Tbl2,$acc03
1106         rlwinm  $acc11,$s1,`32-8`,24,31
1107         lbzx    $acc04,$Tbl2,$acc04
1108         rlwinm  $acc12,$s1,`0`,24,31
1109         lbzx    $acc05,$Tbl2,$acc05
1110         rlwinm  $acc13,$s2,`0`,24,31
1111         lbzx    $acc06,$Tbl2,$acc06
1112         rlwinm  $acc14,$s3,`0`,24,31
1113         lbzx    $acc07,$Tbl2,$acc07
1114         rlwinm  $acc15,$s0,`0`,24,31
1115         lbzx    $acc08,$Tbl2,$acc08
1116         rlwinm  $s0,$acc00,24,0,7       # start each output word with its most significant byte
1117         lbzx    $acc09,$Tbl2,$acc09
1118         rlwinm  $s1,$acc01,24,0,7
1119         lbzx    $acc10,$Tbl2,$acc10
1120         rlwinm  $s2,$acc02,24,0,7
1121         lbzx    $acc11,$Tbl2,$acc11
1122         rlwinm  $s3,$acc03,24,0,7
1123         lbzx    $acc12,$Tbl2,$acc12
1124         rlwimi  $s0,$acc04,16,8,15      # insert the second byte
1125         lbzx    $acc13,$Tbl2,$acc13
1126         rlwimi  $s1,$acc05,16,8,15
1127         lbzx    $acc14,$Tbl2,$acc14
1128         rlwimi  $s2,$acc06,16,8,15
1129         lbzx    $acc15,$Tbl2,$acc15
1130         rlwimi  $s3,$acc07,16,8,15
1131         rlwimi  $s0,$acc08,8,16,23      # insert the third byte
1132         rlwimi  $s1,$acc09,8,16,23
1133         rlwimi  $s2,$acc10,8,16,23
1134         rlwimi  $s3,$acc11,8,16,23
1135         or      $s0,$s0,$acc12          # merge the low byte
1136         or      $s1,$s1,$acc13
1137         or      $s2,$s2,$acc14
1138         or      $s3,$s3,$acc15
1139         xor     $s0,$s0,$t0             # xor in the last four key words
1140         xor     $s1,$s1,$t1
1141         xor     $s2,$s2,$t2
1142         xor     $s3,$s3,$t3
1143         blr
1144         .long   0                       # NOTE(review): this and the next line look like a traceback table - confirm against the ABI
1145         .byte   0,12,0x14,0,0,0,0,0
1146
1147 .align  4
1148 Lppc_AES_decrypt_compact:       # compact decrypt core: byte-table lookups plus arithmetic mixing of the state words
1149         lwz     $acc00,240($key)        # word at key+240 becomes the loop count; presumably the number of rounds - confirm
1150         addi    $Tbl1,$Tbl0,2048        # the byte table used by the lbzx lookups sits at Tbl0+2048
1151         lwz     $t0,0($key)             # first four key words
1152         lis     $mask80,0x8080
1153         lwz     $t1,4($key)
1154         lis     $mask1b,0x1b1b
1155         lwz     $t2,8($key)
1156         ori     $mask80,$mask80,0x8080  # low 32 bits of mask80 = 0x80808080
1157         lwz     $t3,12($key)
1158         ori     $mask1b,$mask1b,0x1b1b  # low 32 bits of mask1b = 0x1b1b1b1b
1159         addi    $key,$key,16
1160 ___
1161 $code.=<<___ if ($SIZE_T==8);   # 64-bit build: copy the low 32 bits of each mask into the upper 32 bits
1162         insrdi  $mask80,$mask80,32,0
1163         insrdi  $mask1b,$mask1b,32,0
1164 ___
1165 $code.=<<___;
1166         mtctr   $acc00
1167 .align  4
1168 Ldec_compact_loop:
1169         xor     $s0,$s0,$t0             # xor the current four key words into the state
1170         xor     $s1,$s1,$t1
1171         rlwinm  $acc00,$s0,`32-24`,24,31        # top byte of each state word as a table index
1172         xor     $s2,$s2,$t2
1173         rlwinm  $acc01,$s1,`32-24`,24,31
1174         xor     $s3,$s3,$t3
1175         rlwinm  $acc02,$s2,`32-24`,24,31
1176         rlwinm  $acc03,$s3,`32-24`,24,31
1177         rlwinm  $acc04,$s3,`32-16`,24,31        # second byte, source words taken in the order s3,s0,s1,s2
1178         rlwinm  $acc05,$s0,`32-16`,24,31
1179         rlwinm  $acc06,$s1,`32-16`,24,31
1180         rlwinm  $acc07,$s2,`32-16`,24,31
1181         lbzx    $acc00,$Tbl1,$acc00     # byte lookups
1182         rlwinm  $acc08,$s2,`32-8`,24,31 # third byte, order s2,s3,s0,s1
1183         lbzx    $acc01,$Tbl1,$acc01
1184         rlwinm  $acc09,$s3,`32-8`,24,31
1185         lbzx    $acc02,$Tbl1,$acc02
1186         rlwinm  $acc10,$s0,`32-8`,24,31
1187         lbzx    $acc03,$Tbl1,$acc03
1188         rlwinm  $acc11,$s1,`32-8`,24,31
1189         lbzx    $acc04,$Tbl1,$acc04
1190         rlwinm  $acc12,$s1,`0`,24,31    # low byte, order s1,s2,s3,s0
1191         lbzx    $acc05,$Tbl1,$acc05
1192         rlwinm  $acc13,$s2,`0`,24,31
1193         lbzx    $acc06,$Tbl1,$acc06
1194         rlwinm  $acc14,$s3,`0`,24,31
1195         lbzx    $acc07,$Tbl1,$acc07
1196         rlwinm  $acc15,$s0,`0`,24,31
1197         lbzx    $acc08,$Tbl1,$acc08
1198         rlwinm  $s0,$acc00,24,0,7       # rebuild each state word, starting with its most significant byte
1199         lbzx    $acc09,$Tbl1,$acc09
1200         rlwinm  $s1,$acc01,24,0,7
1201         lbzx    $acc10,$Tbl1,$acc10
1202         rlwinm  $s2,$acc02,24,0,7
1203         lbzx    $acc11,$Tbl1,$acc11
1204         rlwinm  $s3,$acc03,24,0,7
1205         lbzx    $acc12,$Tbl1,$acc12
1206         rlwimi  $s0,$acc04,16,8,15      # insert the second byte
1207         lbzx    $acc13,$Tbl1,$acc13
1208         rlwimi  $s1,$acc05,16,8,15
1209         lbzx    $acc14,$Tbl1,$acc14
1210         rlwimi  $s2,$acc06,16,8,15
1211         lbzx    $acc15,$Tbl1,$acc15
1212         rlwimi  $s3,$acc07,16,8,15
1213         rlwimi  $s0,$acc08,8,16,23      # insert the third byte
1214         rlwimi  $s1,$acc09,8,16,23
1215         rlwimi  $s2,$acc10,8,16,23
1216         rlwimi  $s3,$acc11,8,16,23
1217         lwz     $t0,0($key)             # fetch the next four key words
1218         or      $s0,$s0,$acc12          # merge the low byte
1219         lwz     $t1,4($key)
1220         or      $s1,$s1,$acc13
1221         lwz     $t2,8($key)
1222         or      $s2,$s2,$acc14
1223         lwz     $t3,12($key)
1224         or      $s3,$s3,$acc15
1225
1226         addi    $key,$key,16
1227         bdz     Ldec_compact_done       # counter exhausted: skip the mixing step below
1228 ___
1229 $code.=<<___ if ($SIZE_T==8);   # 64-bit build: two state words are processed per register
1230         # vectorized permutation improves decrypt performance by 10%
1231         insrdi  $s0,$s1,32,0            # pack s1 into the upper half of s0
1232         insrdi  $s2,$s3,32,0            # pack s3 into the upper half of s2
1233
1234         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1235         and     $acc02,$s2,$mask80
1236         srdi    $acc04,$acc00,7         # r1>>7
1237         srdi    $acc06,$acc02,7
1238         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1239         andc    $acc10,$s2,$mask80
1240         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1241         sub     $acc02,$acc02,$acc06
1242         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1243         add     $acc10,$acc10,$acc10
1244         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1245         and     $acc02,$acc02,$mask1b
1246         xor     $acc00,$acc00,$acc08    # r2
1247         xor     $acc02,$acc02,$acc10
1248
1249         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1250         and     $acc06,$acc02,$mask80
1251         srdi    $acc08,$acc04,7         # r1>>7
1252         srdi    $acc10,$acc06,7
1253         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1254         andc    $acc14,$acc02,$mask80
1255         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1256         sub     $acc06,$acc06,$acc10
1257         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1258         add     $acc14,$acc14,$acc14
1259         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1260         and     $acc06,$acc06,$mask1b
1261         xor     $acc04,$acc04,$acc12    # r4
1262         xor     $acc06,$acc06,$acc14
1263
1264         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1265         and     $acc10,$acc06,$mask80
1266         srdi    $acc12,$acc08,7         # r1>>7
1267         srdi    $acc14,$acc10,7
1268         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1269         sub     $acc10,$acc10,$acc14
1270         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1271         andc    $acc14,$acc06,$mask80
1272         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1273         add     $acc14,$acc14,$acc14
1274         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1275         and     $acc10,$acc10,$mask1b
1276         xor     $acc08,$acc08,$acc12    # r8
1277         xor     $acc10,$acc10,$acc14
1278
1279         xor     $acc00,$acc00,$s0       # r2^r0
1280         xor     $acc02,$acc02,$s2
1281         xor     $acc04,$acc04,$s0       # r4^r0
1282         xor     $acc06,$acc06,$s2
1283
1284         extrdi  $acc01,$acc00,32,0      # unpack the upper halves into the odd-numbered temporaries
1285         extrdi  $acc03,$acc02,32,0
1286         extrdi  $acc05,$acc04,32,0
1287         extrdi  $acc07,$acc06,32,0
1288         extrdi  $acc09,$acc08,32,0
1289         extrdi  $acc11,$acc10,32,0
1290 ___
1291 $code.=<<___ if ($SIZE_T==4);   # 32-bit build: same computation with one state word per register
1292         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1293         and     $acc01,$s1,$mask80
1294         and     $acc02,$s2,$mask80
1295         and     $acc03,$s3,$mask80
1296         srwi    $acc04,$acc00,7         # r1>>7
1297         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1298         srwi    $acc05,$acc01,7
1299         andc    $acc09,$s1,$mask80
1300         srwi    $acc06,$acc02,7
1301         andc    $acc10,$s2,$mask80
1302         srwi    $acc07,$acc03,7
1303         andc    $acc11,$s3,$mask80
1304         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1305         sub     $acc01,$acc01,$acc05
1306         sub     $acc02,$acc02,$acc06
1307         sub     $acc03,$acc03,$acc07
1308         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1309         add     $acc09,$acc09,$acc09
1310         add     $acc10,$acc10,$acc10
1311         add     $acc11,$acc11,$acc11
1312         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1313         and     $acc01,$acc01,$mask1b
1314         and     $acc02,$acc02,$mask1b
1315         and     $acc03,$acc03,$mask1b
1316         xor     $acc00,$acc00,$acc08    # r2
1317         xor     $acc01,$acc01,$acc09
1318         xor     $acc02,$acc02,$acc10
1319         xor     $acc03,$acc03,$acc11
1320
1321         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1322         and     $acc05,$acc01,$mask80
1323         and     $acc06,$acc02,$mask80
1324         and     $acc07,$acc03,$mask80
1325         srwi    $acc08,$acc04,7         # r1>>7
1326         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1327         srwi    $acc09,$acc05,7
1328         andc    $acc13,$acc01,$mask80
1329         srwi    $acc10,$acc06,7
1330         andc    $acc14,$acc02,$mask80
1331         srwi    $acc11,$acc07,7
1332         andc    $acc15,$acc03,$mask80
1333         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1334         sub     $acc05,$acc05,$acc09
1335         sub     $acc06,$acc06,$acc10
1336         sub     $acc07,$acc07,$acc11
1337         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1338         add     $acc13,$acc13,$acc13
1339         add     $acc14,$acc14,$acc14
1340         add     $acc15,$acc15,$acc15
1341         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1342         and     $acc05,$acc05,$mask1b
1343         and     $acc06,$acc06,$mask1b
1344         and     $acc07,$acc07,$mask1b
1345         xor     $acc04,$acc04,$acc12    # r4
1346         xor     $acc05,$acc05,$acc13
1347         xor     $acc06,$acc06,$acc14
1348         xor     $acc07,$acc07,$acc15
1349
1350         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1351         and     $acc09,$acc05,$mask80
1352         srwi    $acc12,$acc08,7         # r1>>7
1353         and     $acc10,$acc06,$mask80
1354         srwi    $acc13,$acc09,7
1355         and     $acc11,$acc07,$mask80
1356         srwi    $acc14,$acc10,7
1357         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1358         srwi    $acc15,$acc11,7
1359         sub     $acc09,$acc09,$acc13
1360         sub     $acc10,$acc10,$acc14
1361         sub     $acc11,$acc11,$acc15
1362         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1363         andc    $acc13,$acc05,$mask80
1364         andc    $acc14,$acc06,$mask80
1365         andc    $acc15,$acc07,$mask80
1366         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1367         add     $acc13,$acc13,$acc13
1368         add     $acc14,$acc14,$acc14
1369         add     $acc15,$acc15,$acc15
1370         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1371         and     $acc09,$acc09,$mask1b
1372         and     $acc10,$acc10,$mask1b
1373         and     $acc11,$acc11,$mask1b
1374         xor     $acc08,$acc08,$acc12    # r8
1375         xor     $acc09,$acc09,$acc13
1376         xor     $acc10,$acc10,$acc14
1377         xor     $acc11,$acc11,$acc15
1378
1379         xor     $acc00,$acc00,$s0       # r2^r0
1380         xor     $acc01,$acc01,$s1
1381         xor     $acc02,$acc02,$s2
1382         xor     $acc03,$acc03,$s3
1383         xor     $acc04,$acc04,$s0       # r4^r0
1384         xor     $acc05,$acc05,$s1
1385         xor     $acc06,$acc06,$s2
1386         xor     $acc07,$acc07,$s3
1387 ___
1388 $code.=<<___;   # common tail for both widths: combine r0, r2^r0, r4^r0 and r8 under 32-bit rotations
1389         rotrwi  $s0,$s0,8               # = ROTATE(r0,8)
1390         rotrwi  $s1,$s1,8
1391         xor     $s0,$s0,$acc00          # ^= r2^r0
1392         rotrwi  $s2,$s2,8
1393         xor     $s1,$s1,$acc01
1394         rotrwi  $s3,$s3,8
1395         xor     $s2,$s2,$acc02
1396         xor     $s3,$s3,$acc03
1397         xor     $acc00,$acc00,$acc08    # acc00..acc03 now hold r8^r2^r0
1398         xor     $acc01,$acc01,$acc09
1399         xor     $acc02,$acc02,$acc10
1400         xor     $acc03,$acc03,$acc11
1401         xor     $s0,$s0,$acc04          # ^= r4^r0
1402         rotrwi  $acc00,$acc00,24
1403         xor     $s1,$s1,$acc05
1404         rotrwi  $acc01,$acc01,24
1405         xor     $s2,$s2,$acc06
1406         rotrwi  $acc02,$acc02,24
1407         xor     $s3,$s3,$acc07
1408         rotrwi  $acc03,$acc03,24
1409         xor     $acc04,$acc04,$acc08    # acc04..acc07 now hold r8^r4^r0
1410         xor     $acc05,$acc05,$acc09
1411         xor     $acc06,$acc06,$acc10
1412         xor     $acc07,$acc07,$acc11
1413         xor     $s0,$s0,$acc08          # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1414         rotrwi  $acc04,$acc04,16
1415         xor     $s1,$s1,$acc09
1416         rotrwi  $acc05,$acc05,16
1417         xor     $s2,$s2,$acc10
1418         rotrwi  $acc06,$acc06,16
1419         xor     $s3,$s3,$acc11
1420         rotrwi  $acc07,$acc07,16
1421         xor     $s0,$s0,$acc00          # ^= ROTATE(r8^r2^r0,24)
1422         rotrwi  $acc08,$acc08,8
1423         xor     $s1,$s1,$acc01
1424         rotrwi  $acc09,$acc09,8
1425         xor     $s2,$s2,$acc02
1426         rotrwi  $acc10,$acc10,8
1427         xor     $s3,$s3,$acc03
1428         rotrwi  $acc11,$acc11,8
1429         xor     $s0,$s0,$acc04          # ^= ROTATE(r8^r4^r0,16)
1430         xor     $s1,$s1,$acc05
1431         xor     $s2,$s2,$acc06
1432         xor     $s3,$s3,$acc07
1433         xor     $s0,$s0,$acc08          # ^= ROTATE(r8,8)
1434         xor     $s1,$s1,$acc09
1435         xor     $s2,$s2,$acc10
1436         xor     $s3,$s3,$acc11
1437
1438         b       Ldec_compact_loop       # the next iteration starts by xoring in the key words loaded above
1439 .align  4
1440 Ldec_compact_done:      # reached from the bdz above, after the last byte-lookup pass
1441         xor     $s0,$s0,$t0             # xor in the last four key words
1442         xor     $s1,$s1,$t1
1443         xor     $s2,$s2,$t2
1444         xor     $s3,$s3,$t3
1445         blr
1446         .long   0                       # NOTE(review): this and the next line look like a traceback table - confirm against the ABI
1447         .byte   0,12,0x14,0,0,0,0,0
1448 .size   .AES_decrypt,.-.AES_decrypt     # symbol size spans from .AES_decrypt up to this point
1449
1450 .asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1451 .align  7
1452 ___
1453
1454 $code =~ s/\`([^\`]*)\`/eval $1/gem;       # replace every backtick-quoted span with the result of evaluating it as Perl
1455 print $code;                               # emit the generated assembly on STDOUT
1456 close STDOUT or die "error closing STDOUT: $!";    # buffered write errors only surface at close; fail instead of leaving truncated output