PPC assembly pack: relax 64-bit requirement for little-endian support.
[openssl.git] / crypto / aes / asm / aes-ppc.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # Needs more work: key setup, CBC routine...
11 #
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17 # at 1/3 of ppc_AES_decrypt.
18
19 # February 2010
20 #
21 # Rescheduling instructions to favour Power6 pipeline gave 10%
22 # performance improvement on the platform in question (and marginal
23 # improvement even on others). It should be noted that Power6 fails
24 # to process byte in 18 cycles, only in 23, because it fails to issue
25 # 4 load instructions in two cycles, only in 3. As a result, non-compact
26 # block subroutines are 25% slower than one would expect. Compact
27 # functions scale better, because they have pure computational part,
28 # which scales perfectly with clock frequency. To be specific
29 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
# Generator set-up.
#
# The first command-line argument is the perlasm "flavour". A flavour that
# contains "64" selects 8-byte registers/stack slots and the doubleword
# load/store mnemonics; one that contains "32" selects 4-byte slots and the
# word mnemonics. Anything else is rejected.
#   $SIZE_T  - size in bytes of one saved register / stack slot
#   $LRSAVE  - offset (relative to the caller's frame) of the LR save slot
#   $STU     - "store with update" mnemonic used to allocate the frame
#   $POP     - load mnemonic used to restore registers
#   $PUSH    - store mnemonic used to save registers
32 $flavour = shift;
33
34 if ($flavour =~ /64/) {
35         $SIZE_T =8;
36         $LRSAVE =2*$SIZE_T;
37         $STU    ="stdu";
38         $POP    ="ld";
39         $PUSH   ="std";
40 } elsif ($flavour =~ /32/) {
41         $SIZE_T =4;
42         $LRSAVE =$SIZE_T;
43         $STU    ="stwu";
44         $POP    ="lwz";
45         $PUSH   ="stw";
46 } else { die "nonsense $flavour"; }
47
# Non-zero (equal to $SIZE_T) when the flavour ends in "le", zero otherwise.
# The code below only tests it for truth.
48 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
49
# Locate ppc-xlate.pl either next to this script or in ../../perlasm
# relative to it.
50 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
51 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
52 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
53 die "can't locate ppc-xlate.pl";
54
# Everything printed from here on is piped through the translator; the next
# command-line argument is appended to its command line.
# Low-precedence "or" is required: with "||" the die would be attached to the
# (always true) command string and a failed open() would go unnoticed.
55 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
56
# Stack frame size used by the generated functions: 32 slots of $SIZE_T bytes.
57 $FRAME=32*$SIZE_T;
58
# _data_word(LIST)
# Appends one "\t.long\t0xWWWWWWWW,0xWWWWWWWW\n" line to the global $code for
# each argument, i.e. every 32-bit constant is emitted twice, so each table
# entry occupies 8 bytes. Processing stops at the first undefined argument.
# No meaningful return value.
# The former empty prototype "()" claimed the sub takes no arguments and only
# worked because callers use the "&" call form; it is dropped so that both
# &_data_word(...) and _data_word(...) are valid.
59 sub _data_word
60 { my $word;
61     while(defined($word=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$word,$word; }
62 }
63
# Symbolic names for the PowerPC registers used by the generated code.
# Stack pointer and TOC register.
64 ($sp,$toc)=("r1","r2");
# The three registers the entry points read: input, output, key schedule.
66 ($inp,$out,$key)=("r3","r4","r5");
69
# Table pointers. $Tbl0 shares r3 with $inp, and $Tbl3 reuses $out's register.
70 ($Tbl0,$Tbl1,$Tbl2)=("r3","r6","r7");
73 $Tbl3=$out;     # stay away from "r2"; $out is offloaded to stack
74
# The four 32-bit state words.
75 ($s0,$s1,$s2,$s3)=map("r$_",8..11);
79
# Temporaries.
80 ($t0,$t1)=("r12","r0");       # $t1: stay away from "r13";
82 ($t2,$t3)=("r14","r15");
84
# Sixteen accumulators occupying r16 through r31.
85 ($acc00,$acc01,$acc02,$acc03,
90  $acc04,$acc05,$acc06,$acc07,
95  $acc08,$acc09,$acc10,$acc11,
100  $acc12,$acc13,$acc14,$acc15)=map("r$_",16..31);
104
# Mask registers alias two of the table pointers.
105 ($mask80,$mask1b)=($Tbl2,$Tbl3);
107
# LAES_Te and LAES_Td are small position-independent stubs that return, in
# $Tbl0 (r3), the address of one of the lookup tables emitted right after
# them:
#   - each stub saves LR in r0, executes "bcl 20,31,$+4" so that LR receives
#     the address of the following instruction, copies LR into $Tbl0, adds a
#     constant distance, restores LR and returns;
#   - a stub is 9*4 bytes long and is padded with .space so that each one
#     occupies 64 bytes; the data therefore starts 128 bytes past LAES_Te;
#   - LAES_Te yields the start of the first table, while LAES_Td skips a
#     further 2048+256 bytes: the first word table (256 constants, each
#     emitted twice by _data_word) plus the 256-byte table that follows it.
# NOTE(review): the ".byte 0,12,0x14,..." record after each blr looks like a
# traceback/debug tag - confirm its meaning before changing it; it is counted
# in the 9*4-byte stub size.
108 $code.=<<___;
109 .machine        "any"
110 .text
111
112 .align  7
113 LAES_Te:
114         mflr    r0
115         bcl     20,31,\$+4
116         mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
117         addi    $Tbl0,$Tbl0,`128-8`
118         mtlr    r0
119         blr
120         .long   0
121         .byte   0,12,0x14,0,0,0,0,0
122         .space  `64-9*4`
123 LAES_Td:
124         mflr    r0
125         bcl     20,31,\$+4
126         mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
127         addi    $Tbl0,$Tbl0,`128-64-8+2048+256`
128         mtlr    r0
129         blr
130         .long   0
131         .byte   0,12,0x14,0,0,0,0,0
132         .space  `128-64-9*4`
133 ___
# First word table, the one whose address LAES_Te returns: 256 32-bit
# constants. _data_word emits every constant twice, so the table occupies
# 256*8 = 2048 bytes in the output.
# NOTE(review): the values look like the standard AES "Te0" encryption
# table - verify against a reference before editing any entry.
134 &_data_word(
135         0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
136         0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
137         0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
138         0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
139         0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
140         0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
141         0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
142         0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
143         0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
144         0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
145         0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
146         0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
147         0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
148         0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
149         0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
150         0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
151         0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
152         0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
153         0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
154         0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
155         0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
156         0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
157         0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
158         0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
159         0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
160         0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
161         0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
162         0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
163         0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
164         0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
165         0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
166         0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
167         0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
168         0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
169         0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
170         0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
171         0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
172         0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
173         0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
174         0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
175         0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
176         0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
177         0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
178         0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
179         0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
180         0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
181         0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
182         0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
183         0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
184         0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
185         0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
186         0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
187         0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
188         0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
189         0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
190         0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
191         0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
192         0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
193         0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
194         0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
195         0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
196         0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
197         0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
198         0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# 256-byte table emitted directly after the 2048-byte word table above. The
# encryption routines address it at offset 2048 from the table base, and the
# non-compact routine's comment there calls it "Te4".
# NOTE(review): the contents look like the AES S-box - verify against a
# reference before editing any entry.
199 $code.=<<___;
200 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
201 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
202 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
203 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
204 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
205 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
206 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
207 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
208 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
209 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
210 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
211 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
212 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
213 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
214 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
215 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
216 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
217 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
218 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
219 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
220 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
221 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
222 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
223 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
224 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
225 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
226 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
227 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
228 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
229 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
230 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
231 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
232 ___
# Second word table, the one whose address LAES_Td returns: it sits
# 2048+256 bytes past the first table. 256 32-bit constants, each emitted
# twice by _data_word, so it occupies 2048 bytes.
# NOTE(review): the values look like the standard AES "Td0" decryption
# table - verify against a reference before editing any entry.
233 &_data_word(
234         0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
235         0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
236         0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
237         0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
238         0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
239         0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
240         0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
241         0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
242         0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
243         0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
244         0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
245         0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
246         0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
247         0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
248         0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
249         0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
250         0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
251         0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
252         0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
253         0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
254         0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
255         0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
256         0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
257         0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
258         0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
259         0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
260         0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
261         0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
262         0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
263         0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
264         0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
265         0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
266         0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
267         0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
268         0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
269         0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
270         0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
271         0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
272         0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
273         0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
274         0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
275         0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
276         0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
277         0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
278         0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
279         0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
280         0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
281         0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
282         0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
283         0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
284         0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
285         0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
286         0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
287         0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
288         0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
289         0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
290         0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
291         0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
292         0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
293         0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
294         0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
295         0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
296         0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
297         0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# This heredoc emits two things:
#  1. The 256-byte table that follows the second word table.
#     NOTE(review): contents look like the AES inverse S-box ("Td4" by
#     analogy with the encryption side) - verify before editing.
#  2. The entry and prologue of the global function .AES_encrypt, which reads
#     its arguments from $inp (r3), $out (r4) and $key (r5). The prologue
#       - allocates a $FRAME-byte stack frame with $STU,
#       - saves $out (its register doubles as $Tbl3), r14-r31 and the link
#         register (at $FRAME+$LRSAVE),
#       - tests the low two bits of $inp and $out and branches to
#         Lenc_unaligned when either pointer is not 4-byte aligned;
#         otherwise execution falls through to Lenc_unaligned_ok.
298 $code.=<<___;
299 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
300 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
301 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
302 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
303 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
304 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
305 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
306 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
307 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
308 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
309 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
310 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
311 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
312 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
313 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
314 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
315 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
316 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
317 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
318 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
319 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
320 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
321 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
322 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
323 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
324 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
325 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
326 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
327 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
328 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
329 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
330 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
331
332
333 .globl  .AES_encrypt
334 .align  7
335 .AES_encrypt:
336         $STU    $sp,-$FRAME($sp)
337         mflr    r0
338
339         $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
340         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
341         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
342         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
343         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
344         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
345         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
346         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
347         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
348         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
349         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
350         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
351         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
352         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
353         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
354         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
355         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
356         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
357         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
358         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
359
360         andi.   $t0,$inp,3
361         andi.   $t1,$out,3
362         or.     $t0,$t0,$t1
363         bne     Lenc_unaligned
364
365 Lenc_unaligned_ok:
366 ___
# Word-aligned path of .AES_encrypt (continues at Lenc_unaligned_ok).
# The 16-byte block is loaded as four 32-bit words into $s0..$s3, encrypted
# by Lppc_AES_encrypt_compact, and stored back as four words. Exactly one of
# each big-/little-endian pair below is emitted, depending on $LITTLE_ENDIAN.

# Big-endian targets: load the four state words directly.
367 $code.=<<___ if (!$LITTLE_ENDIAN);
368         lwz     $s0,0($inp)
369         lwz     $s1,4($inp)
370         lwz     $s2,8($inp)
371         lwz     $s3,12($inp)
372 ___
# Little-endian targets: load into temporaries, then reverse the byte order
# of each word (rotate left by 8, then insert the two remaining bytes from a
# rotate by 24) so that $s0..$s3 hold the same values as on big-endian.
373 $code.=<<___ if ($LITTLE_ENDIAN);
374         lwz     $t0,0($inp)
375         lwz     $t1,4($inp)
376         lwz     $t2,8($inp)
377         lwz     $t3,12($inp)
378         rotlwi  $s0,$t0,8
379         rotlwi  $s1,$t1,8
380         rotlwi  $s2,$t2,8
381         rotlwi  $s3,$t3,8
382         rlwimi  $s0,$t0,24,0,7
383         rlwimi  $s1,$t1,24,0,7
384         rlwimi  $s2,$t2,24,0,7
385         rlwimi  $s3,$t3,24,0,7
386         rlwimi  $s0,$t0,24,16,23
387         rlwimi  $s1,$t1,24,16,23
388         rlwimi  $s2,$t2,24,16,23
389         rlwimi  $s3,$t3,24,16,23
390 ___
# Fetch the table base into $Tbl0 (this overwrites $inp, which shares r3 and
# is no longer needed), run the compact encryption routine, and reload $out
# from the slot where the prologue saved it.
391 $code.=<<___;
392         bl      LAES_Te
393         bl      Lppc_AES_encrypt_compact
394         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
395 ___
# Little-endian targets: byte-reverse each result word into a temporary
# before storing it.
396 $code.=<<___ if ($LITTLE_ENDIAN);
397         rotlwi  $t0,$s0,8
398         rotlwi  $t1,$s1,8
399         rotlwi  $t2,$s2,8
400         rotlwi  $t3,$s3,8
401         rlwimi  $t0,$s0,24,0,7
402         rlwimi  $t1,$s1,24,0,7
403         rlwimi  $t2,$s2,24,0,7
404         rlwimi  $t3,$s3,24,0,7
405         rlwimi  $t0,$s0,24,16,23
406         rlwimi  $t1,$s1,24,16,23
407         rlwimi  $t2,$s2,24,16,23
408         rlwimi  $t3,$s3,24,16,23
409         stw     $t0,0($out)
410         stw     $t1,4($out)
411         stw     $t2,8($out)
412         stw     $t3,12($out)
413 ___
# Big-endian targets: store the four result words directly.
414 $code.=<<___ if (!$LITTLE_ENDIAN);
415         stw     $s0,0($out)
416         stw     $s1,4($out)
417         stw     $s2,8($out)
418         stw     $s3,12($out)
419 ___
420 $code.=<<___;
421         b       Lenc_done
422
423 Lenc_unaligned:
424         subfic  $t0,$inp,4096
425         subfic  $t1,$out,4096
426         andi.   $t0,$t0,4096-16
427         beq     Lenc_xpage
428         andi.   $t1,$t1,4096-16
429         bne     Lenc_unaligned_ok
430
431 Lenc_xpage:
432         lbz     $acc00,0($inp)
433         lbz     $acc01,1($inp)
434         lbz     $acc02,2($inp)
435         lbz     $s0,3($inp)
436         lbz     $acc04,4($inp)
437         lbz     $acc05,5($inp)
438         lbz     $acc06,6($inp)
439         lbz     $s1,7($inp)
440         lbz     $acc08,8($inp)
441         lbz     $acc09,9($inp)
442         lbz     $acc10,10($inp)
443         insrwi  $s0,$acc00,8,0
444         lbz     $s2,11($inp)
445         insrwi  $s1,$acc04,8,0
446         lbz     $acc12,12($inp)
447         insrwi  $s0,$acc01,8,8
448         lbz     $acc13,13($inp)
449         insrwi  $s1,$acc05,8,8
450         lbz     $acc14,14($inp)
451         insrwi  $s0,$acc02,8,16
452         lbz     $s3,15($inp)
453         insrwi  $s1,$acc06,8,16
454         insrwi  $s2,$acc08,8,0
455         insrwi  $s3,$acc12,8,0
456         insrwi  $s2,$acc09,8,8
457         insrwi  $s3,$acc13,8,8
458         insrwi  $s2,$acc10,8,16
459         insrwi  $s3,$acc14,8,16
460
461         bl      LAES_Te
462         bl      Lppc_AES_encrypt_compact
463         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
464
465         extrwi  $acc00,$s0,8,0
466         extrwi  $acc01,$s0,8,8
467         stb     $acc00,0($out)
468         extrwi  $acc02,$s0,8,16
469         stb     $acc01,1($out)
470         stb     $acc02,2($out)
471         extrwi  $acc04,$s1,8,0
472         stb     $s0,3($out)
473         extrwi  $acc05,$s1,8,8
474         stb     $acc04,4($out)
475         extrwi  $acc06,$s1,8,16
476         stb     $acc05,5($out)
477         stb     $acc06,6($out)
478         extrwi  $acc08,$s2,8,0
479         stb     $s1,7($out)
480         extrwi  $acc09,$s2,8,8
481         stb     $acc08,8($out)
482         extrwi  $acc10,$s2,8,16
483         stb     $acc09,9($out)
484         stb     $acc10,10($out)
485         extrwi  $acc12,$s3,8,0
486         stb     $s2,11($out)
487         extrwi  $acc13,$s3,8,8
488         stb     $acc12,12($out)
489         extrwi  $acc14,$s3,8,16
490         stb     $acc13,13($out)
491         stb     $acc14,14($out)
492         stb     $s3,15($out)
493
494 Lenc_done:
495         $POP    r0,`$FRAME+$LRSAVE`($sp)
496         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
497         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
498         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
499         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
500         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
501         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
502         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
503         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
504         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
505         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
506         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
507         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
508         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
509         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
510         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
511         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
512         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
513         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
514         mtlr    r0
515         addi    $sp,$sp,$FRAME
516         blr
517         .long   0
518         .byte   0,12,4,1,0x80,18,3,0
519         .long   0
520
521 .align  5
522 Lppc_AES_encrypt:
523         lwz     $acc00,240($key)
524         addi    $Tbl1,$Tbl0,3
525         lwz     $t0,0($key)
526         addi    $Tbl2,$Tbl0,2
527         lwz     $t1,4($key)
528         addi    $Tbl3,$Tbl0,1
529         lwz     $t2,8($key)
530         addi    $acc00,$acc00,-1
531         lwz     $t3,12($key)
532         addi    $key,$key,16
533         xor     $s0,$s0,$t0
534         xor     $s1,$s1,$t1
535         xor     $s2,$s2,$t2
536         xor     $s3,$s3,$t3
537         mtctr   $acc00
538 .align  4
539 Lenc_loop:
540         rlwinm  $acc00,$s0,`32-24+3`,21,28
541         rlwinm  $acc01,$s1,`32-24+3`,21,28
542         rlwinm  $acc02,$s2,`32-24+3`,21,28
543         rlwinm  $acc03,$s3,`32-24+3`,21,28
544         lwz     $t0,0($key)
545         rlwinm  $acc04,$s1,`32-16+3`,21,28
546         lwz     $t1,4($key)
547         rlwinm  $acc05,$s2,`32-16+3`,21,28
548         lwz     $t2,8($key)
549         rlwinm  $acc06,$s3,`32-16+3`,21,28
550         lwz     $t3,12($key)
551         rlwinm  $acc07,$s0,`32-16+3`,21,28
552         lwzx    $acc00,$Tbl0,$acc00
553         rlwinm  $acc08,$s2,`32-8+3`,21,28
554         lwzx    $acc01,$Tbl0,$acc01
555         rlwinm  $acc09,$s3,`32-8+3`,21,28
556         lwzx    $acc02,$Tbl0,$acc02
557         rlwinm  $acc10,$s0,`32-8+3`,21,28
558         lwzx    $acc03,$Tbl0,$acc03
559         rlwinm  $acc11,$s1,`32-8+3`,21,28
560         lwzx    $acc04,$Tbl1,$acc04
561         rlwinm  $acc12,$s3,`0+3`,21,28
562         lwzx    $acc05,$Tbl1,$acc05
563         rlwinm  $acc13,$s0,`0+3`,21,28
564         lwzx    $acc06,$Tbl1,$acc06
565         rlwinm  $acc14,$s1,`0+3`,21,28
566         lwzx    $acc07,$Tbl1,$acc07
567         rlwinm  $acc15,$s2,`0+3`,21,28
568         lwzx    $acc08,$Tbl2,$acc08
569         xor     $t0,$t0,$acc00
570         lwzx    $acc09,$Tbl2,$acc09
571         xor     $t1,$t1,$acc01
572         lwzx    $acc10,$Tbl2,$acc10
573         xor     $t2,$t2,$acc02
574         lwzx    $acc11,$Tbl2,$acc11
575         xor     $t3,$t3,$acc03
576         lwzx    $acc12,$Tbl3,$acc12
577         xor     $t0,$t0,$acc04
578         lwzx    $acc13,$Tbl3,$acc13
579         xor     $t1,$t1,$acc05
580         lwzx    $acc14,$Tbl3,$acc14
581         xor     $t2,$t2,$acc06
582         lwzx    $acc15,$Tbl3,$acc15
583         xor     $t3,$t3,$acc07
584         xor     $t0,$t0,$acc08
585         xor     $t1,$t1,$acc09
586         xor     $t2,$t2,$acc10
587         xor     $t3,$t3,$acc11
588         xor     $s0,$t0,$acc12
589         xor     $s1,$t1,$acc13
590         xor     $s2,$t2,$acc14
591         xor     $s3,$t3,$acc15
592         addi    $key,$key,16
593         bdnz-   Lenc_loop
594
595         addi    $Tbl2,$Tbl0,2048
596         nop
597         lwz     $t0,0($key)
598         rlwinm  $acc00,$s0,`32-24`,24,31
599         lwz     $t1,4($key)
600         rlwinm  $acc01,$s1,`32-24`,24,31
601         lwz     $t2,8($key)
602         rlwinm  $acc02,$s2,`32-24`,24,31
603         lwz     $t3,12($key)
604         rlwinm  $acc03,$s3,`32-24`,24,31
605         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Te4
606         rlwinm  $acc04,$s1,`32-16`,24,31
607         lwz     $acc09,`2048+32`($Tbl0)
608         rlwinm  $acc05,$s2,`32-16`,24,31
609         lwz     $acc10,`2048+64`($Tbl0)
610         rlwinm  $acc06,$s3,`32-16`,24,31
611         lwz     $acc11,`2048+96`($Tbl0)
612         rlwinm  $acc07,$s0,`32-16`,24,31
613         lwz     $acc12,`2048+128`($Tbl0)
614         rlwinm  $acc08,$s2,`32-8`,24,31
615         lwz     $acc13,`2048+160`($Tbl0)
616         rlwinm  $acc09,$s3,`32-8`,24,31
617         lwz     $acc14,`2048+192`($Tbl0)
618         rlwinm  $acc10,$s0,`32-8`,24,31
619         lwz     $acc15,`2048+224`($Tbl0)
620         rlwinm  $acc11,$s1,`32-8`,24,31
621         lbzx    $acc00,$Tbl2,$acc00
622         rlwinm  $acc12,$s3,`0`,24,31
623         lbzx    $acc01,$Tbl2,$acc01
624         rlwinm  $acc13,$s0,`0`,24,31
625         lbzx    $acc02,$Tbl2,$acc02
626         rlwinm  $acc14,$s1,`0`,24,31
627         lbzx    $acc03,$Tbl2,$acc03
628         rlwinm  $acc15,$s2,`0`,24,31
629         lbzx    $acc04,$Tbl2,$acc04
630         rlwinm  $s0,$acc00,24,0,7
631         lbzx    $acc05,$Tbl2,$acc05
632         rlwinm  $s1,$acc01,24,0,7
633         lbzx    $acc06,$Tbl2,$acc06
634         rlwinm  $s2,$acc02,24,0,7
635         lbzx    $acc07,$Tbl2,$acc07
636         rlwinm  $s3,$acc03,24,0,7
637         lbzx    $acc08,$Tbl2,$acc08
638         rlwimi  $s0,$acc04,16,8,15
639         lbzx    $acc09,$Tbl2,$acc09
640         rlwimi  $s1,$acc05,16,8,15
641         lbzx    $acc10,$Tbl2,$acc10
642         rlwimi  $s2,$acc06,16,8,15
643         lbzx    $acc11,$Tbl2,$acc11
644         rlwimi  $s3,$acc07,16,8,15
645         lbzx    $acc12,$Tbl2,$acc12
646         rlwimi  $s0,$acc08,8,16,23
647         lbzx    $acc13,$Tbl2,$acc13
648         rlwimi  $s1,$acc09,8,16,23
649         lbzx    $acc14,$Tbl2,$acc14
650         rlwimi  $s2,$acc10,8,16,23
651         lbzx    $acc15,$Tbl2,$acc15
652         rlwimi  $s3,$acc11,8,16,23
653         or      $s0,$s0,$acc12
654         or      $s1,$s1,$acc13
655         or      $s2,$s2,$acc14
656         or      $s3,$s3,$acc15
657         xor     $s0,$s0,$t0
658         xor     $s1,$s1,$t1
659         xor     $s2,$s2,$t2
660         xor     $s3,$s3,$t3
661         blr
662         .long   0
663         .byte   0,12,0x14,0,0,0,0,0
664
665 .align  4
666 Lppc_AES_encrypt_compact:
667         lwz     $acc00,240($key)
668         addi    $Tbl1,$Tbl0,2048
669         lwz     $t0,0($key)
670         lis     $mask80,0x8080
671         lwz     $t1,4($key)
672         lis     $mask1b,0x1b1b
673         lwz     $t2,8($key)
674         ori     $mask80,$mask80,0x8080
675         lwz     $t3,12($key)
676         ori     $mask1b,$mask1b,0x1b1b
677         addi    $key,$key,16
678         mtctr   $acc00
679 .align  4
680 Lenc_compact_loop:
681         xor     $s0,$s0,$t0
682         xor     $s1,$s1,$t1
683         rlwinm  $acc00,$s0,`32-24`,24,31
684         xor     $s2,$s2,$t2
685         rlwinm  $acc01,$s1,`32-24`,24,31
686         xor     $s3,$s3,$t3
687         rlwinm  $acc02,$s2,`32-24`,24,31
688         rlwinm  $acc03,$s3,`32-24`,24,31
689         rlwinm  $acc04,$s1,`32-16`,24,31
690         rlwinm  $acc05,$s2,`32-16`,24,31
691         rlwinm  $acc06,$s3,`32-16`,24,31
692         rlwinm  $acc07,$s0,`32-16`,24,31
693         lbzx    $acc00,$Tbl1,$acc00
694         rlwinm  $acc08,$s2,`32-8`,24,31
695         lbzx    $acc01,$Tbl1,$acc01
696         rlwinm  $acc09,$s3,`32-8`,24,31
697         lbzx    $acc02,$Tbl1,$acc02
698         rlwinm  $acc10,$s0,`32-8`,24,31
699         lbzx    $acc03,$Tbl1,$acc03
700         rlwinm  $acc11,$s1,`32-8`,24,31
701         lbzx    $acc04,$Tbl1,$acc04
702         rlwinm  $acc12,$s3,`0`,24,31
703         lbzx    $acc05,$Tbl1,$acc05
704         rlwinm  $acc13,$s0,`0`,24,31
705         lbzx    $acc06,$Tbl1,$acc06
706         rlwinm  $acc14,$s1,`0`,24,31
707         lbzx    $acc07,$Tbl1,$acc07
708         rlwinm  $acc15,$s2,`0`,24,31
709         lbzx    $acc08,$Tbl1,$acc08
710         rlwinm  $s0,$acc00,24,0,7
711         lbzx    $acc09,$Tbl1,$acc09
712         rlwinm  $s1,$acc01,24,0,7
713         lbzx    $acc10,$Tbl1,$acc10
714         rlwinm  $s2,$acc02,24,0,7
715         lbzx    $acc11,$Tbl1,$acc11
716         rlwinm  $s3,$acc03,24,0,7
717         lbzx    $acc12,$Tbl1,$acc12
718         rlwimi  $s0,$acc04,16,8,15
719         lbzx    $acc13,$Tbl1,$acc13
720         rlwimi  $s1,$acc05,16,8,15
721         lbzx    $acc14,$Tbl1,$acc14
722         rlwimi  $s2,$acc06,16,8,15
723         lbzx    $acc15,$Tbl1,$acc15
724         rlwimi  $s3,$acc07,16,8,15
725         rlwimi  $s0,$acc08,8,16,23
726         rlwimi  $s1,$acc09,8,16,23
727         rlwimi  $s2,$acc10,8,16,23
728         rlwimi  $s3,$acc11,8,16,23
729         lwz     $t0,0($key)
730         or      $s0,$s0,$acc12
731         lwz     $t1,4($key)
732         or      $s1,$s1,$acc13
733         lwz     $t2,8($key)
734         or      $s2,$s2,$acc14
735         lwz     $t3,12($key)
736         or      $s3,$s3,$acc15
737
738         addi    $key,$key,16
739         bdz     Lenc_compact_done
740
741         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
742         and     $acc01,$s1,$mask80
743         and     $acc02,$s2,$mask80
744         and     $acc03,$s3,$mask80
745         srwi    $acc04,$acc00,7         # r1>>7
746         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
747         srwi    $acc05,$acc01,7
748         andc    $acc09,$s1,$mask80
749         srwi    $acc06,$acc02,7
750         andc    $acc10,$s2,$mask80
751         srwi    $acc07,$acc03,7
752         andc    $acc11,$s3,$mask80
753         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
754         sub     $acc01,$acc01,$acc05
755         sub     $acc02,$acc02,$acc06
756         sub     $acc03,$acc03,$acc07
757         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
758         add     $acc09,$acc09,$acc09
759         add     $acc10,$acc10,$acc10
760         add     $acc11,$acc11,$acc11
761         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
762         and     $acc01,$acc01,$mask1b
763         and     $acc02,$acc02,$mask1b
764         and     $acc03,$acc03,$mask1b
765         xor     $acc00,$acc00,$acc08    # r2
766         xor     $acc01,$acc01,$acc09
767          rotlwi $acc12,$s0,16           # ROTATE(r0,16)
768         xor     $acc02,$acc02,$acc10
769          rotlwi $acc13,$s1,16
770         xor     $acc03,$acc03,$acc11
771          rotlwi $acc14,$s2,16
772
773         xor     $s0,$s0,$acc00          # r0^r2
774         rotlwi  $acc15,$s3,16
775         xor     $s1,$s1,$acc01
776         rotrwi  $s0,$s0,24              # ROTATE(r2^r0,24)
777         xor     $s2,$s2,$acc02
778         rotrwi  $s1,$s1,24
779         xor     $s3,$s3,$acc03
780         rotrwi  $s2,$s2,24
781         xor     $s0,$s0,$acc00          # ROTATE(r2^r0,24)^r2
782         rotrwi  $s3,$s3,24
783         xor     $s1,$s1,$acc01
784         xor     $s2,$s2,$acc02
785         xor     $s3,$s3,$acc03
786         rotlwi  $acc08,$acc12,8         # ROTATE(r0,24)
787         xor     $s0,$s0,$acc12          #
788         rotlwi  $acc09,$acc13,8
789         xor     $s1,$s1,$acc13
790         rotlwi  $acc10,$acc14,8
791         xor     $s2,$s2,$acc14
792         rotlwi  $acc11,$acc15,8
793         xor     $s3,$s3,$acc15
794         xor     $s0,$s0,$acc08          #
795         xor     $s1,$s1,$acc09
796         xor     $s2,$s2,$acc10
797         xor     $s3,$s3,$acc11
798
799         b       Lenc_compact_loop
800 .align  4
801 Lenc_compact_done:
802         xor     $s0,$s0,$t0
803         xor     $s1,$s1,$t1
804         xor     $s2,$s2,$t2
805         xor     $s3,$s3,$t3
806         blr
807         .long   0
808         .byte   0,12,0x14,0,0,0,0,0
809 .size   .AES_encrypt,.-.AES_encrypt
810
# .AES_decrypt: public entry point that decrypts one 16-byte block.
# The operands are referred to as inp, out and key; their register
# assignment is made earlier in the file.
# Outline: allocate a stack frame, save r14-r31 and the link register,
# load the block into s0..s3, call LAES_Td and the compact decrypt core,
# store s0..s3 to out, restore the saved registers and return.
811 .globl  .AES_decrypt
812 .align  7
813 .AES_decrypt:
814         $STU    $sp,-$FRAME($sp)
815         mflr    r0
816
# out is spilled to the frame here and reloaded after the core returns
# (presumably its register is reused by the core - confirm against the
# register map at the top of the file).
817         $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
818         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
819         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
820         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
821         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
822         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
823         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
824         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
825         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
826         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
827         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
828         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
829         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
830         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
831         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
832         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
833         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
834         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
835         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
836         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
837
# The word-access path is taken directly only when both inp and out are
# 4-byte aligned; otherwise Ldec_unaligned decides how to proceed.
838         andi.   $t0,$inp,3
839         andi.   $t1,$out,3
840         or.     $t0,$t0,$t1
841         bne     Ldec_unaligned
842
843 Ldec_unaligned_ok:
844 ___
# Big-endian build: the four input words are loaded as they are.
845 $code.=<<___ if (!$LITTLE_ENDIAN);
846         lwz     $s0,0($inp)
847         lwz     $s1,4($inp)
848         lwz     $s2,8($inp)
849         lwz     $s3,12($inp)
850 ___
# Little-endian build: load into t0..t3, then byte-reverse each word into
# s0..s3 (rotate left by 8, then patch bytes 0 and 2 from a rotate by 24).
851 $code.=<<___ if ($LITTLE_ENDIAN);
852         lwz     $t0,0($inp)
853         lwz     $t1,4($inp)
854         lwz     $t2,8($inp)
855         lwz     $t3,12($inp)
856         rotlwi  $s0,$t0,8
857         rotlwi  $s1,$t1,8
858         rotlwi  $s2,$t2,8
859         rotlwi  $s3,$t3,8
860         rlwimi  $s0,$t0,24,0,7
861         rlwimi  $s1,$t1,24,0,7
862         rlwimi  $s2,$t2,24,0,7
863         rlwimi  $s3,$t3,24,0,7
864         rlwimi  $s0,$t0,24,16,23
865         rlwimi  $s1,$t1,24,16,23
866         rlwimi  $s2,$t2,24,16,23
867         rlwimi  $s3,$t3,24,16,23
868 ___
# LAES_Td is defined outside this section (it appears to set up the table
# pointer Tbl0 - confirm). The state is then run through the compact core
# and out is reloaded from its frame slot.
869 $code.=<<___;
870         bl      LAES_Td
871         bl      Lppc_AES_decrypt_compact
872         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
873 ___
# Little-endian build: byte-reverse s0..s3 into t0..t3 before storing.
874 $code.=<<___ if ($LITTLE_ENDIAN);
875         rotlwi  $t0,$s0,8
876         rotlwi  $t1,$s1,8
877         rotlwi  $t2,$s2,8
878         rotlwi  $t3,$s3,8
879         rlwimi  $t0,$s0,24,0,7
880         rlwimi  $t1,$s1,24,0,7
881         rlwimi  $t2,$s2,24,0,7
882         rlwimi  $t3,$s3,24,0,7
883         rlwimi  $t0,$s0,24,16,23
884         rlwimi  $t1,$s1,24,16,23
885         rlwimi  $t2,$s2,24,16,23
886         rlwimi  $t3,$s3,24,16,23
887         stw     $t0,0($out)
888         stw     $t1,4($out)
889         stw     $t2,8($out)
890         stw     $t3,12($out)
891 ___
# Big-endian build: the state words are stored directly.
892 $code.=<<___ if (!$LITTLE_ENDIAN);
893         stw     $s0,0($out)
894         stw     $s1,4($out)
895         stw     $s2,8($out)
896         stw     $s3,12($out)
897 ___
898 $code.=<<___;
899         b       Ldec_done
900
# At least one pointer is misaligned. t0 = (4096-inp)&0xff0 and
# t1 = (4096-out)&0xff0; a zero result means the pointer lies within the
# last 15 bytes before a 4096-byte boundary (or exactly on one), where a
# 16-byte block could straddle that boundary. If neither value is zero the
# word-access path at Ldec_unaligned_ok is used anyway; otherwise control
# reaches the bytewise path at Ldec_xpage.
901 Ldec_unaligned:
902         subfic  $t0,$inp,4096
903         subfic  $t1,$out,4096
904         andi.   $t0,$t0,4096-16
905         beq     Ldec_xpage
906         andi.   $t1,$t1,4096-16
907         bne     Ldec_unaligned_ok
908
# Bytewise load: every word is assembled with the byte from the lowest
# address in its most significant position, so no endian fix-up is needed.
909 Ldec_xpage:
910         lbz     $acc00,0($inp)
911         lbz     $acc01,1($inp)
912         lbz     $acc02,2($inp)
913         lbz     $s0,3($inp)
914         lbz     $acc04,4($inp)
915         lbz     $acc05,5($inp)
916         lbz     $acc06,6($inp)
917         lbz     $s1,7($inp)
918         lbz     $acc08,8($inp)
919         lbz     $acc09,9($inp)
920         lbz     $acc10,10($inp)
921         insrwi  $s0,$acc00,8,0
922         lbz     $s2,11($inp)
923         insrwi  $s1,$acc04,8,0
924         lbz     $acc12,12($inp)
925         insrwi  $s0,$acc01,8,8
926         lbz     $acc13,13($inp)
927         insrwi  $s1,$acc05,8,8
928         lbz     $acc14,14($inp)
929         insrwi  $s0,$acc02,8,16
930         lbz     $s3,15($inp)
931         insrwi  $s1,$acc06,8,16
932         insrwi  $s2,$acc08,8,0
933         insrwi  $s3,$acc12,8,0
934         insrwi  $s2,$acc09,8,8
935         insrwi  $s3,$acc13,8,8
936         insrwi  $s2,$acc10,8,16
937         insrwi  $s3,$acc14,8,16
938
939         bl      LAES_Td
940         bl      Lppc_AES_decrypt_compact
941         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
942
# Bytewise store: the three upper bytes of each word are extracted into
# scratch registers, the lowest byte is stored straight from the state word.
943         extrwi  $acc00,$s0,8,0
944         extrwi  $acc01,$s0,8,8
945         stb     $acc00,0($out)
946         extrwi  $acc02,$s0,8,16
947         stb     $acc01,1($out)
948         stb     $acc02,2($out)
949         extrwi  $acc04,$s1,8,0
950         stb     $s0,3($out)
951         extrwi  $acc05,$s1,8,8
952         stb     $acc04,4($out)
953         extrwi  $acc06,$s1,8,16
954         stb     $acc05,5($out)
955         stb     $acc06,6($out)
956         extrwi  $acc08,$s2,8,0
957         stb     $s1,7($out)
958         extrwi  $acc09,$s2,8,8
959         stb     $acc08,8($out)
960         extrwi  $acc10,$s2,8,16
961         stb     $acc09,9($out)
962         stb     $acc10,10($out)
963         extrwi  $acc12,$s3,8,0
964         stb     $s2,11($out)
965         extrwi  $acc13,$s3,8,8
966         stb     $acc12,12($out)
967         extrwi  $acc14,$s3,8,16
968         stb     $acc13,13($out)
969         stb     $acc14,14($out)
970         stb     $s3,15($out)
971
# Common exit: reload the saved link register and r14-r31, release the
# frame and return.
972 Ldec_done:
973         $POP    r0,`$FRAME+$LRSAVE`($sp)
974         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
975         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
976         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
977         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
978         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
979         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
980         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
981         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
982         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
983         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
984         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
985         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
986         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
987         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
988         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
989         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
990         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
991         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
992         mtlr    r0
993         addi    $sp,$sp,$FRAME
994         blr
# NOTE(review): the .long/.byte data after blr looks like a traceback
# table - confirm against the ABI documentation.
995         .long   0
996         .byte   0,12,4,1,0x80,18,3,0
997         .long   0
998
# Lppc_AES_decrypt: table-driven decrypt core. It works on the state held
# in s0..s3, reads the key schedule through key and the lookup tables
# through Tbl0, and leaves the result in s0..s3. The .AES_decrypt entry
# point in this file calls the compact variant rather than this routine.
999 .align  5
1000 Lppc_AES_decrypt:
# The round count is read from offset 240 of the key structure. CTR gets
# that count minus one because the last round is handled after the loop.
# Tbl1..Tbl3 are Tbl0 plus 3, 2 and 1 bytes (presumably the table layout
# makes these offsets yield byte-rotated entries - confirm against the
# table definition). The state is XORed with the first round key.
1001         lwz     $acc00,240($key)
1002         addi    $Tbl1,$Tbl0,3
1003         lwz     $t0,0($key)
1004         addi    $Tbl2,$Tbl0,2
1005         lwz     $t1,4($key)
1006         addi    $Tbl3,$Tbl0,1
1007         lwz     $t2,8($key)
1008         addi    $acc00,$acc00,-1
1009         lwz     $t3,12($key)
1010         addi    $key,$key,16
1011         xor     $s0,$s0,$t0
1012         xor     $s1,$s1,$t1
1013         xor     $s2,$s2,$t2
1014         xor     $s3,$s3,$t3
1015         mtctr   $acc00
1016 .align  4
# Main round. Each rlwinm turns one state byte into a table offset of
# byte*8 (the extra rotate by 3 and the 21..28 mask). The next round key
# is loaded into t0..t3 in between, and the four looked-up words for each
# column are XORed into it to form the new state.
1017 Ldec_loop:
1018         rlwinm  $acc00,$s0,`32-24+3`,21,28
1019         rlwinm  $acc01,$s1,`32-24+3`,21,28
1020         rlwinm  $acc02,$s2,`32-24+3`,21,28
1021         rlwinm  $acc03,$s3,`32-24+3`,21,28
1022         lwz     $t0,0($key)
1023         rlwinm  $acc04,$s3,`32-16+3`,21,28
1024         lwz     $t1,4($key)
1025         rlwinm  $acc05,$s0,`32-16+3`,21,28
1026         lwz     $t2,8($key)
1027         rlwinm  $acc06,$s1,`32-16+3`,21,28
1028         lwz     $t3,12($key)
1029         rlwinm  $acc07,$s2,`32-16+3`,21,28
1030         lwzx    $acc00,$Tbl0,$acc00
1031         rlwinm  $acc08,$s2,`32-8+3`,21,28
1032         lwzx    $acc01,$Tbl0,$acc01
1033         rlwinm  $acc09,$s3,`32-8+3`,21,28
1034         lwzx    $acc02,$Tbl0,$acc02
1035         rlwinm  $acc10,$s0,`32-8+3`,21,28
1036         lwzx    $acc03,$Tbl0,$acc03
1037         rlwinm  $acc11,$s1,`32-8+3`,21,28
1038         lwzx    $acc04,$Tbl1,$acc04
1039         rlwinm  $acc12,$s1,`0+3`,21,28
1040         lwzx    $acc05,$Tbl1,$acc05
1041         rlwinm  $acc13,$s2,`0+3`,21,28
1042         lwzx    $acc06,$Tbl1,$acc06
1043         rlwinm  $acc14,$s3,`0+3`,21,28
1044         lwzx    $acc07,$Tbl1,$acc07
1045         rlwinm  $acc15,$s0,`0+3`,21,28
1046         lwzx    $acc08,$Tbl2,$acc08
1047         xor     $t0,$t0,$acc00
1048         lwzx    $acc09,$Tbl2,$acc09
1049         xor     $t1,$t1,$acc01
1050         lwzx    $acc10,$Tbl2,$acc10
1051         xor     $t2,$t2,$acc02
1052         lwzx    $acc11,$Tbl2,$acc11
1053         xor     $t3,$t3,$acc03
1054         lwzx    $acc12,$Tbl3,$acc12
1055         xor     $t0,$t0,$acc04
1056         lwzx    $acc13,$Tbl3,$acc13
1057         xor     $t1,$t1,$acc05
1058         lwzx    $acc14,$Tbl3,$acc14
1059         xor     $t2,$t2,$acc06
1060         lwzx    $acc15,$Tbl3,$acc15
1061         xor     $t3,$t3,$acc07
1062         xor     $t0,$t0,$acc08
1063         xor     $t1,$t1,$acc09
1064         xor     $t2,$t2,$acc10
1065         xor     $t3,$t3,$acc11
1066         xor     $s0,$t0,$acc12
1067         xor     $s1,$t1,$acc13
1068         xor     $s2,$t2,$acc14
1069         xor     $s3,$t3,$acc15
1070         addi    $key,$key,16
1071         bdnz-   Ldec_loop
1072
# Last round. Tbl2 is repointed to Tbl0+2048, which is indexed bytewise
# with lbzx (the existing comment below names it Td4). The eight lwz from
# 2048+0 to 2048+224 in steps of 32 serve as the prefetch mentioned in
# that comment: their destination registers are overwritten before being
# read. The substituted bytes are merged back into s0..s3 with
# rlwinm/rlwimi/or and XORed with the final round key in t0..t3.
1073         addi    $Tbl2,$Tbl0,2048
1074         nop
1075         lwz     $t0,0($key)
1076         rlwinm  $acc00,$s0,`32-24`,24,31
1077         lwz     $t1,4($key)
1078         rlwinm  $acc01,$s1,`32-24`,24,31
1079         lwz     $t2,8($key)
1080         rlwinm  $acc02,$s2,`32-24`,24,31
1081         lwz     $t3,12($key)
1082         rlwinm  $acc03,$s3,`32-24`,24,31
1083         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Td4
1084         rlwinm  $acc04,$s3,`32-16`,24,31
1085         lwz     $acc09,`2048+32`($Tbl0)
1086         rlwinm  $acc05,$s0,`32-16`,24,31
1087         lwz     $acc10,`2048+64`($Tbl0)
1088         lbzx    $acc00,$Tbl2,$acc00
1089         lwz     $acc11,`2048+96`($Tbl0)
1090         lbzx    $acc01,$Tbl2,$acc01
1091         lwz     $acc12,`2048+128`($Tbl0)
1092         rlwinm  $acc06,$s1,`32-16`,24,31
1093         lwz     $acc13,`2048+160`($Tbl0)
1094         rlwinm  $acc07,$s2,`32-16`,24,31
1095         lwz     $acc14,`2048+192`($Tbl0)
1096         rlwinm  $acc08,$s2,`32-8`,24,31
1097         lwz     $acc15,`2048+224`($Tbl0)
1098         rlwinm  $acc09,$s3,`32-8`,24,31
1099         lbzx    $acc02,$Tbl2,$acc02
1100         rlwinm  $acc10,$s0,`32-8`,24,31
1101         lbzx    $acc03,$Tbl2,$acc03
1102         rlwinm  $acc11,$s1,`32-8`,24,31
1103         lbzx    $acc04,$Tbl2,$acc04
1104         rlwinm  $acc12,$s1,`0`,24,31
1105         lbzx    $acc05,$Tbl2,$acc05
1106         rlwinm  $acc13,$s2,`0`,24,31
1107         lbzx    $acc06,$Tbl2,$acc06
1108         rlwinm  $acc14,$s3,`0`,24,31
1109         lbzx    $acc07,$Tbl2,$acc07
1110         rlwinm  $acc15,$s0,`0`,24,31
1111         lbzx    $acc08,$Tbl2,$acc08
1112         rlwinm  $s0,$acc00,24,0,7
1113         lbzx    $acc09,$Tbl2,$acc09
1114         rlwinm  $s1,$acc01,24,0,7
1115         lbzx    $acc10,$Tbl2,$acc10
1116         rlwinm  $s2,$acc02,24,0,7
1117         lbzx    $acc11,$Tbl2,$acc11
1118         rlwinm  $s3,$acc03,24,0,7
1119         lbzx    $acc12,$Tbl2,$acc12
1120         rlwimi  $s0,$acc04,16,8,15
1121         lbzx    $acc13,$Tbl2,$acc13
1122         rlwimi  $s1,$acc05,16,8,15
1123         lbzx    $acc14,$Tbl2,$acc14
1124         rlwimi  $s2,$acc06,16,8,15
1125         lbzx    $acc15,$Tbl2,$acc15
1126         rlwimi  $s3,$acc07,16,8,15
1127         rlwimi  $s0,$acc08,8,16,23
1128         rlwimi  $s1,$acc09,8,16,23
1129         rlwimi  $s2,$acc10,8,16,23
1130         rlwimi  $s3,$acc11,8,16,23
1131         or      $s0,$s0,$acc12
1132         or      $s1,$s1,$acc13
1133         or      $s2,$s2,$acc14
1134         or      $s3,$s3,$acc15
1135         xor     $s0,$s0,$t0
1136         xor     $s1,$s1,$t1
1137         xor     $s2,$s2,$t2
1138         xor     $s3,$s3,$t3
1139         blr
1140         .long   0
1141         .byte   0,12,0x14,0,0,0,0,0
1142
# Lppc_AES_decrypt_compact: decrypt core called by .AES_decrypt. It works
# on the state in s0..s3 and leaves the result there. The only table it
# reads is the byte table at Tbl0+2048; the column mixing between rounds
# is computed arithmetically with packed GF(2^8) doublings (the r2, r4 and
# r8 values in the comments below) instead of word-table lookups.
1143 .align  4
1144 Lppc_AES_decrypt_compact:
# Setup: round count from offset 240 of the key structure, byte table
# pointer in Tbl1, first round key in t0..t3, and the two constants
# 0x80808080 and 0x1b1b1b1b built with lis/ori.
1145         lwz     $acc00,240($key)
1146         addi    $Tbl1,$Tbl0,2048
1147         lwz     $t0,0($key)
1148         lis     $mask80,0x8080
1149         lwz     $t1,4($key)
1150         lis     $mask1b,0x1b1b
1151         lwz     $t2,8($key)
1152         ori     $mask80,$mask80,0x8080
1153         lwz     $t3,12($key)
1154         ori     $mask1b,$mask1b,0x1b1b
1155         addi    $key,$key,16
1156 ___
# 64-bit build: copy the low word of each mask into its high word so the
# masks can be applied to two packed state words at once.
1157 $code.=<<___ if ($SIZE_T==8);
1158         insrdi  $mask80,$mask80,32,0
1159         insrdi  $mask1b,$mask1b,32,0
1160 ___
# CTR is loaded with the full round count; the loop is left through the
# bdz in its middle, after the byte substitution of the last round.
1161 $code.=<<___;
1162         mtctr   $acc00
1163 .align  4
# Per round: XOR in the round key, substitute all sixteen bytes through
# the byte table, reassemble the words with rlwinm/rlwimi/or, and load the
# next round key into t0..t3.
1164 Ldec_compact_loop:
1165         xor     $s0,$s0,$t0
1166         xor     $s1,$s1,$t1
1167         rlwinm  $acc00,$s0,`32-24`,24,31
1168         xor     $s2,$s2,$t2
1169         rlwinm  $acc01,$s1,`32-24`,24,31
1170         xor     $s3,$s3,$t3
1171         rlwinm  $acc02,$s2,`32-24`,24,31
1172         rlwinm  $acc03,$s3,`32-24`,24,31
1173         rlwinm  $acc04,$s3,`32-16`,24,31
1174         rlwinm  $acc05,$s0,`32-16`,24,31
1175         rlwinm  $acc06,$s1,`32-16`,24,31
1176         rlwinm  $acc07,$s2,`32-16`,24,31
1177         lbzx    $acc00,$Tbl1,$acc00
1178         rlwinm  $acc08,$s2,`32-8`,24,31
1179         lbzx    $acc01,$Tbl1,$acc01
1180         rlwinm  $acc09,$s3,`32-8`,24,31
1181         lbzx    $acc02,$Tbl1,$acc02
1182         rlwinm  $acc10,$s0,`32-8`,24,31
1183         lbzx    $acc03,$Tbl1,$acc03
1184         rlwinm  $acc11,$s1,`32-8`,24,31
1185         lbzx    $acc04,$Tbl1,$acc04
1186         rlwinm  $acc12,$s1,`0`,24,31
1187         lbzx    $acc05,$Tbl1,$acc05
1188         rlwinm  $acc13,$s2,`0`,24,31
1189         lbzx    $acc06,$Tbl1,$acc06
1190         rlwinm  $acc14,$s3,`0`,24,31
1191         lbzx    $acc07,$Tbl1,$acc07
1192         rlwinm  $acc15,$s0,`0`,24,31
1193         lbzx    $acc08,$Tbl1,$acc08
1194         rlwinm  $s0,$acc00,24,0,7
1195         lbzx    $acc09,$Tbl1,$acc09
1196         rlwinm  $s1,$acc01,24,0,7
1197         lbzx    $acc10,$Tbl1,$acc10
1198         rlwinm  $s2,$acc02,24,0,7
1199         lbzx    $acc11,$Tbl1,$acc11
1200         rlwinm  $s3,$acc03,24,0,7
1201         lbzx    $acc12,$Tbl1,$acc12
1202         rlwimi  $s0,$acc04,16,8,15
1203         lbzx    $acc13,$Tbl1,$acc13
1204         rlwimi  $s1,$acc05,16,8,15
1205         lbzx    $acc14,$Tbl1,$acc14
1206         rlwimi  $s2,$acc06,16,8,15
1207         lbzx    $acc15,$Tbl1,$acc15
1208         rlwimi  $s3,$acc07,16,8,15
1209         rlwimi  $s0,$acc08,8,16,23
1210         rlwimi  $s1,$acc09,8,16,23
1211         rlwimi  $s2,$acc10,8,16,23
1212         rlwimi  $s3,$acc11,8,16,23
1213         lwz     $t0,0($key)
1214         or      $s0,$s0,$acc12
1215         lwz     $t1,4($key)
1216         or      $s1,$s1,$acc13
1217         lwz     $t2,8($key)
1218         or      $s2,$s2,$acc14
1219         lwz     $t3,12($key)
1220         or      $s3,$s3,$acc15
1221
1222         addi    $key,$key,16
1223         bdz     Ldec_compact_done
1224 ___
# 64-bit build: s1 is packed into the upper half of s0 and s3 into the
# upper half of s2, so every doubling step below handles two state words
# per instruction. The extrdi group at the end splits the upper halves
# back out into the odd-numbered acc registers for the shared tail.
1225 $code.=<<___ if ($SIZE_T==8);
1226         # vectorized permutation improves decrypt performance by 10%
1227         insrdi  $s0,$s1,32,0
1228         insrdi  $s2,$s3,32,0
1229
1230         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1231         and     $acc02,$s2,$mask80
1232         srdi    $acc04,$acc00,7         # r1>>7
1233         srdi    $acc06,$acc02,7
1234         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1235         andc    $acc10,$s2,$mask80
1236         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1237         sub     $acc02,$acc02,$acc06
1238         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1239         add     $acc10,$acc10,$acc10
1240         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1241         and     $acc02,$acc02,$mask1b
1242         xor     $acc00,$acc00,$acc08    # r2
1243         xor     $acc02,$acc02,$acc10
1244
1245         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1246         and     $acc06,$acc02,$mask80
1247         srdi    $acc08,$acc04,7         # r1>>7
1248         srdi    $acc10,$acc06,7
1249         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1250         andc    $acc14,$acc02,$mask80
1251         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1252         sub     $acc06,$acc06,$acc10
1253         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1254         add     $acc14,$acc14,$acc14
1255         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1256         and     $acc06,$acc06,$mask1b
1257         xor     $acc04,$acc04,$acc12    # r4
1258         xor     $acc06,$acc06,$acc14
1259
1260         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1261         and     $acc10,$acc06,$mask80
1262         srdi    $acc12,$acc08,7         # r1>>7
1263         srdi    $acc14,$acc10,7
1264         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1265         sub     $acc10,$acc10,$acc14
1266         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1267         andc    $acc14,$acc06,$mask80
1268         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1269         add     $acc14,$acc14,$acc14
1270         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1271         and     $acc10,$acc10,$mask1b
1272         xor     $acc08,$acc08,$acc12    # r8
1273         xor     $acc10,$acc10,$acc14
1274
1275         xor     $acc00,$acc00,$s0       # r2^r0
1276         xor     $acc02,$acc02,$s2
1277         xor     $acc04,$acc04,$s0       # r4^r0
1278         xor     $acc06,$acc06,$s2
1279
1280         extrdi  $acc01,$acc00,32,0
1281         extrdi  $acc03,$acc02,32,0
1282         extrdi  $acc05,$acc04,32,0
1283         extrdi  $acc07,$acc06,32,0
1284         extrdi  $acc09,$acc08,32,0
1285         extrdi  $acc11,$acc10,32,0
1286 ___
# 32-bit build: the same three doubling steps, carried out on each of the
# four state words separately with 32-bit shifts.
1287 $code.=<<___ if ($SIZE_T==4);
1288         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1289         and     $acc01,$s1,$mask80
1290         and     $acc02,$s2,$mask80
1291         and     $acc03,$s3,$mask80
1292         srwi    $acc04,$acc00,7         # r1>>7
1293         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1294         srwi    $acc05,$acc01,7
1295         andc    $acc09,$s1,$mask80
1296         srwi    $acc06,$acc02,7
1297         andc    $acc10,$s2,$mask80
1298         srwi    $acc07,$acc03,7
1299         andc    $acc11,$s3,$mask80
1300         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1301         sub     $acc01,$acc01,$acc05
1302         sub     $acc02,$acc02,$acc06
1303         sub     $acc03,$acc03,$acc07
1304         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1305         add     $acc09,$acc09,$acc09
1306         add     $acc10,$acc10,$acc10
1307         add     $acc11,$acc11,$acc11
1308         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1309         and     $acc01,$acc01,$mask1b
1310         and     $acc02,$acc02,$mask1b
1311         and     $acc03,$acc03,$mask1b
1312         xor     $acc00,$acc00,$acc08    # r2
1313         xor     $acc01,$acc01,$acc09
1314         xor     $acc02,$acc02,$acc10
1315         xor     $acc03,$acc03,$acc11
1316
1317         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1318         and     $acc05,$acc01,$mask80
1319         and     $acc06,$acc02,$mask80
1320         and     $acc07,$acc03,$mask80
1321         srwi    $acc08,$acc04,7         # r1>>7
1322         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1323         srwi    $acc09,$acc05,7
1324         andc    $acc13,$acc01,$mask80
1325         srwi    $acc10,$acc06,7
1326         andc    $acc14,$acc02,$mask80
1327         srwi    $acc11,$acc07,7
1328         andc    $acc15,$acc03,$mask80
1329         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1330         sub     $acc05,$acc05,$acc09
1331         sub     $acc06,$acc06,$acc10
1332         sub     $acc07,$acc07,$acc11
1333         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1334         add     $acc13,$acc13,$acc13
1335         add     $acc14,$acc14,$acc14
1336         add     $acc15,$acc15,$acc15
1337         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1338         and     $acc05,$acc05,$mask1b
1339         and     $acc06,$acc06,$mask1b
1340         and     $acc07,$acc07,$mask1b
1341         xor     $acc04,$acc04,$acc12    # r4
1342         xor     $acc05,$acc05,$acc13
1343         xor     $acc06,$acc06,$acc14
1344         xor     $acc07,$acc07,$acc15
1345
1346         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1347         and     $acc09,$acc05,$mask80
1348         srwi    $acc12,$acc08,7         # r1>>7
1349         and     $acc10,$acc06,$mask80
1350         srwi    $acc13,$acc09,7
1351         and     $acc11,$acc07,$mask80
1352         srwi    $acc14,$acc10,7
1353         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1354         srwi    $acc15,$acc11,7
1355         sub     $acc09,$acc09,$acc13
1356         sub     $acc10,$acc10,$acc14
1357         sub     $acc11,$acc11,$acc15
1358         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1359         andc    $acc13,$acc05,$mask80
1360         andc    $acc14,$acc06,$mask80
1361         andc    $acc15,$acc07,$mask80
1362         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1363         add     $acc13,$acc13,$acc13
1364         add     $acc14,$acc14,$acc14
1365         add     $acc15,$acc15,$acc15
1366         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1367         and     $acc09,$acc09,$mask1b
1368         and     $acc10,$acc10,$mask1b
1369         and     $acc11,$acc11,$mask1b
1370         xor     $acc08,$acc08,$acc12    # r8
1371         xor     $acc09,$acc09,$acc13
1372         xor     $acc10,$acc10,$acc14
1373         xor     $acc11,$acc11,$acc15
1374
1375         xor     $acc00,$acc00,$s0       # r2^r0
1376         xor     $acc01,$acc01,$s1
1377         xor     $acc02,$acc02,$s2
1378         xor     $acc03,$acc03,$s3
1379         xor     $acc04,$acc04,$s0       # r4^r0
1380         xor     $acc05,$acc05,$s1
1381         xor     $acc06,$acc06,$s2
1382         xor     $acc07,$acc07,$s3
1383 ___
# Shared tail for both builds: combine r0, r2^r0, r4^r0 and r8 with 32-bit
# rotations by 8, 16 and 24 bits into the new state words, then start the
# next round.
1384 $code.=<<___;
1385         rotrwi  $s0,$s0,8               # = ROTATE(r0,8)
1386         rotrwi  $s1,$s1,8
1387         xor     $s0,$s0,$acc00          # ^= r2^r0
1388         rotrwi  $s2,$s2,8
1389         xor     $s1,$s1,$acc01
1390         rotrwi  $s3,$s3,8
1391         xor     $s2,$s2,$acc02
1392         xor     $s3,$s3,$acc03
1393         xor     $acc00,$acc00,$acc08
1394         xor     $acc01,$acc01,$acc09
1395         xor     $acc02,$acc02,$acc10
1396         xor     $acc03,$acc03,$acc11
1397         xor     $s0,$s0,$acc04          # ^= r4^r0
1398         rotrwi  $acc00,$acc00,24
1399         xor     $s1,$s1,$acc05
1400         rotrwi  $acc01,$acc01,24
1401         xor     $s2,$s2,$acc06
1402         rotrwi  $acc02,$acc02,24
1403         xor     $s3,$s3,$acc07
1404         rotrwi  $acc03,$acc03,24
1405         xor     $acc04,$acc04,$acc08
1406         xor     $acc05,$acc05,$acc09
1407         xor     $acc06,$acc06,$acc10
1408         xor     $acc07,$acc07,$acc11
1409         xor     $s0,$s0,$acc08          # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1410         rotrwi  $acc04,$acc04,16
1411         xor     $s1,$s1,$acc09
1412         rotrwi  $acc05,$acc05,16
1413         xor     $s2,$s2,$acc10
1414         rotrwi  $acc06,$acc06,16
1415         xor     $s3,$s3,$acc11
1416         rotrwi  $acc07,$acc07,16
1417         xor     $s0,$s0,$acc00          # ^= ROTATE(r8^r2^r0,24)
1418         rotrwi  $acc08,$acc08,8
1419         xor     $s1,$s1,$acc01
1420         rotrwi  $acc09,$acc09,8
1421         xor     $s2,$s2,$acc02
1422         rotrwi  $acc10,$acc10,8
1423         xor     $s3,$s3,$acc03
1424         rotrwi  $acc11,$acc11,8
1425         xor     $s0,$s0,$acc04          # ^= ROTATE(r8^r4^r0,16)
1426         xor     $s1,$s1,$acc05
1427         xor     $s2,$s2,$acc06
1428         xor     $s3,$s3,$acc07
1429         xor     $s0,$s0,$acc08          # ^= ROTATE(r8,8)       
1430         xor     $s1,$s1,$acc09  
1431         xor     $s2,$s2,$acc10  
1432         xor     $s3,$s3,$acc11  
1433
1434         b       Ldec_compact_loop
1435 .align  4
# Reached from the bdz above once CTR hits zero: XOR in the last round key
# that was loaded into t0..t3 and return.
1436 Ldec_compact_done:
1437         xor     $s0,$s0,$t0
1438         xor     $s1,$s1,$t1
1439         xor     $s2,$s2,$t2
1440         xor     $s3,$s3,$t3
1441         blr
1442         .long   0
1443         .byte   0,12,0x14,0,0,0,0,0
# The symbol size is measured from .AES_decrypt up to this point, so it
# also spans the two local decrypt cores that follow the entry point.
1444 .size   .AES_decrypt,.-.AES_decrypt
1445
1446 .asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1447 .align  7
1448 ___
1449
# Final stage of the generator: every backtick-quoted expression in the
# accumulated text is replaced by the result of evaluating it as Perl
# (for example the frame offsets), and the result is written to STDOUT.
1450 $code =~ s/\`([^\`]*)\`/eval $1/gem;
1451 print $code;
# Buffered write errors are reported only when the handle is closed, so a
# failed close must abort the script instead of silently leaving truncated
# output behind with a zero exit status.
# NOTE(review): if STDOUT was reopened as a pipe earlier in the script,
# close also waits for that child and fails when it exits non-zero.
1452 close STDOUT or die "error closing STDOUT: $!";