Add OpenSSL copyright to .pl files
[openssl.git] / crypto / aes / asm / aes-ppc.pl
1 #! /usr/bin/env perl
2 # Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the OpenSSL license (the "License").  You may not use
5 # this file except in compliance with the License.  You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
10 # ====================================================================
11 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16
# Needs more work: key setup, CBC routine...
#
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
# 4.0. But these are not the ones currently used! Their "compact"
# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
# at 1/3 of ppc_AES_decrypt.
25
# February 2010
#
# Rescheduling instructions to favour Power6 pipeline gave 10%
# performance improvement on the platform in question (and marginal
# improvement even on others). It should be noted that Power6 fails
# to process byte in 18 cycles, only in 23, because it fails to issue
# 4 load instructions in two cycles, only in 3. As a result non-compact
# block subroutines are 25% slower than one would expect. Compact
# functions scale better, because they have pure computational part,
# which scales perfectly with clock frequency. To be specific
# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
38
# Command line: <flavour> [<output>].
#
# The flavour string selects the ABI word size (it must contain "64" or
# "32") and thereby the pointer size, the offset at which the link
# register is saved, and the store-with-update/load/store mnemonics used
# for stack-frame handling.
$flavour = shift;

if ($flavour =~ /64/) {
        $SIZE_T =8;
        $LRSAVE =2*$SIZE_T;
        $STU    ="stdu";
        $POP    ="ld";
        $PUSH   ="std";
} elsif ($flavour =~ /32/) {
        $SIZE_T =4;
        $LRSAVE =$SIZE_T;
        $STU    ="stwu";
        $POP    ="lwz";
        $PUSH   ="stw";
} else { die "nonsense $flavour"; }

# Non-zero (equal to $SIZE_T) when the flavour ends in "le"; the code
# below only tests it for truth to pick little-endian load/store paths.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Directory part of this script's path, including the trailing separator;
# empty when the script was invoked without a directory component, so the
# result never depends on a stale $1 from an earlier match.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe everything printed to STDOUT through ppc-xlate.pl; the next
# command-line argument (the output name, possibly absent) is passed on.
# The open() call is parenthesised and checked with low-precedence "or":
# with "||" the test applied to the (always true) command string and the
# die could never trigger.
open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";

# Stack frame size used by the generated entry points.
$FRAME=32*$SIZE_T;
65
# Append one ".long" directive per argument to the global $code buffer.
# Each 32-bit value is emitted twice on its line, so every table entry
# occupies 8 bytes in the generated assembly.
#
# Arguments: a list of integers; processing stops at the first undefined
# element.  Nothing meaningful is returned; the effect is on $code.
#
# The sub is declared without a prototype: it consumes its argument list,
# so an empty "()" prototype would be wrong and only harmless while every
# caller uses the prototype-bypassing "&" call form.
sub _data_word
{ my $i;
    while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
}
70
# Symbolic names for the PowerPC registers used by the generated code.

# Stack pointer, TOC pointer and the three incoming arguments.
$sp="r1";
$toc="r2";
$inp="r3";
$out="r4";
$key="r5";

# Table base pointers.  $Tbl0 shares r3 with $inp and $Tbl3 shares r4
# with $out, so those arguments must be consumed/saved before the table
# pointers are loaded.
$Tbl0="r3";
$Tbl1="r6";
$Tbl2="r7";
$Tbl3=$out;     # stay away from "r2"; $out is offloaded to stack

# The four 32-bit words of cipher state.
$s0="r8";
$s1="r9";
$s2="r10";
$s3="r11";

# Temporaries.
$t0="r12";
$t1="r0";       # stay away from "r13";
$t2="r14";
$t3="r15";

# Sixteen accumulators, r16 through r31.
$acc00="r16";
$acc01="r17";
$acc02="r18";
$acc03="r19";

$acc04="r20";
$acc05="r21";
$acc06="r22";
$acc07="r23";

$acc08="r24";
$acc09="r25";
$acc10="r26";
$acc11="r27";

$acc12="r28";
$acc13="r29";
$acc14="r30";
$acc15="r31";

# Constant masks for the "compact" routines; they reuse the $Tbl2/$Tbl3
# registers (r7 and r4).
$mask80=$Tbl2;
$mask1b=$Tbl3;
114
# Position-independent table locators.
#
# LAES_Te and LAES_Td each return a table address in $Tbl0.  "bcl 20,31,$+4"
# places the address of the following instruction in the link register;
# a constant distance is then added to it, and the caller's link register
# (kept in r0 meanwhile) is restored before returning.
#
# Layout: each stub is 9 words long (6 instructions, a zero word and 8
# bytes of .byte data) and is padded with .space so that LAES_Td starts 64
# bytes after LAES_Te and the table data starts 128 bytes after LAES_Te.
#   LAES_Te: distance 128-8 reaches the first data entry.
#   LAES_Td: distance 128-64-8 reaches the first data entry; a further
#            2048+256 skips the 256-entry word table (8 bytes per entry as
#            emitted by _data_word) and the 256-byte table that follow.
# The back-quoted expressions are left in the text as written here; they
# are presumably evaluated when $code is post-processed later in the file.
$code.=<<___;
.machine        "any"
.text

.align  7
LAES_Te:
        mflr    r0
        bcl     20,31,\$+4
        mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
        addi    $Tbl0,$Tbl0,`128-8`
        mtlr    r0
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
        .space  `64-9*4`
LAES_Td:
        mflr    r0
        bcl     20,31,\$+4
        mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
        addi    $Tbl0,$Tbl0,`128-64-8+2048+256`
        mtlr    r0
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
        .space  `128-64-9*4`
___
# Encryption word table: 256 32-bit entries placed directly after the
# LAES_Te/LAES_Td stubs.  _data_word emits every entry twice, so the table
# occupies 2048 bytes in the generated assembly.
&_data_word(
        0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
        0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
        0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
        0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
        0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
        0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
        0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
        0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
        0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
        0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
        0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
        0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
        0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
        0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
        0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
        0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
        0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
        0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
        0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
        0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
        0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
        0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
        0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
        0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
        0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
        0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
        0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
        0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
        0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
        0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
        0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
        0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
        0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
        0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
        0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
        0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
        0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
        0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
        0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
        0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
        0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
        0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
        0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
        0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
        0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
        0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
        0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
        0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
        0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
        0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
        0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
        0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
        0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
        0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
        0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
        0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
        0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
        0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
        0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
        0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
        0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
        0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
        0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
        0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# 256-byte substitution table used by the encryption routines (the code
# further down refers to it as Te4).  It is emitted immediately after the
# 2048-byte encryption word table, i.e. at offset 2048 from the address
# returned by LAES_Te.
$code.=<<___;
.byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
___
# Decryption word table: 256 32-bit entries, each emitted twice by
# _data_word (2048 bytes).  It follows the encryption word table and the
# 256-byte table, which is the position LAES_Td computes with its extra
# 2048+256 displacement.
&_data_word(
        0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
        0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
        0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
        0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
        0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
        0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
        0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
        0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
        0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
        0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
        0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
        0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
        0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
        0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
        0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
        0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
        0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
        0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
        0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
        0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
        0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
        0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
        0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
        0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
        0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
        0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
        0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
        0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
        0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
        0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
        0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
        0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
        0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
        0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
        0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
        0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
        0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
        0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
        0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
        0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
        0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
        0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
        0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
        0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
        0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
        0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
        0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
        0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
        0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
        0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
        0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
        0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
        0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
        0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
        0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
        0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
        0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
        0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
        0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
        0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
        0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
        0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
        0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
        0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# Two things are emitted here:
#
# 1. The 256-byte table that follows the decryption word table.
#
# 2. The entry sequence of the global function .AES_encrypt
#    (arguments: $inp, $out, $key).  It allocates a $FRAME-byte stack
#    frame, saves $out (its register doubles as $Tbl3/$mask1b), the
#    registers r14..r31 and the link register, and then tests the low two
#    bits of both $inp and $out.  If either pointer is not 4-byte aligned
#    control goes to Lenc_unaligned; otherwise execution falls through to
#    Lenc_unaligned_ok, whose body is appended by the statements below.
$code.=<<___;
.byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d


.globl  .AES_encrypt
.align  7
.AES_encrypt:
        $STU    $sp,-$FRAME($sp)
        mflr    r0

        $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
        $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
        $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
        $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
        $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
        $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
        $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
        $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
        $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
        $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
        $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
        $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
        $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
        $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
        $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
        $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
        $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
        $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
        $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
        $PUSH   r0,`$FRAME+$LRSAVE`($sp)

        andi.   $t0,$inp,3
        andi.   $t1,$out,3
        or.     $t0,$t0,$t1
        bne     Lenc_unaligned

Lenc_unaligned_ok:
___
# Word-wise load of the 16-byte input block into $s0..$s3.  Exactly one of
# the two variants is emitted, depending on $LITTLE_ENDIAN.
#
# Big-endian build: the four words are loaded straight into the state
# registers.
$code.=<<___ if (!$LITTLE_ENDIAN);
        lwz     $s0,0($inp)
        lwz     $s1,4($inp)
        lwz     $s2,8($inp)
        lwz     $s3,12($inp)
___
# Little-endian build: the words are loaded into $t0..$t3 and byte-reversed
# into $s0..$s3.  For each word, "rotlwi 8" puts the two middle bytes in
# their final positions and the two rlwimi instructions (rotate by 24,
# insert bits 0-7 and 16-23) fill in the outer two.
$code.=<<___ if ($LITTLE_ENDIAN);
        lwz     $t0,0($inp)
        lwz     $t1,4($inp)
        lwz     $t2,8($inp)
        lwz     $t3,12($inp)
        rotlwi  $s0,$t0,8
        rotlwi  $s1,$t1,8
        rotlwi  $s2,$t2,8
        rotlwi  $s3,$t3,8
        rlwimi  $s0,$t0,24,0,7
        rlwimi  $s1,$t1,24,0,7
        rlwimi  $s2,$t2,24,0,7
        rlwimi  $s3,$t3,24,0,7
        rlwimi  $s0,$t0,24,16,23
        rlwimi  $s1,$t1,24,16,23
        rlwimi  $s2,$t2,24,16,23
        rlwimi  $s3,$t3,24,16,23
___
# Core of the aligned path: obtain the encryption table address in $Tbl0
# via LAES_Te, run the compact encryption routine on $s0..$s3, then reload
# $out from the stack slot it was saved to in the prologue (its register
# is reused as $Tbl3/$mask1b in between).
$code.=<<___;
        bl      LAES_Te
        bl      Lppc_AES_encrypt_compact
        $POP    $out,`$FRAME-$SIZE_T*19`($sp)
___
# Word-wise store of the result from $s0..$s3 to the output block.  Exactly
# one of the two variants is emitted, depending on $LITTLE_ENDIAN.
#
# Little-endian build: each state word is byte-reversed into $t0..$t3
# (rotlwi by 8 followed by two rlwimi inserts, mirroring the load path)
# and the reversed words are stored.
$code.=<<___ if ($LITTLE_ENDIAN);
        rotlwi  $t0,$s0,8
        rotlwi  $t1,$s1,8
        rotlwi  $t2,$s2,8
        rotlwi  $t3,$s3,8
        rlwimi  $t0,$s0,24,0,7
        rlwimi  $t1,$s1,24,0,7
        rlwimi  $t2,$s2,24,0,7
        rlwimi  $t3,$s3,24,0,7
        rlwimi  $t0,$s0,24,16,23
        rlwimi  $t1,$s1,24,16,23
        rlwimi  $t2,$s2,24,16,23
        rlwimi  $t3,$s3,24,16,23
        stw     $t0,0($out)
        stw     $t1,4($out)
        stw     $t2,8($out)
        stw     $t3,12($out)
___
# Big-endian build: the state words are stored as they are.
$code.=<<___ if (!$LITTLE_ENDIAN);
        stw     $s0,0($out)
        stw     $s1,4($out)
        stw     $s2,8($out)
        stw     $s3,12($out)
___
427 $code.=<<___;
428         b       Lenc_done
429
430 Lenc_unaligned:
431         subfic  $t0,$inp,4096
432         subfic  $t1,$out,4096
433         andi.   $t0,$t0,4096-16
434         beq     Lenc_xpage
435         andi.   $t1,$t1,4096-16
436         bne     Lenc_unaligned_ok
437
438 Lenc_xpage:
439         lbz     $acc00,0($inp)
440         lbz     $acc01,1($inp)
441         lbz     $acc02,2($inp)
442         lbz     $s0,3($inp)
443         lbz     $acc04,4($inp)
444         lbz     $acc05,5($inp)
445         lbz     $acc06,6($inp)
446         lbz     $s1,7($inp)
447         lbz     $acc08,8($inp)
448         lbz     $acc09,9($inp)
449         lbz     $acc10,10($inp)
450         insrwi  $s0,$acc00,8,0
451         lbz     $s2,11($inp)
452         insrwi  $s1,$acc04,8,0
453         lbz     $acc12,12($inp)
454         insrwi  $s0,$acc01,8,8
455         lbz     $acc13,13($inp)
456         insrwi  $s1,$acc05,8,8
457         lbz     $acc14,14($inp)
458         insrwi  $s0,$acc02,8,16
459         lbz     $s3,15($inp)
460         insrwi  $s1,$acc06,8,16
461         insrwi  $s2,$acc08,8,0
462         insrwi  $s3,$acc12,8,0
463         insrwi  $s2,$acc09,8,8
464         insrwi  $s3,$acc13,8,8
465         insrwi  $s2,$acc10,8,16
466         insrwi  $s3,$acc14,8,16
467
468         bl      LAES_Te
469         bl      Lppc_AES_encrypt_compact
470         $POP    $out,`$FRAME-$SIZE_T*19`($sp)
471
472         extrwi  $acc00,$s0,8,0
473         extrwi  $acc01,$s0,8,8
474         stb     $acc00,0($out)
475         extrwi  $acc02,$s0,8,16
476         stb     $acc01,1($out)
477         stb     $acc02,2($out)
478         extrwi  $acc04,$s1,8,0
479         stb     $s0,3($out)
480         extrwi  $acc05,$s1,8,8
481         stb     $acc04,4($out)
482         extrwi  $acc06,$s1,8,16
483         stb     $acc05,5($out)
484         stb     $acc06,6($out)
485         extrwi  $acc08,$s2,8,0
486         stb     $s1,7($out)
487         extrwi  $acc09,$s2,8,8
488         stb     $acc08,8($out)
489         extrwi  $acc10,$s2,8,16
490         stb     $acc09,9($out)
491         stb     $acc10,10($out)
492         extrwi  $acc12,$s3,8,0
493         stb     $s2,11($out)
494         extrwi  $acc13,$s3,8,8
495         stb     $acc12,12($out)
496         extrwi  $acc14,$s3,8,16
497         stb     $acc13,13($out)
498         stb     $acc14,14($out)
499         stb     $s3,15($out)
500
501 Lenc_done:
502         $POP    r0,`$FRAME+$LRSAVE`($sp)
503         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
504         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
505         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
506         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
507         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
508         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
509         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
510         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
511         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
512         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
513         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
514         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
515         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
516         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
517         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
518         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
519         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
520         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
521         mtlr    r0
522         addi    $sp,$sp,$FRAME
523         blr
524         .long   0
525         .byte   0,12,4,1,0x80,18,3,0
526         .long   0
527
528 .align  5
529 Lppc_AES_encrypt:
530         lwz     $acc00,240($key)
531         addi    $Tbl1,$Tbl0,3
532         lwz     $t0,0($key)
533         addi    $Tbl2,$Tbl0,2
534         lwz     $t1,4($key)
535         addi    $Tbl3,$Tbl0,1
536         lwz     $t2,8($key)
537         addi    $acc00,$acc00,-1
538         lwz     $t3,12($key)
539         addi    $key,$key,16
540         xor     $s0,$s0,$t0
541         xor     $s1,$s1,$t1
542         xor     $s2,$s2,$t2
543         xor     $s3,$s3,$t3
544         mtctr   $acc00
545 .align  4
546 Lenc_loop:
547         rlwinm  $acc00,$s0,`32-24+3`,21,28
548         rlwinm  $acc01,$s1,`32-24+3`,21,28
549         rlwinm  $acc02,$s2,`32-24+3`,21,28
550         rlwinm  $acc03,$s3,`32-24+3`,21,28
551         lwz     $t0,0($key)
552         rlwinm  $acc04,$s1,`32-16+3`,21,28
553         lwz     $t1,4($key)
554         rlwinm  $acc05,$s2,`32-16+3`,21,28
555         lwz     $t2,8($key)
556         rlwinm  $acc06,$s3,`32-16+3`,21,28
557         lwz     $t3,12($key)
558         rlwinm  $acc07,$s0,`32-16+3`,21,28
559         lwzx    $acc00,$Tbl0,$acc00
560         rlwinm  $acc08,$s2,`32-8+3`,21,28
561         lwzx    $acc01,$Tbl0,$acc01
562         rlwinm  $acc09,$s3,`32-8+3`,21,28
563         lwzx    $acc02,$Tbl0,$acc02
564         rlwinm  $acc10,$s0,`32-8+3`,21,28
565         lwzx    $acc03,$Tbl0,$acc03
566         rlwinm  $acc11,$s1,`32-8+3`,21,28
567         lwzx    $acc04,$Tbl1,$acc04
568         rlwinm  $acc12,$s3,`0+3`,21,28
569         lwzx    $acc05,$Tbl1,$acc05
570         rlwinm  $acc13,$s0,`0+3`,21,28
571         lwzx    $acc06,$Tbl1,$acc06
572         rlwinm  $acc14,$s1,`0+3`,21,28
573         lwzx    $acc07,$Tbl1,$acc07
574         rlwinm  $acc15,$s2,`0+3`,21,28
575         lwzx    $acc08,$Tbl2,$acc08
576         xor     $t0,$t0,$acc00
577         lwzx    $acc09,$Tbl2,$acc09
578         xor     $t1,$t1,$acc01
579         lwzx    $acc10,$Tbl2,$acc10
580         xor     $t2,$t2,$acc02
581         lwzx    $acc11,$Tbl2,$acc11
582         xor     $t3,$t3,$acc03
583         lwzx    $acc12,$Tbl3,$acc12
584         xor     $t0,$t0,$acc04
585         lwzx    $acc13,$Tbl3,$acc13
586         xor     $t1,$t1,$acc05
587         lwzx    $acc14,$Tbl3,$acc14
588         xor     $t2,$t2,$acc06
589         lwzx    $acc15,$Tbl3,$acc15
590         xor     $t3,$t3,$acc07
591         xor     $t0,$t0,$acc08
592         xor     $t1,$t1,$acc09
593         xor     $t2,$t2,$acc10
594         xor     $t3,$t3,$acc11
595         xor     $s0,$t0,$acc12
596         xor     $s1,$t1,$acc13
597         xor     $s2,$t2,$acc14
598         xor     $s3,$t3,$acc15
599         addi    $key,$key,16
600         bdnz    Lenc_loop
601
602         addi    $Tbl2,$Tbl0,2048
603         nop
604         lwz     $t0,0($key)
605         rlwinm  $acc00,$s0,`32-24`,24,31
606         lwz     $t1,4($key)
607         rlwinm  $acc01,$s1,`32-24`,24,31
608         lwz     $t2,8($key)
609         rlwinm  $acc02,$s2,`32-24`,24,31
610         lwz     $t3,12($key)
611         rlwinm  $acc03,$s3,`32-24`,24,31
612         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Te4
613         rlwinm  $acc04,$s1,`32-16`,24,31
614         lwz     $acc09,`2048+32`($Tbl0)
615         rlwinm  $acc05,$s2,`32-16`,24,31
616         lwz     $acc10,`2048+64`($Tbl0)
617         rlwinm  $acc06,$s3,`32-16`,24,31
618         lwz     $acc11,`2048+96`($Tbl0)
619         rlwinm  $acc07,$s0,`32-16`,24,31
620         lwz     $acc12,`2048+128`($Tbl0)
621         rlwinm  $acc08,$s2,`32-8`,24,31
622         lwz     $acc13,`2048+160`($Tbl0)
623         rlwinm  $acc09,$s3,`32-8`,24,31
624         lwz     $acc14,`2048+192`($Tbl0)
625         rlwinm  $acc10,$s0,`32-8`,24,31
626         lwz     $acc15,`2048+224`($Tbl0)
627         rlwinm  $acc11,$s1,`32-8`,24,31
628         lbzx    $acc00,$Tbl2,$acc00
629         rlwinm  $acc12,$s3,`0`,24,31
630         lbzx    $acc01,$Tbl2,$acc01
631         rlwinm  $acc13,$s0,`0`,24,31
632         lbzx    $acc02,$Tbl2,$acc02
633         rlwinm  $acc14,$s1,`0`,24,31
634         lbzx    $acc03,$Tbl2,$acc03
635         rlwinm  $acc15,$s2,`0`,24,31
636         lbzx    $acc04,$Tbl2,$acc04
637         rlwinm  $s0,$acc00,24,0,7
638         lbzx    $acc05,$Tbl2,$acc05
639         rlwinm  $s1,$acc01,24,0,7
640         lbzx    $acc06,$Tbl2,$acc06
641         rlwinm  $s2,$acc02,24,0,7
642         lbzx    $acc07,$Tbl2,$acc07
643         rlwinm  $s3,$acc03,24,0,7
644         lbzx    $acc08,$Tbl2,$acc08
645         rlwimi  $s0,$acc04,16,8,15
646         lbzx    $acc09,$Tbl2,$acc09
647         rlwimi  $s1,$acc05,16,8,15
648         lbzx    $acc10,$Tbl2,$acc10
649         rlwimi  $s2,$acc06,16,8,15
650         lbzx    $acc11,$Tbl2,$acc11
651         rlwimi  $s3,$acc07,16,8,15
652         lbzx    $acc12,$Tbl2,$acc12
653         rlwimi  $s0,$acc08,8,16,23
654         lbzx    $acc13,$Tbl2,$acc13
655         rlwimi  $s1,$acc09,8,16,23
656         lbzx    $acc14,$Tbl2,$acc14
657         rlwimi  $s2,$acc10,8,16,23
658         lbzx    $acc15,$Tbl2,$acc15
659         rlwimi  $s3,$acc11,8,16,23
660         or      $s0,$s0,$acc12
661         or      $s1,$s1,$acc13
662         or      $s2,$s2,$acc14
663         or      $s3,$s3,$acc15
664         xor     $s0,$s0,$t0
665         xor     $s1,$s1,$t1
666         xor     $s2,$s2,$t2
667         xor     $s3,$s3,$t3
668         blr
669         .long   0
670         .byte   0,12,0x14,0,0,0,0,0
671
672 .align  4
673 Lppc_AES_encrypt_compact:
674         lwz     $acc00,240($key)
675         addi    $Tbl1,$Tbl0,2048
676         lwz     $t0,0($key)
677         lis     $mask80,0x8080
678         lwz     $t1,4($key)
679         lis     $mask1b,0x1b1b
680         lwz     $t2,8($key)
681         ori     $mask80,$mask80,0x8080
682         lwz     $t3,12($key)
683         ori     $mask1b,$mask1b,0x1b1b
684         addi    $key,$key,16
685         mtctr   $acc00
686 .align  4
687 Lenc_compact_loop:
688         xor     $s0,$s0,$t0
689         xor     $s1,$s1,$t1
690         rlwinm  $acc00,$s0,`32-24`,24,31
691         xor     $s2,$s2,$t2
692         rlwinm  $acc01,$s1,`32-24`,24,31
693         xor     $s3,$s3,$t3
694         rlwinm  $acc02,$s2,`32-24`,24,31
695         rlwinm  $acc03,$s3,`32-24`,24,31
696         rlwinm  $acc04,$s1,`32-16`,24,31
697         rlwinm  $acc05,$s2,`32-16`,24,31
698         rlwinm  $acc06,$s3,`32-16`,24,31
699         rlwinm  $acc07,$s0,`32-16`,24,31
700         lbzx    $acc00,$Tbl1,$acc00
701         rlwinm  $acc08,$s2,`32-8`,24,31
702         lbzx    $acc01,$Tbl1,$acc01
703         rlwinm  $acc09,$s3,`32-8`,24,31
704         lbzx    $acc02,$Tbl1,$acc02
705         rlwinm  $acc10,$s0,`32-8`,24,31
706         lbzx    $acc03,$Tbl1,$acc03
707         rlwinm  $acc11,$s1,`32-8`,24,31
708         lbzx    $acc04,$Tbl1,$acc04
709         rlwinm  $acc12,$s3,`0`,24,31
710         lbzx    $acc05,$Tbl1,$acc05
711         rlwinm  $acc13,$s0,`0`,24,31
712         lbzx    $acc06,$Tbl1,$acc06
713         rlwinm  $acc14,$s1,`0`,24,31
714         lbzx    $acc07,$Tbl1,$acc07
715         rlwinm  $acc15,$s2,`0`,24,31
716         lbzx    $acc08,$Tbl1,$acc08
717         rlwinm  $s0,$acc00,24,0,7
718         lbzx    $acc09,$Tbl1,$acc09
719         rlwinm  $s1,$acc01,24,0,7
720         lbzx    $acc10,$Tbl1,$acc10
721         rlwinm  $s2,$acc02,24,0,7
722         lbzx    $acc11,$Tbl1,$acc11
723         rlwinm  $s3,$acc03,24,0,7
724         lbzx    $acc12,$Tbl1,$acc12
725         rlwimi  $s0,$acc04,16,8,15
726         lbzx    $acc13,$Tbl1,$acc13
727         rlwimi  $s1,$acc05,16,8,15
728         lbzx    $acc14,$Tbl1,$acc14
729         rlwimi  $s2,$acc06,16,8,15
730         lbzx    $acc15,$Tbl1,$acc15
731         rlwimi  $s3,$acc07,16,8,15
732         rlwimi  $s0,$acc08,8,16,23
733         rlwimi  $s1,$acc09,8,16,23
734         rlwimi  $s2,$acc10,8,16,23
735         rlwimi  $s3,$acc11,8,16,23
736         lwz     $t0,0($key)
737         or      $s0,$s0,$acc12
738         lwz     $t1,4($key)
739         or      $s1,$s1,$acc13
740         lwz     $t2,8($key)
741         or      $s2,$s2,$acc14
742         lwz     $t3,12($key)
743         or      $s3,$s3,$acc15
744
745         addi    $key,$key,16
746         bdz     Lenc_compact_done
747
748         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
749         and     $acc01,$s1,$mask80
750         and     $acc02,$s2,$mask80
751         and     $acc03,$s3,$mask80
752         srwi    $acc04,$acc00,7         # r1>>7
753         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
754         srwi    $acc05,$acc01,7
755         andc    $acc09,$s1,$mask80
756         srwi    $acc06,$acc02,7
757         andc    $acc10,$s2,$mask80
758         srwi    $acc07,$acc03,7
759         andc    $acc11,$s3,$mask80
760         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
761         sub     $acc01,$acc01,$acc05
762         sub     $acc02,$acc02,$acc06
763         sub     $acc03,$acc03,$acc07
764         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
765         add     $acc09,$acc09,$acc09
766         add     $acc10,$acc10,$acc10
767         add     $acc11,$acc11,$acc11
768         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
769         and     $acc01,$acc01,$mask1b
770         and     $acc02,$acc02,$mask1b
771         and     $acc03,$acc03,$mask1b
772         xor     $acc00,$acc00,$acc08    # r2
773         xor     $acc01,$acc01,$acc09
774          rotlwi $acc12,$s0,16           # ROTATE(r0,16)
775         xor     $acc02,$acc02,$acc10
776          rotlwi $acc13,$s1,16
777         xor     $acc03,$acc03,$acc11
778          rotlwi $acc14,$s2,16
779
780         xor     $s0,$s0,$acc00          # r0^r2
781         rotlwi  $acc15,$s3,16
782         xor     $s1,$s1,$acc01
783         rotrwi  $s0,$s0,24              # ROTATE(r2^r0,24)
784         xor     $s2,$s2,$acc02
785         rotrwi  $s1,$s1,24
786         xor     $s3,$s3,$acc03
787         rotrwi  $s2,$s2,24
788         xor     $s0,$s0,$acc00          # ROTATE(r2^r0,24)^r2
789         rotrwi  $s3,$s3,24
790         xor     $s1,$s1,$acc01
791         xor     $s2,$s2,$acc02
792         xor     $s3,$s3,$acc03
793         rotlwi  $acc08,$acc12,8         # ROTATE(r0,24)
794         xor     $s0,$s0,$acc12          #
795         rotlwi  $acc09,$acc13,8
796         xor     $s1,$s1,$acc13
797         rotlwi  $acc10,$acc14,8
798         xor     $s2,$s2,$acc14
799         rotlwi  $acc11,$acc15,8
800         xor     $s3,$s3,$acc15
801         xor     $s0,$s0,$acc08          #
802         xor     $s1,$s1,$acc09
803         xor     $s2,$s2,$acc10
804         xor     $s3,$s3,$acc11
805
806         b       Lenc_compact_loop
807 .align  4
808 Lenc_compact_done:
809         xor     $s0,$s0,$t0
810         xor     $s1,$s1,$t1
811         xor     $s2,$s2,$t2
812         xor     $s3,$s3,$t3
813         blr
814         .long   0
815         .byte   0,12,0x14,0,0,0,0,0
816 .size   .AES_encrypt,.-.AES_encrypt
817
818 .globl  .AES_decrypt        # exported entry: reads the 16-byte block at inp, stores the 16-byte result at out
819 .align  7
820 .AES_decrypt:
821         $STU    $sp,-$FRAME($sp)        # open a FRAME-byte stack frame
822         mflr    r0                      # return address; stored at FRAME+LRSAVE below
823
824         $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)   # out is reloaded from this slot after the cipher call
825         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)    # spill r14-r31; Ldec_done reloads them from the same slots
826         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
827         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
828         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
829         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
830         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
831         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
832         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
833         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
834         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
835         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
836         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
837         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
838         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
839         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
840         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
841         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
842         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
843         $PUSH   r0,`$FRAME+$LRSAVE`($sp)        # saved return address
844
845         andi.   $t0,$inp,3              # low two address bits of inp ...
846         andi.   $t1,$out,3              # ... and of out
847         or.     $t0,$t0,$t1             # non-zero if either pointer is not 4-byte aligned
848         bne     Ldec_unaligned          # misaligned: decide between word and byte access there
849
850 Ldec_unaligned_ok:          # word-access path; also the target of the bne at the end of Ldec_unaligned
851 ___
852 $code.=<<___ if (!$LITTLE_ENDIAN);      # big-endian build: input words load straight into the state
853         lwz     $s0,0($inp)
854         lwz     $s1,4($inp)
855         lwz     $s2,8($inp)
856         lwz     $s3,12($inp)
857 ___
858 $code.=<<___ if ($LITTLE_ENDIAN);       # little-endian build: load into temporaries, then byte-reverse
859         lwz     $t0,0($inp)
860         lwz     $t1,4($inp)
861         lwz     $t2,8($inp)
862         lwz     $t3,12($inp)
863         rotlwi  $s0,$t0,8               # rotate left by 8: two of the four bytes land in reversed position
864         rotlwi  $s1,$t1,8
865         rotlwi  $s2,$t2,8
866         rotlwi  $s3,$t3,8
867         rlwimi  $s0,$t0,24,0,7          # insert the source low byte as the most significant byte
868         rlwimi  $s1,$t1,24,0,7
869         rlwimi  $s2,$t2,24,0,7
870         rlwimi  $s3,$t3,24,0,7
871         rlwimi  $s0,$t0,24,16,23        # insert the remaining byte, completing the 32-bit byte swap
872         rlwimi  $s1,$t1,24,16,23
873         rlwimi  $s2,$t2,24,16,23
874         rlwimi  $s3,$t3,24,16,23
875 ___
876 $code.=<<___;
877         bl      LAES_Td                 # defined outside this section; presumably points Tbl0 at the Td tables - TODO confirm
878         bl      Lppc_AES_decrypt_compact        # transforms the state words s0-s3 in place
879         $POP    $out,`$FRAME-$SIZE_T*19`($sp)   # reload out from the slot written in the prologue
880 ___
881 $code.=<<___ if ($LITTLE_ENDIAN);       # little-endian build: byte-reverse the result before storing
882         rotlwi  $t0,$s0,8               # same three-instruction byte swap as on input
883         rotlwi  $t1,$s1,8
884         rotlwi  $t2,$s2,8
885         rotlwi  $t3,$s3,8
886         rlwimi  $t0,$s0,24,0,7
887         rlwimi  $t1,$s1,24,0,7
888         rlwimi  $t2,$s2,24,0,7
889         rlwimi  $t3,$s3,24,0,7
890         rlwimi  $t0,$s0,24,16,23
891         rlwimi  $t1,$s1,24,16,23
892         rlwimi  $t2,$s2,24,16,23
893         rlwimi  $t3,$s3,24,16,23
894         stw     $t0,0($out)
895         stw     $t1,4($out)
896         stw     $t2,8($out)
897         stw     $t3,12($out)
898 ___
899 $code.=<<___ if (!$LITTLE_ENDIAN);      # big-endian build: state words are stored as they are
900         stw     $s0,0($out)
901         stw     $s1,4($out)
902         stw     $s2,8($out)
903         stw     $s3,12($out)
904 ___
905 $code.=<<___;
906         b       Ldec_done               # skip the bytewise path and go to the common epilogue
907
908 Ldec_unaligned:             # reached when inp or out is not 4-byte aligned
909         subfic  $t0,$inp,4096           # t0 = 4096 - inp
910         subfic  $t1,$out,4096           # t1 = 4096 - out
911         andi.   $t0,$t0,4096-16         # zero when fewer than 16 bytes separate inp from the next 4096-byte boundary
912         beq     Ldec_xpage              # a 16-byte word access could cross that boundary: go bytewise
913         andi.   $t1,$t1,4096-16         # same test for out
914         bne     Ldec_unaligned_ok       # neither block is near a boundary: misaligned word access is used; otherwise fall into Ldec_xpage
915
916 Ldec_xpage:                 # bytewise path: builds the state from 16 single-byte loads
917         lbz     $acc00,0($inp)
918         lbz     $acc01,1($inp)
919         lbz     $acc02,2($inp)
920         lbz     $s0,3($inp)             # byte 3 lands directly in the low byte of s0
921         lbz     $acc04,4($inp)
922         lbz     $acc05,5($inp)
923         lbz     $acc06,6($inp)
924         lbz     $s1,7($inp)
925         lbz     $acc08,8($inp)
926         lbz     $acc09,9($inp)
927         lbz     $acc10,10($inp)
928         insrwi  $s0,$acc00,8,0          # byte 0 becomes the most significant byte of s0
929         lbz     $s2,11($inp)
930         insrwi  $s1,$acc04,8,0
931         lbz     $acc12,12($inp)
932         insrwi  $s0,$acc01,8,8          # byte 1 into bits 8-15
933         lbz     $acc13,13($inp)
934         insrwi  $s1,$acc05,8,8
935         lbz     $acc14,14($inp)
936         insrwi  $s0,$acc02,8,16         # byte 2 into bits 16-23; s0 now holds bytes 0-3, first byte most significant
937         lbz     $s3,15($inp)
938         insrwi  $s1,$acc06,8,16
939         insrwi  $s2,$acc08,8,0
940         insrwi  $s3,$acc12,8,0
941         insrwi  $s2,$acc09,8,8
942         insrwi  $s3,$acc13,8,8
943         insrwi  $s2,$acc10,8,16
944         insrwi  $s3,$acc14,8,16
945
946         bl      LAES_Td                 # defined outside this section; presumably points Tbl0 at the Td tables - TODO confirm
947         bl      Lppc_AES_decrypt_compact        # transforms the state words s0-s3 in place
948         $POP    $out,`$FRAME-$SIZE_T*19`($sp)   # reload out from the slot written in the prologue
949
950         extrwi  $acc00,$s0,8,0          # most significant byte of s0 is output byte 0
951         extrwi  $acc01,$s0,8,8
952         stb     $acc00,0($out)
953         extrwi  $acc02,$s0,8,16
954         stb     $acc01,1($out)
955         stb     $acc02,2($out)
956         extrwi  $acc04,$s1,8,0
957         stb     $s0,3($out)             # stb writes the low byte of s0, no extraction needed
958         extrwi  $acc05,$s1,8,8
959         stb     $acc04,4($out)
960         extrwi  $acc06,$s1,8,16
961         stb     $acc05,5($out)
962         stb     $acc06,6($out)
963         extrwi  $acc08,$s2,8,0
964         stb     $s1,7($out)
965         extrwi  $acc09,$s2,8,8
966         stb     $acc08,8($out)
967         extrwi  $acc10,$s2,8,16
968         stb     $acc09,9($out)
969         stb     $acc10,10($out)
970         extrwi  $acc12,$s3,8,0
971         stb     $s2,11($out)
972         extrwi  $acc13,$s3,8,8
973         stb     $acc12,12($out)
974         extrwi  $acc14,$s3,8,16
975         stb     $acc13,13($out)
976         stb     $acc14,14($out)
977         stb     $s3,15($out)            # execution falls through into Ldec_done
978
979 Ldec_done:                  # common epilogue for both the word and the bytewise path
980         $POP    r0,`$FRAME+$LRSAVE`($sp)        # saved return address
981         $POP    r14,`$FRAME-$SIZE_T*18`($sp)    # reload r14-r31 from the slots written in the prologue
982         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
983         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
984         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
985         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
986         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
987         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
988         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
989         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
990         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
991         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
992         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
993         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
994         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
995         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
996         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
997         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
998         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
999         mtlr    r0                      # put the return address back in LR
1000         addi    $sp,$sp,$FRAME          # release the stack frame
1001         blr
1002         .long   0                       # NOTE(review): this and the next two data lines look like a traceback-table trailer - confirm against the ABI
1003         .byte   0,12,4,1,0x80,18,3,0    # presumably 18 = GPRs saved (r14-r31) and 3 = fixed parameters - TODO confirm
1004         .long   0
1005
1006 .align  5
1007 Lppc_AES_decrypt:           # table-driven variant working on s0-s3 in place; the visible AES_decrypt calls the compact routine instead
1008         lwz     $acc00,240($key)        # word at key+240 becomes the loop count (after the -1 below)
1009         addi    $Tbl1,$Tbl0,3           # Tbl1-Tbl3 address the same table at byte offsets 3, 2 and 1;
1010         lwz     $t0,0($key)             # first round key, words 0-3
1011         addi    $Tbl2,$Tbl0,2           # presumably the table layout makes such offset loads return rotated entries - verify against the table
1012         lwz     $t1,4($key)
1013         addi    $Tbl3,$Tbl0,1
1014         lwz     $t2,8($key)
1015         addi    $acc00,$acc00,-1        # the last round is done after the loop, so iterate one time fewer
1016         lwz     $t3,12($key)
1017         addi    $key,$key,16            # advance to the next round key
1018         xor     $s0,$s0,$t0             # initial round-key addition
1019         xor     $s1,$s1,$t1
1020         xor     $s2,$s2,$t2
1021         xor     $s3,$s3,$t3
1022         mtctr   $acc00                  # CTR drives the bdnz at the bottom of Ldec_loop
1023 .align  4
1024 Ldec_loop:                  # one table-lookup round per iteration, counted by CTR
1025         rlwinm  $acc00,$s0,`32-24+3`,21,28      # (most significant byte of the state word) * 8 = byte offset into the table
1026         rlwinm  $acc01,$s1,`32-24+3`,21,28
1027         rlwinm  $acc02,$s2,`32-24+3`,21,28
1028         rlwinm  $acc03,$s3,`32-24+3`,21,28
1029         lwz     $t0,0($key)             # round-key loads are interleaved with the index extraction
1030         rlwinm  $acc04,$s3,`32-16+3`,21,28      # second byte: output word 0 takes it from s3, word 1 from s0, ...
1031         lwz     $t1,4($key)
1032         rlwinm  $acc05,$s0,`32-16+3`,21,28
1033         lwz     $t2,8($key)
1034         rlwinm  $acc06,$s1,`32-16+3`,21,28
1035         lwz     $t3,12($key)
1036         rlwinm  $acc07,$s2,`32-16+3`,21,28
1037         lwzx    $acc00,$Tbl0,$acc00     # word lookups replace each offset with its table entry
1038         rlwinm  $acc08,$s2,`32-8+3`,21,28       # third byte: word 0 takes it from s2
1039         lwzx    $acc01,$Tbl0,$acc01
1040         rlwinm  $acc09,$s3,`32-8+3`,21,28
1041         lwzx    $acc02,$Tbl0,$acc02
1042         rlwinm  $acc10,$s0,`32-8+3`,21,28
1043         lwzx    $acc03,$Tbl0,$acc03
1044         rlwinm  $acc11,$s1,`32-8+3`,21,28
1045         lwzx    $acc04,$Tbl1,$acc04
1046         rlwinm  $acc12,$s1,`0+3`,21,28  # low byte: word 0 takes it from s1
1047         lwzx    $acc05,$Tbl1,$acc05
1048         rlwinm  $acc13,$s2,`0+3`,21,28
1049         lwzx    $acc06,$Tbl1,$acc06
1050         rlwinm  $acc14,$s3,`0+3`,21,28
1051         lwzx    $acc07,$Tbl1,$acc07
1052         rlwinm  $acc15,$s0,`0+3`,21,28
1053         lwzx    $acc08,$Tbl2,$acc08
1054         xor     $t0,$t0,$acc00          # accumulate the four lookups of each column onto its round-key word
1055         lwzx    $acc09,$Tbl2,$acc09
1056         xor     $t1,$t1,$acc01
1057         lwzx    $acc10,$Tbl2,$acc10
1058         xor     $t2,$t2,$acc02
1059         lwzx    $acc11,$Tbl2,$acc11
1060         xor     $t3,$t3,$acc03
1061         lwzx    $acc12,$Tbl3,$acc12
1062         xor     $t0,$t0,$acc04
1063         lwzx    $acc13,$Tbl3,$acc13
1064         xor     $t1,$t1,$acc05
1065         lwzx    $acc14,$Tbl3,$acc14
1066         xor     $t2,$t2,$acc06
1067         lwzx    $acc15,$Tbl3,$acc15
1068         xor     $t3,$t3,$acc07
1069         xor     $t0,$t0,$acc08
1070         xor     $t1,$t1,$acc09
1071         xor     $t2,$t2,$acc10
1072         xor     $t3,$t3,$acc11
1073         xor     $s0,$t0,$acc12          # last xor writes the new state word
1074         xor     $s1,$t1,$acc13
1075         xor     $s2,$t2,$acc14
1076         xor     $s3,$t3,$acc15
1077         addi    $key,$key,16            # advance to the next round key
1078         bdnz    Ldec_loop
1079
1080         addi    $Tbl2,$Tbl0,2048        # final round: Tbl2 now addresses the byte table at Tbl0+2048
1081         nop
1082         lwz     $t0,0($key)             # last round key
1083         rlwinm  $acc00,$s0,`32-24`,24,31        # plain byte extraction (no scaling): byte-table index
1084         lwz     $t1,4($key)
1085         rlwinm  $acc01,$s1,`32-24`,24,31
1086         lwz     $t2,8($key)
1087         rlwinm  $acc02,$s2,`32-24`,24,31
1088         lwz     $t3,12($key)
1089         rlwinm  $acc03,$s3,`32-24`,24,31
1090         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Td4
1091         rlwinm  $acc04,$s3,`32-16`,24,31
1092         lwz     $acc09,`2048+32`($Tbl0) # the eight loads at 32-byte strides are discarded: acc08-acc15 are overwritten below
1093         rlwinm  $acc05,$s0,`32-16`,24,31
1094         lwz     $acc10,`2048+64`($Tbl0)
1095         lbzx    $acc00,$Tbl2,$acc00     # byte lookups replace each index with its table byte
1096         lwz     $acc11,`2048+96`($Tbl0)
1097         lbzx    $acc01,$Tbl2,$acc01
1098         lwz     $acc12,`2048+128`($Tbl0)
1099         rlwinm  $acc06,$s1,`32-16`,24,31
1100         lwz     $acc13,`2048+160`($Tbl0)
1101         rlwinm  $acc07,$s2,`32-16`,24,31
1102         lwz     $acc14,`2048+192`($Tbl0)
1103         rlwinm  $acc08,$s2,`32-8`,24,31
1104         lwz     $acc15,`2048+224`($Tbl0)
1105         rlwinm  $acc09,$s3,`32-8`,24,31
1106         lbzx    $acc02,$Tbl2,$acc02
1107         rlwinm  $acc10,$s0,`32-8`,24,31
1108         lbzx    $acc03,$Tbl2,$acc03
1109         rlwinm  $acc11,$s1,`32-8`,24,31
1110         lbzx    $acc04,$Tbl2,$acc04
1111         rlwinm  $acc12,$s1,`0`,24,31
1112         lbzx    $acc05,$Tbl2,$acc05
1113         rlwinm  $acc13,$s2,`0`,24,31
1114         lbzx    $acc06,$Tbl2,$acc06
1115         rlwinm  $acc14,$s3,`0`,24,31
1116         lbzx    $acc07,$Tbl2,$acc07
1117         rlwinm  $acc15,$s0,`0`,24,31
1118         lbzx    $acc08,$Tbl2,$acc08
1119         rlwinm  $s0,$acc00,24,0,7       # rebuild each word: first substituted byte into bits 0-7 (most significant)
1120         lbzx    $acc09,$Tbl2,$acc09
1121         rlwinm  $s1,$acc01,24,0,7
1122         lbzx    $acc10,$Tbl2,$acc10
1123         rlwinm  $s2,$acc02,24,0,7
1124         lbzx    $acc11,$Tbl2,$acc11
1125         rlwinm  $s3,$acc03,24,0,7
1126         lbzx    $acc12,$Tbl2,$acc12
1127         rlwimi  $s0,$acc04,16,8,15      # second byte into bits 8-15
1128         lbzx    $acc13,$Tbl2,$acc13
1129         rlwimi  $s1,$acc05,16,8,15
1130         lbzx    $acc14,$Tbl2,$acc14
1131         rlwimi  $s2,$acc06,16,8,15
1132         lbzx    $acc15,$Tbl2,$acc15
1133         rlwimi  $s3,$acc07,16,8,15
1134         rlwimi  $s0,$acc08,8,16,23      # third byte into bits 16-23
1135         rlwimi  $s1,$acc09,8,16,23
1136         rlwimi  $s2,$acc10,8,16,23
1137         rlwimi  $s3,$acc11,8,16,23
1138         or      $s0,$s0,$acc12          # fourth byte occupies the low 8 bits
1139         or      $s1,$s1,$acc13
1140         or      $s2,$s2,$acc14
1141         or      $s3,$s3,$acc15
1142         xor     $s0,$s0,$t0             # add the last round key
1143         xor     $s1,$s1,$t1
1144         xor     $s2,$s2,$t2
1145         xor     $s3,$s3,$t3
1146         blr
1147         .long   0                       # NOTE(review): trailer data, looks like a traceback-table tag - confirm against the ABI
1148         .byte   0,12,0x14,0,0,0,0,0
1149
1150 .align  4
1151 Lppc_AES_decrypt_compact:   # byte-table variant working on s0-s3 in place; called from .AES_decrypt
1152         lwz     $acc00,240($key)        # word at key+240 becomes the loop count
1153         addi    $Tbl1,$Tbl0,2048        # Tbl1 addresses the byte table at Tbl0+2048, used by every lbzx in the loop
1154         lwz     $t0,0($key)             # first round key, words 0-3
1155         lis     $mask80,0x8080          # build the constant 0x80808080 (upper half here, lower half via ori)
1156         lwz     $t1,4($key)
1157         lis     $mask1b,0x1b1b          # build the constant 0x1b1b1b1b
1158         lwz     $t2,8($key)
1159         ori     $mask80,$mask80,0x8080
1160         lwz     $t3,12($key)
1161         ori     $mask1b,$mask1b,0x1b1b
1162         addi    $key,$key,16            # advance to the next round key
1163 ___
1164 $code.=<<___ if ($SIZE_T==8);   # 64-bit build: widen both masks to cover two packed 32-bit columns
1165         insrdi  $mask80,$mask80,32,0    # copy the low 32 bits into the upper half
1166         insrdi  $mask1b,$mask1b,32,0
1167 ___
1168 $code.=<<___;
1169         mtctr   $acc00                  # no decrement here: the loop leaves through the bdz in its middle
1170 .align  4
1171 Ldec_compact_loop:          # first half of a round: key addition, byte selection, byte-table substitution
1172         xor     $s0,$s0,$t0             # add the current round key
1173         xor     $s1,$s1,$t1
1174         rlwinm  $acc00,$s0,`32-24`,24,31        # most significant byte of each state word
1175         xor     $s2,$s2,$t2
1176         rlwinm  $acc01,$s1,`32-24`,24,31
1177         xor     $s3,$s3,$t3
1178         rlwinm  $acc02,$s2,`32-24`,24,31
1179         rlwinm  $acc03,$s3,`32-24`,24,31
1180         rlwinm  $acc04,$s3,`32-16`,24,31        # second byte: output word 0 takes it from s3, word 1 from s0, ...
1181         rlwinm  $acc05,$s0,`32-16`,24,31
1182         rlwinm  $acc06,$s1,`32-16`,24,31
1183         rlwinm  $acc07,$s2,`32-16`,24,31
1184         lbzx    $acc00,$Tbl1,$acc00     # byte lookups replace each index with its table byte
1185         rlwinm  $acc08,$s2,`32-8`,24,31 # third byte: word 0 takes it from s2
1186         lbzx    $acc01,$Tbl1,$acc01
1187         rlwinm  $acc09,$s3,`32-8`,24,31
1188         lbzx    $acc02,$Tbl1,$acc02
1189         rlwinm  $acc10,$s0,`32-8`,24,31
1190         lbzx    $acc03,$Tbl1,$acc03
1191         rlwinm  $acc11,$s1,`32-8`,24,31
1192         lbzx    $acc04,$Tbl1,$acc04
1193         rlwinm  $acc12,$s1,`0`,24,31    # low byte: word 0 takes it from s1
1194         lbzx    $acc05,$Tbl1,$acc05
1195         rlwinm  $acc13,$s2,`0`,24,31
1196         lbzx    $acc06,$Tbl1,$acc06
1197         rlwinm  $acc14,$s3,`0`,24,31
1198         lbzx    $acc07,$Tbl1,$acc07
1199         rlwinm  $acc15,$s0,`0`,24,31
1200         lbzx    $acc08,$Tbl1,$acc08
1201         rlwinm  $s0,$acc00,24,0,7       # rebuild each word: first substituted byte into bits 0-7 (most significant)
1202         lbzx    $acc09,$Tbl1,$acc09
1203         rlwinm  $s1,$acc01,24,0,7
1204         lbzx    $acc10,$Tbl1,$acc10
1205         rlwinm  $s2,$acc02,24,0,7
1206         lbzx    $acc11,$Tbl1,$acc11
1207         rlwinm  $s3,$acc03,24,0,7
1208         lbzx    $acc12,$Tbl1,$acc12
1209         rlwimi  $s0,$acc04,16,8,15      # second byte into bits 8-15
1210         lbzx    $acc13,$Tbl1,$acc13
1211         rlwimi  $s1,$acc05,16,8,15
1212         lbzx    $acc14,$Tbl1,$acc14
1213         rlwimi  $s2,$acc06,16,8,15
1214         lbzx    $acc15,$Tbl1,$acc15
1215         rlwimi  $s3,$acc07,16,8,15
1216         rlwimi  $s0,$acc08,8,16,23      # third byte into bits 16-23
1217         rlwimi  $s1,$acc09,8,16,23
1218         rlwimi  $s2,$acc10,8,16,23
1219         rlwimi  $s3,$acc11,8,16,23
1220         lwz     $t0,0($key)             # fetch the next round key while the words are completed
1221         or      $s0,$s0,$acc12          # fourth byte occupies the low 8 bits
1222         lwz     $t1,4($key)
1223         or      $s1,$s1,$acc13
1224         lwz     $t2,8($key)
1225         or      $s2,$s2,$acc14
1226         lwz     $t3,12($key)
1227         or      $s3,$s3,$acc15
1228
1229         addi    $key,$key,16            # advance to the following round key
1230         bdz     Ldec_compact_done       # final iteration skips the column-mixing code that follows
1231 ___
1232 $code.=<<___ if ($SIZE_T==8);   # 64-bit build: compute the doublings r2, r4, r8 on two columns per register
1233         # vectorized permutation improves decrypt performance by 10%
1234         insrdi  $s0,$s1,32,0            # place s1 in the upper half of s0: two columns share one register
1235         insrdi  $s2,$s3,32,0            # likewise s3 above s2
1236
1237         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1238         and     $acc02,$s2,$mask80
1239         srdi    $acc04,$acc00,7         # r1>>7
1240         srdi    $acc06,$acc02,7
1241         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1242         andc    $acc10,$s2,$mask80
1243         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1244         sub     $acc02,$acc02,$acc06
1245         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1246         add     $acc10,$acc10,$acc10
1247         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1248         and     $acc02,$acc02,$mask1b
1249         xor     $acc00,$acc00,$acc08    # r2
1250         xor     $acc02,$acc02,$acc10
1251
1252         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1253         and     $acc06,$acc02,$mask80
1254         srdi    $acc08,$acc04,7         # r1>>7
1255         srdi    $acc10,$acc06,7
1256         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1257         andc    $acc14,$acc02,$mask80
1258         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1259         sub     $acc06,$acc06,$acc10
1260         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1261         add     $acc14,$acc14,$acc14
1262         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1263         and     $acc06,$acc06,$mask1b
1264         xor     $acc04,$acc04,$acc12    # r4
1265         xor     $acc06,$acc06,$acc14
1266
1267         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1268         and     $acc10,$acc06,$mask80
1269         srdi    $acc12,$acc08,7         # r1>>7
1270         srdi    $acc14,$acc10,7
1271         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1272         sub     $acc10,$acc10,$acc14
1273         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1274         andc    $acc14,$acc06,$mask80
1275         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1276         add     $acc14,$acc14,$acc14
1277         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1278         and     $acc10,$acc10,$mask1b
1279         xor     $acc08,$acc08,$acc12    # r8
1280         xor     $acc10,$acc10,$acc14
1281
1282         xor     $acc00,$acc00,$s0       # r2^r0
1283         xor     $acc02,$acc02,$s2
1284         xor     $acc04,$acc04,$s0       # r4^r0
1285         xor     $acc06,$acc06,$s2
1286
1287         extrdi  $acc01,$acc00,32,0      # unpack: upper halves go to the odd-numbered registers read by the shared tail
1288         extrdi  $acc03,$acc02,32,0
1289         extrdi  $acc05,$acc04,32,0
1290         extrdi  $acc07,$acc06,32,0
1291         extrdi  $acc09,$acc08,32,0
1292         extrdi  $acc11,$acc10,32,0
1293 ___
1294 $code.=<<___ if ($SIZE_T==4);   # 32-bit build: compute the doublings r2, r4, r8 with one column per register
1295         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1296         and     $acc01,$s1,$mask80
1297         and     $acc02,$s2,$mask80
1298         and     $acc03,$s3,$mask80
1299         srwi    $acc04,$acc00,7         # r1>>7
1300         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1301         srwi    $acc05,$acc01,7
1302         andc    $acc09,$s1,$mask80
1303         srwi    $acc06,$acc02,7
1304         andc    $acc10,$s2,$mask80
1305         srwi    $acc07,$acc03,7
1306         andc    $acc11,$s3,$mask80
1307         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1308         sub     $acc01,$acc01,$acc05
1309         sub     $acc02,$acc02,$acc06
1310         sub     $acc03,$acc03,$acc07
1311         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1312         add     $acc09,$acc09,$acc09
1313         add     $acc10,$acc10,$acc10
1314         add     $acc11,$acc11,$acc11
1315         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1316         and     $acc01,$acc01,$mask1b
1317         and     $acc02,$acc02,$mask1b
1318         and     $acc03,$acc03,$mask1b
1319         xor     $acc00,$acc00,$acc08    # r2
1320         xor     $acc01,$acc01,$acc09
1321         xor     $acc02,$acc02,$acc10
1322         xor     $acc03,$acc03,$acc11
1323
1324         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1325         and     $acc05,$acc01,$mask80
1326         and     $acc06,$acc02,$mask80
1327         and     $acc07,$acc03,$mask80
1328         srwi    $acc08,$acc04,7         # r1>>7
1329         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1330         srwi    $acc09,$acc05,7
1331         andc    $acc13,$acc01,$mask80
1332         srwi    $acc10,$acc06,7
1333         andc    $acc14,$acc02,$mask80
1334         srwi    $acc11,$acc07,7
1335         andc    $acc15,$acc03,$mask80
1336         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1337         sub     $acc05,$acc05,$acc09
1338         sub     $acc06,$acc06,$acc10
1339         sub     $acc07,$acc07,$acc11
1340         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1341         add     $acc13,$acc13,$acc13
1342         add     $acc14,$acc14,$acc14
1343         add     $acc15,$acc15,$acc15
1344         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1345         and     $acc05,$acc05,$mask1b
1346         and     $acc06,$acc06,$mask1b
1347         and     $acc07,$acc07,$mask1b
1348         xor     $acc04,$acc04,$acc12    # r4
1349         xor     $acc05,$acc05,$acc13
1350         xor     $acc06,$acc06,$acc14
1351         xor     $acc07,$acc07,$acc15
1352
1353         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1354         and     $acc09,$acc05,$mask80
1355         srwi    $acc12,$acc08,7         # r1>>7
1356         and     $acc10,$acc06,$mask80
1357         srwi    $acc13,$acc09,7
1358         and     $acc11,$acc07,$mask80
1359         srwi    $acc14,$acc10,7
1360         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1361         srwi    $acc15,$acc11,7
1362         sub     $acc09,$acc09,$acc13
1363         sub     $acc10,$acc10,$acc14
1364         sub     $acc11,$acc11,$acc15
1365         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1366         andc    $acc13,$acc05,$mask80
1367         andc    $acc14,$acc06,$mask80
1368         andc    $acc15,$acc07,$mask80
1369         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1370         add     $acc13,$acc13,$acc13
1371         add     $acc14,$acc14,$acc14
1372         add     $acc15,$acc15,$acc15
1373         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1374         and     $acc09,$acc09,$mask1b
1375         and     $acc10,$acc10,$mask1b
1376         and     $acc11,$acc11,$mask1b
1377         xor     $acc08,$acc08,$acc12    # r8
1378         xor     $acc09,$acc09,$acc13
1379         xor     $acc10,$acc10,$acc14
1380         xor     $acc11,$acc11,$acc15
1381
1382         xor     $acc00,$acc00,$s0       # r2^r0
1383         xor     $acc01,$acc01,$s1
1384         xor     $acc02,$acc02,$s2
1385         xor     $acc03,$acc03,$s3
1386         xor     $acc04,$acc04,$s0       # r4^r0
1387         xor     $acc05,$acc05,$s1
1388         xor     $acc06,$acc06,$s2
1389         xor     $acc07,$acc07,$s3
1390 ___
1391 $code.=<<___;   # tail shared by both word sizes: fold r0, r2^r0, r4^r0 and r8 into the new state words
1392         rotrwi  $s0,$s0,8               # = ROTATE(r0,8)
1393         rotrwi  $s1,$s1,8
1394         xor     $s0,$s0,$acc00          # ^= r2^r0
1395         rotrwi  $s2,$s2,8
1396         xor     $s1,$s1,$acc01
1397         rotrwi  $s3,$s3,8
1398         xor     $s2,$s2,$acc02
1399         xor     $s3,$s3,$acc03
1400         xor     $acc00,$acc00,$acc08    # acc00-03 become r8^r2^r0, rotated by 24 below
1401         xor     $acc01,$acc01,$acc09
1402         xor     $acc02,$acc02,$acc10
1403         xor     $acc03,$acc03,$acc11
1404         xor     $s0,$s0,$acc04          # ^= r4^r0
1405         rotrwi  $acc00,$acc00,24
1406         xor     $s1,$s1,$acc05
1407         rotrwi  $acc01,$acc01,24
1408         xor     $s2,$s2,$acc06
1409         rotrwi  $acc02,$acc02,24
1410         xor     $s3,$s3,$acc07
1411         rotrwi  $acc03,$acc03,24
1412         xor     $acc04,$acc04,$acc08    # acc04-07 become r8^r4^r0, rotated by 16 below
1413         xor     $acc05,$acc05,$acc09
1414         xor     $acc06,$acc06,$acc10
1415         xor     $acc07,$acc07,$acc11
1416         xor     $s0,$s0,$acc08          # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1417         rotrwi  $acc04,$acc04,16
1418         xor     $s1,$s1,$acc09
1419         rotrwi  $acc05,$acc05,16
1420         xor     $s2,$s2,$acc10
1421         rotrwi  $acc06,$acc06,16
1422         xor     $s3,$s3,$acc11
1423         rotrwi  $acc07,$acc07,16
1424         xor     $s0,$s0,$acc00          # ^= ROTATE(r8^r2^r0,24)
1425         rotrwi  $acc08,$acc08,8
1426         xor     $s1,$s1,$acc01
1427         rotrwi  $acc09,$acc09,8
1428         xor     $s2,$s2,$acc02
1429         rotrwi  $acc10,$acc10,8
1430         xor     $s3,$s3,$acc03
1431         rotrwi  $acc11,$acc11,8
1432         xor     $s0,$s0,$acc04          # ^= ROTATE(r8^r4^r0,16)
1433         xor     $s1,$s1,$acc05
1434         xor     $s2,$s2,$acc06
1435         xor     $s3,$s3,$acc07
1436         xor     $s0,$s0,$acc08          # ^= ROTATE(r8,8)
1437         xor     $s1,$s1,$acc09
1438         xor     $s2,$s2,$acc10
1439         xor     $s3,$s3,$acc11
1440
1441         b       Ldec_compact_loop       # next round; the loop leaves through its bdz
1442 .align  4
1443 Ldec_compact_done:          # reached from the bdz after the last substitution
1444         xor     $s0,$s0,$t0             # add the last round key
1445         xor     $s1,$s1,$t1
1446         xor     $s2,$s2,$t2
1447         xor     $s3,$s3,$t3
1448         blr
1449         .long   0                       # NOTE(review): trailer data, looks like a traceback-table tag - confirm against the ABI
1450         .byte   0,12,0x14,0,0,0,0,0
1451 .size   .AES_decrypt,.-.AES_decrypt     # size runs from .AES_decrypt to this point, local subroutines included
1452
1453 .asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1454 .align  7
1455 ___
1456
1457 $code =~ s/\`([^\`]*)\`/eval $1/gem;   # replace every `expr` placeholder in the generated text with its evaluated value
1458 print $code;                           # write the generated assembly to STDOUT
1459 close STDOUT or die "error closing STDOUT: $! (status $?)";     # buffered-write errors and a failing downstream pipe only show up at close; presumably STDOUT is piped to the translator earlier in the file - confirm