8cfd4232b81566625c5ec72e734a5ae3b932e7c3
[openssl.git] / crypto / aes / asm / aes-ppc.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # Needs more work: key setup, page boundaries, CBC routine...
11 #
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17 # at 1/3 of ppc_AES_decrypt.
18
19 # February 2010
20 #
21 # Rescheduling instructions to favour Power6 pipeline gave 10%
22 # performance improvement on the platform in question (and marginal
23 # improvement even on others). It should be noted that Power6 fails
24 # to process byte in 18 cycles, only in 23, because it fails to issue
25 # 4 load instructions in two cycles, only in 3. As a result, non-compact
26 # block subroutines are 25% slower than one would expect. Compact
27 # functions scale better, because they have pure computational part,
28 # which scales perfectly with clock frequency. To be specific
29 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
32 # The first command-line argument names the target flavour; all that matters
33 # here is whether it mentions "64" or "32" ("64" is tested first).
34 $flavour = shift;
35
36 # $SIZE_T         - size in bytes of one register save slot (8 or 4)
37 # $LRSAVE         - offset added to $FRAME when the link register is saved
38 # $STU/$POP/$PUSH - store-with-update, load and store mnemonics of that width
39 ($SIZE_T,$LRSAVE,$STU,$POP,$PUSH) =
40         $flavour =~ /64/ ? (8, 2*8, "stdu", "ld",  "std") :
41         $flavour =~ /32/ ? (4, 4,   "stwu", "lwz", "stw") :
42                            die "nonsense $flavour";
47
48 # Locate ppc-xlate.pl next to this script or under ../../perlasm/, then pipe everything printed to STDOUT through it.
49 $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";	# "" when $0 has no directory part (never a stale $1)
50 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
51 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
52 die "can't locate ppc-xlate.pl";
53 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";	# 'or', not '||': '||' tested shift's value, so a failed open was never reported
54
55 $FRAME=32*$SIZE_T;	# bytes of stack the entry points allocate with "$STU $sp,-$FRAME($sp)": 32 save slots of $SIZE_T bytes
56
57 # Append one "\t.long\t0xW,0xW" line to the global $code for every argument W (each word is emitted twice); stops at the first undef argument.
58 sub _data_word	# no "()" prototype: the sub does take arguments, and the empty prototype only worked because callers use the &name(...) form
59 {   foreach my $w (@_) { last unless defined($w); $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$w,$w; }
60 }
61
62 # Stack pointer, r2 (saved and restored by the entry points), and the three
63 # pointers the entry points read the block from, write it to, and load
64 # round-key words from.
65 ($sp,$toc,$inp,$out,$key) = ("r1","r2","r3","r4","r5");
66
67 # Table base pointers. $Tbl0 shares r3 with $inp and $Tbl3 shares r2 with $toc.
68 ($Tbl0,$Tbl1,$Tbl2,$Tbl3) = ("r3","r6","r7","r2");
69
70 # Four state words, then four temporaries that receive the round-key words.
71 ($s0,$s1,$s2,$s3) = ("r8","r9","r10","r11");
72 ($t0,$t1,$t2,$t3) = ("r12","r13","r14","r15");
73
74 # Sixteen scratch accumulators occupying r16..r31.
75 ($acc00,$acc01,$acc02,$acc03,$acc04,$acc05,$acc06,$acc07,
76  $acc08,$acc09,$acc10,$acc11,$acc12,$acc13,$acc14,$acc15) = map { "r$_" } (16..31);
77
78 # stay away from TLS pointer: r13 is given up when $SIZE_T is 8, r2 otherwise,
79 # and r0 is pressed into service in both cases. The bare die()s merely
80 # cross-check the assignments made above.
81 if ($SIZE_T==8) {
82         $t1 eq "r13" or die;
83         $t1 = "r0";
84 } else {
85         $Tbl3 eq "r2" or die;
86         ($Tbl3,$t0) = ($t0,"r0");
87 }
88
89 # The compact routines keep their 0x80808080 / 0x1b1b1b1b masks in two of the table registers.
90 ($mask80,$mask1b) = ($Tbl2,$Tbl3);
108 # LAES_Te / LAES_Td return a table address in $Tbl0: each obtains its own address with "bcl 20,31,$+4" + mflr, adds the distance to its table, and restores LR from r0 before returning.
109 $code.=<<___;	# the padding places the first table 128 bytes after LAES_Te; LAES_Td additionally skips the 2048+256 bytes emitted ahead of its table
110 .machine        "any"
111 .text
112
113 .align  7
114 LAES_Te:
115         mflr    r0
116         bcl     20,31,\$+4
117         mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
118         addi    $Tbl0,$Tbl0,`128-8`
119         mtlr    r0
120         blr
121         .long   0
122         .byte   0,12,0x14,0,0,0,0,0
123         .space  `64-9*4`
124 LAES_Td:
125         mflr    r0
126         bcl     20,31,\$+4
127         mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
128         addi    $Tbl0,$Tbl0,`128-64-8+2048+256`
129         mtlr    r0
130         blr
131         .long   0
132         .byte   0,12,0x14,0,0,0,0,0
133         .space  `128-64-9*4`
134 ___
135 &_data_word(	# table addressed by LAES_Te: 256 words, each emitted twice by _data_word, i.e. 2048 bytes
136         0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
137         0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
138         0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
139         0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
140         0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
141         0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
142         0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
143         0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
144         0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
145         0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
146         0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
147         0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
148         0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
149         0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
150         0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
151         0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
152         0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
153         0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
154         0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
155         0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
156         0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
157         0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
158         0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
159         0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
160         0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
161         0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
162         0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
163         0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
164         0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
165         0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
166         0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
167         0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
168         0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
169         0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
170         0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
171         0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
172         0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
173         0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
174         0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
175         0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
176         0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
177         0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
178         0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
179         0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
180         0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
181         0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
182         0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
183         0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
184         0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
185         0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
186         0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
187         0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
188         0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
189         0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
190         0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
191         0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
192         0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
193         0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
194         0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
195         0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
196         0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
197         0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
198         0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
199         0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
200 $code.=<<___;	# 256 single bytes placed right after the 2048-byte table above; the encrypt code reaches them at 2048($Tbl0) and calls the area Te4
201 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
202 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
203 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
204 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
205 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
206 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
207 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
208 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
209 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
210 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
211 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
212 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
213 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
214 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
215 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
216 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
217 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
218 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
219 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
220 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
221 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
222 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
223 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
224 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
225 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
226 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
227 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
228 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
229 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
230 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
231 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
232 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
233 ___
234 &_data_word(	# table addressed by LAES_Td (2048+256 bytes past the first table): 256 words, each emitted twice by _data_word
235         0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
236         0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
237         0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
238         0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
239         0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
240         0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
241         0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
242         0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
243         0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
244         0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
245         0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
246         0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
247         0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
248         0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
249         0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
250         0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
251         0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
252         0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
253         0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
254         0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
255         0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
256         0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
257         0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
258         0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
259         0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
260         0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
261         0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
262         0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
263         0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
264         0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
265         0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
266         0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
267         0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
268         0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
269         0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
270         0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
271         0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
272         0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
273         0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
274         0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
275         0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
276         0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
277         0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
278         0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
279         0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
280         0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
281         0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
282         0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
283         0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
284         0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
285         0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
286         0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
287         0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
288         0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
289         0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
290         0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
291         0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
292         0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
293         0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
294         0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
295         0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
296         0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
297         0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
298         0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
299 $code.=<<___;
300 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
301 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
302 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
303 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
304 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
305 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
306 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
307 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
308 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
309 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
310 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
311 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
312 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
313 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
314 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
315 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
316 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
317 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
318 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
319 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
320 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
321 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
322 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
323 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
324 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
325 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
326 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
327 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
328 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
329 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
330 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
331 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
332
333
334 .globl  .AES_encrypt
335 .align  7
336 .AES_encrypt:
337         $STU    $sp,-$FRAME($sp)
338         mflr    r0
339
340         $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
341         $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
342         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
343         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
344         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
345         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
346         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
347         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
348         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
349         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
350         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
351         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
352         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
353         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
354         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
355         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
356         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
357         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
358         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
359         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
360         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
361
362         lwz     $s0,0($inp)
363         lwz     $s1,4($inp)
364         lwz     $s2,8($inp)
365         lwz     $s3,12($inp)
366         bl      LAES_Te
367         bl      Lppc_AES_encrypt_compact
368         stw     $s0,0($out)
369         stw     $s1,4($out)
370         stw     $s2,8($out)
371         stw     $s3,12($out)
372
373         $POP    r0,`$FRAME+$LRSAVE`($sp)
374         $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
375         $POP    r13,`$FRAME-$SIZE_T*19`($sp)
376         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
377         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
378         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
379         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
380         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
381         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
382         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
383         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
384         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
385         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
386         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
387         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
388         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
389         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
390         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
391         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
392         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
393         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
394         mtlr    r0
395         addi    $sp,$sp,$FRAME
396         blr
397         .long   0
398         .byte   0,12,4,1,0x80,18,3,0
399         .long   0
400
401 .align  5
402 Lppc_AES_encrypt:
403         lwz     $acc00,240($key)
404         lwz     $t0,0($key)
405         lwz     $t1,4($key)
406         lwz     $t2,8($key)
407         lwz     $t3,12($key)
408         addi    $Tbl1,$Tbl0,3
409         addi    $Tbl2,$Tbl0,2
410         addi    $Tbl3,$Tbl0,1
411         addi    $acc00,$acc00,-1
412         addi    $key,$key,16
413         xor     $s0,$s0,$t0
414         xor     $s1,$s1,$t1
415         xor     $s2,$s2,$t2
416         xor     $s3,$s3,$t3
417         mtctr   $acc00
418 .align  4
419 Lenc_loop:
420         rlwinm  $acc00,$s0,`32-24+3`,21,28
421         rlwinm  $acc01,$s1,`32-24+3`,21,28
422         rlwinm  $acc02,$s2,`32-24+3`,21,28
423         rlwinm  $acc03,$s3,`32-24+3`,21,28
424         lwz     $t0,0($key)
425         lwz     $t1,4($key)
426         rlwinm  $acc04,$s1,`32-16+3`,21,28
427         rlwinm  $acc05,$s2,`32-16+3`,21,28
428         lwz     $t2,8($key)
429         lwz     $t3,12($key)
430         rlwinm  $acc06,$s3,`32-16+3`,21,28
431         rlwinm  $acc07,$s0,`32-16+3`,21,28
432         lwzx    $acc00,$Tbl0,$acc00
433         lwzx    $acc01,$Tbl0,$acc01
434         rlwinm  $acc08,$s2,`32-8+3`,21,28
435         rlwinm  $acc09,$s3,`32-8+3`,21,28
436         lwzx    $acc02,$Tbl0,$acc02
437         lwzx    $acc03,$Tbl0,$acc03
438         rlwinm  $acc10,$s0,`32-8+3`,21,28
439         rlwinm  $acc11,$s1,`32-8+3`,21,28
440         lwzx    $acc04,$Tbl1,$acc04
441         lwzx    $acc05,$Tbl1,$acc05
442         rlwinm  $acc12,$s3,`0+3`,21,28
443         rlwinm  $acc13,$s0,`0+3`,21,28
444         lwzx    $acc06,$Tbl1,$acc06
445         lwzx    $acc07,$Tbl1,$acc07
446         rlwinm  $acc14,$s1,`0+3`,21,28
447         rlwinm  $acc15,$s2,`0+3`,21,28
448         lwzx    $acc08,$Tbl2,$acc08
449         lwzx    $acc09,$Tbl2,$acc09
450         xor     $t0,$t0,$acc00
451         xor     $t1,$t1,$acc01
452         lwzx    $acc10,$Tbl2,$acc10
453         lwzx    $acc11,$Tbl2,$acc11
454         xor     $t2,$t2,$acc02
455         xor     $t3,$t3,$acc03
456         lwzx    $acc12,$Tbl3,$acc12
457         lwzx    $acc13,$Tbl3,$acc13
458         xor     $t0,$t0,$acc04
459         xor     $t1,$t1,$acc05
460         lwzx    $acc14,$Tbl3,$acc14
461         lwzx    $acc15,$Tbl3,$acc15
462         xor     $t2,$t2,$acc06
463         xor     $t3,$t3,$acc07
464         xor     $t0,$t0,$acc08
465         xor     $t1,$t1,$acc09
466         xor     $t2,$t2,$acc10
467         xor     $t3,$t3,$acc11
468         xor     $s0,$t0,$acc12
469         xor     $s1,$t1,$acc13
470         xor     $s2,$t2,$acc14
471         xor     $s3,$t3,$acc15
472         addi    $key,$key,16
473         bdnz-   Lenc_loop
474
475         addi    $Tbl2,$Tbl0,2048
476         nop
477         lwz     $t0,0($key)
478         lwz     $t1,4($key)
479         rlwinm  $acc00,$s0,`32-24`,24,31
480         rlwinm  $acc01,$s1,`32-24`,24,31
481         lwz     $t2,8($key)
482         lwz     $t3,12($key)
483         rlwinm  $acc02,$s2,`32-24`,24,31
484         rlwinm  $acc03,$s3,`32-24`,24,31
485         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Te4
486         lwz     $acc09,`2048+32`($Tbl0)
487         rlwinm  $acc04,$s1,`32-16`,24,31
488         rlwinm  $acc05,$s2,`32-16`,24,31
489         lwz     $acc10,`2048+64`($Tbl0)
490         lwz     $acc11,`2048+96`($Tbl0)
491         rlwinm  $acc06,$s3,`32-16`,24,31
492         rlwinm  $acc07,$s0,`32-16`,24,31
493         lwz     $acc12,`2048+128`($Tbl0)
494         lwz     $acc13,`2048+160`($Tbl0)
495         rlwinm  $acc08,$s2,`32-8`,24,31
496         rlwinm  $acc09,$s3,`32-8`,24,31
497         lwz     $acc14,`2048+192`($Tbl0)
498         lwz     $acc15,`2048+224`($Tbl0)
499         rlwinm  $acc10,$s0,`32-8`,24,31
500         rlwinm  $acc11,$s1,`32-8`,24,31
501         lbzx    $acc00,$Tbl2,$acc00
502         lbzx    $acc01,$Tbl2,$acc01
503         rlwinm  $acc12,$s3,`0`,24,31
504         rlwinm  $acc13,$s0,`0`,24,31
505         lbzx    $acc02,$Tbl2,$acc02
506         lbzx    $acc03,$Tbl2,$acc03
507         rlwinm  $acc14,$s1,`0`,24,31
508         rlwinm  $acc15,$s2,`0`,24,31
509         lbzx    $acc04,$Tbl2,$acc04
510         lbzx    $acc05,$Tbl2,$acc05
511         rlwinm  $s0,$acc00,24,0,7
512         rlwinm  $s1,$acc01,24,0,7
513         lbzx    $acc06,$Tbl2,$acc06
514         lbzx    $acc07,$Tbl2,$acc07
515         rlwinm  $s2,$acc02,24,0,7
516         rlwinm  $s3,$acc03,24,0,7
517         lbzx    $acc08,$Tbl2,$acc08
518         lbzx    $acc09,$Tbl2,$acc09
519         rlwimi  $s0,$acc04,16,8,15
520         rlwimi  $s1,$acc05,16,8,15
521         lbzx    $acc10,$Tbl2,$acc10
522         lbzx    $acc11,$Tbl2,$acc11
523         rlwimi  $s2,$acc06,16,8,15
524         rlwimi  $s3,$acc07,16,8,15
525         lbzx    $acc12,$Tbl2,$acc12
526         lbzx    $acc13,$Tbl2,$acc13
527         rlwimi  $s0,$acc08,8,16,23
528         rlwimi  $s1,$acc09,8,16,23
529         lbzx    $acc14,$Tbl2,$acc14
530         lbzx    $acc15,$Tbl2,$acc15
531         rlwimi  $s2,$acc10,8,16,23
532         rlwimi  $s3,$acc11,8,16,23
533         or      $s0,$s0,$acc12
534         or      $s1,$s1,$acc13
535         or      $s2,$s2,$acc14
536         or      $s3,$s3,$acc15
537         xor     $s0,$s0,$t0
538         xor     $s1,$s1,$t1
539         xor     $s2,$s2,$t2
540         xor     $s3,$s3,$t3
541         blr
542         .long   0
543         .byte   0,12,0x14,0,0,0,0,0
544
545 .align  4
546 Lppc_AES_encrypt_compact:
547         lwz     $acc00,240($key)
548         lwz     $t0,0($key)
549         lwz     $t1,4($key)
550         lwz     $t2,8($key)
551         lwz     $t3,12($key)
552         addi    $Tbl1,$Tbl0,2048
553         lis     $mask80,0x8080
554         lis     $mask1b,0x1b1b
555         addi    $key,$key,16
556         ori     $mask80,$mask80,0x8080
557         ori     $mask1b,$mask1b,0x1b1b
558         mtctr   $acc00
559 .align  4
560 Lenc_compact_loop:
561         xor     $s0,$s0,$t0
562         xor     $s1,$s1,$t1
563         xor     $s2,$s2,$t2
564         xor     $s3,$s3,$t3
565         rlwinm  $acc00,$s0,`32-24`,24,31
566         rlwinm  $acc01,$s1,`32-24`,24,31
567         rlwinm  $acc02,$s2,`32-24`,24,31
568         rlwinm  $acc03,$s3,`32-24`,24,31
569         rlwinm  $acc04,$s1,`32-16`,24,31
570         rlwinm  $acc05,$s2,`32-16`,24,31
571         rlwinm  $acc06,$s3,`32-16`,24,31
572         rlwinm  $acc07,$s0,`32-16`,24,31
573         lbzx    $acc00,$Tbl1,$acc00
574         lbzx    $acc01,$Tbl1,$acc01
575         rlwinm  $acc08,$s2,`32-8`,24,31
576         rlwinm  $acc09,$s3,`32-8`,24,31
577         lbzx    $acc02,$Tbl1,$acc02
578         lbzx    $acc03,$Tbl1,$acc03
579         rlwinm  $acc10,$s0,`32-8`,24,31
580         rlwinm  $acc11,$s1,`32-8`,24,31
581         lbzx    $acc04,$Tbl1,$acc04
582         lbzx    $acc05,$Tbl1,$acc05
583         rlwinm  $acc12,$s3,`0`,24,31
584         rlwinm  $acc13,$s0,`0`,24,31
585         lbzx    $acc06,$Tbl1,$acc06
586         lbzx    $acc07,$Tbl1,$acc07
587         rlwinm  $acc14,$s1,`0`,24,31
588         rlwinm  $acc15,$s2,`0`,24,31
589         lbzx    $acc08,$Tbl1,$acc08
590         lbzx    $acc09,$Tbl1,$acc09
591         rlwinm  $s0,$acc00,24,0,7
592         rlwinm  $s1,$acc01,24,0,7
593         lbzx    $acc10,$Tbl1,$acc10
594         lbzx    $acc11,$Tbl1,$acc11
595         rlwinm  $s2,$acc02,24,0,7
596         rlwinm  $s3,$acc03,24,0,7
597         lbzx    $acc12,$Tbl1,$acc12
598         lbzx    $acc13,$Tbl1,$acc13
599         rlwimi  $s0,$acc04,16,8,15
600         rlwimi  $s1,$acc05,16,8,15
601         lbzx    $acc14,$Tbl1,$acc14
602         lbzx    $acc15,$Tbl1,$acc15
603         rlwimi  $s2,$acc06,16,8,15
604         rlwimi  $s3,$acc07,16,8,15
605         rlwimi  $s0,$acc08,8,16,23
606         rlwimi  $s1,$acc09,8,16,23
607         rlwimi  $s2,$acc10,8,16,23
608         rlwimi  $s3,$acc11,8,16,23
609         lwz     $t0,0($key)
610         lwz     $t1,4($key)
611         or      $s0,$s0,$acc12
612         or      $s1,$s1,$acc13
613         lwz     $t2,8($key)
614         lwz     $t3,12($key)
615         or      $s2,$s2,$acc14
616         or      $s3,$s3,$acc15
617
618         addi    $key,$key,16
619         bdz     Lenc_compact_done
620
621         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
622         and     $acc01,$s1,$mask80
623         and     $acc02,$s2,$mask80
624         and     $acc03,$s3,$mask80
625         srwi    $acc04,$acc00,7         # r1>>7
626         srwi    $acc05,$acc01,7
627         srwi    $acc06,$acc02,7
628         srwi    $acc07,$acc03,7
629         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
630         andc    $acc09,$s1,$mask80
631         andc    $acc10,$s2,$mask80
632         andc    $acc11,$s3,$mask80
633         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
634         sub     $acc01,$acc01,$acc05
635         sub     $acc02,$acc02,$acc06
636         sub     $acc03,$acc03,$acc07
637         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
638         add     $acc09,$acc09,$acc09
639         add     $acc10,$acc10,$acc10
640         add     $acc11,$acc11,$acc11
641         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
642         and     $acc01,$acc01,$mask1b
643         and     $acc02,$acc02,$mask1b
644         and     $acc03,$acc03,$mask1b
645         xor     $acc00,$acc00,$acc08    # r2
646         xor     $acc01,$acc01,$acc09
647         xor     $acc02,$acc02,$acc10
648         xor     $acc03,$acc03,$acc11
649
650         rotlwi  $acc12,$s0,16           # ROTATE(r0,16)
651         rotlwi  $acc13,$s1,16
652         rotlwi  $acc14,$s2,16
653         rotlwi  $acc15,$s3,16
654         xor     $s0,$s0,$acc00          # r0^r2
655         xor     $s1,$s1,$acc01
656         xor     $s2,$s2,$acc02
657         xor     $s3,$s3,$acc03
658         rotrwi  $s0,$s0,24              # ROTATE(r2^r0,24)
659         rotrwi  $s1,$s1,24
660         rotrwi  $s2,$s2,24
661         rotrwi  $s3,$s3,24
662         xor     $s0,$s0,$acc00          # ROTATE(r2^r0,24)^r2
663         xor     $s1,$s1,$acc01
664         xor     $s2,$s2,$acc02
665         xor     $s3,$s3,$acc03
666         rotlwi  $acc08,$acc12,8         # ROTATE(r0,24)
667         rotlwi  $acc09,$acc13,8
668         rotlwi  $acc10,$acc14,8
669         rotlwi  $acc11,$acc15,8
670         xor     $s0,$s0,$acc12          #
671         xor     $s1,$s1,$acc13
672         xor     $s2,$s2,$acc14
673         xor     $s3,$s3,$acc15
674         xor     $s0,$s0,$acc08          #
675         xor     $s1,$s1,$acc09
676         xor     $s2,$s2,$acc10
677         xor     $s3,$s3,$acc11
678
679         b       Lenc_compact_loop
680 .align  4
681 Lenc_compact_done:
682         xor     $s0,$s0,$t0
683         xor     $s1,$s1,$t1
684         xor     $s2,$s2,$t2
685         xor     $s3,$s3,$t3
686         blr
687         .long   0
688         .byte   0,12,0x14,0,0,0,0,0
689
690 .globl  .AES_decrypt
691 .align  7
692 .AES_decrypt:
693         $STU    $sp,-$FRAME($sp)
694         mflr    r0
695
696         $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
697         $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
698         $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
699         $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
700         $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
701         $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
702         $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
703         $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
704         $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
705         $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
706         $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
707         $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
708         $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
709         $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
710         $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
711         $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
712         $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
713         $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
714         $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
715         $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
716         $PUSH   r0,`$FRAME+$LRSAVE`($sp)
717
718         lwz     $s0,0($inp)
719         lwz     $s1,4($inp)
720         lwz     $s2,8($inp)
721         lwz     $s3,12($inp)
722         bl      LAES_Td
723         bl      Lppc_AES_decrypt_compact
724         stw     $s0,0($out)
725         stw     $s1,4($out)
726         stw     $s2,8($out)
727         stw     $s3,12($out)
728
729         $POP    r0,`$FRAME+$LRSAVE`($sp)
730         $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
731         $POP    r13,`$FRAME-$SIZE_T*19`($sp)
732         $POP    r14,`$FRAME-$SIZE_T*18`($sp)
733         $POP    r15,`$FRAME-$SIZE_T*17`($sp)
734         $POP    r16,`$FRAME-$SIZE_T*16`($sp)
735         $POP    r17,`$FRAME-$SIZE_T*15`($sp)
736         $POP    r18,`$FRAME-$SIZE_T*14`($sp)
737         $POP    r19,`$FRAME-$SIZE_T*13`($sp)
738         $POP    r20,`$FRAME-$SIZE_T*12`($sp)
739         $POP    r21,`$FRAME-$SIZE_T*11`($sp)
740         $POP    r22,`$FRAME-$SIZE_T*10`($sp)
741         $POP    r23,`$FRAME-$SIZE_T*9`($sp)
742         $POP    r24,`$FRAME-$SIZE_T*8`($sp)
743         $POP    r25,`$FRAME-$SIZE_T*7`($sp)
744         $POP    r26,`$FRAME-$SIZE_T*6`($sp)
745         $POP    r27,`$FRAME-$SIZE_T*5`($sp)
746         $POP    r28,`$FRAME-$SIZE_T*4`($sp)
747         $POP    r29,`$FRAME-$SIZE_T*3`($sp)
748         $POP    r30,`$FRAME-$SIZE_T*2`($sp)
749         $POP    r31,`$FRAME-$SIZE_T*1`($sp)
750         mtlr    r0
751         addi    $sp,$sp,$FRAME
752         blr
753         .long   0
754         .byte   0,12,4,1,0x80,18,3,0
755         .long   0
756
# Lppc_AES_decrypt: table-driven AES block decryption (internal subroutine,
# returns with blr).
#   In:  s0-s3 hold the four state words, key points at the key schedule,
#        Tbl0 points at the lookup table.
#   Out: s0-s3 hold the result. t0-t3, acc00-acc15, Tbl1-Tbl3, key and CTR
#        are overwritten.
# The word at offset 240 of the key schedule, minus one, is the number of
# trips through Ldec_loop; the last round is done after the loop with
# byte-wide lookups in the table located 2048 bytes past Tbl0.
# Loads and ALU instructions are interleaved on purpose (see the scheduling
# note at the top of the file); keep the order when editing.
757 .align  5
758 Lppc_AES_decrypt:
759         lwz     $acc00,240($key)        # loop count source
760         lwz     $t0,0($key)
761         lwz     $t1,4($key)
762         lwz     $t2,8($key)
763         lwz     $t3,12($key)
            # Tbl1-Tbl3 address the same table as Tbl0, but 3, 2 and 1 bytes
            # further in. NOTE(review): presumably every 8-byte entry stores
            # its word twice, so that these loads return byte-rotated copies -
            # confirm against the table definition.
764         addi    $Tbl1,$Tbl0,3
765         addi    $Tbl2,$Tbl0,2
766         addi    $Tbl3,$Tbl0,1
767         addi    $acc00,$acc00,-1        # last round is handled after the loop
768         addi    $key,$key,16
769         xor     $s0,$s0,$t0             # xor state with the first round key
770         xor     $s1,$s1,$t1
771         xor     $s2,$s2,$t2
772         xor     $s3,$s3,$t3
773         mtctr   $acc00
774 .align  4
775 Ldec_loop:
            # Each rlwinm isolates one state byte and scales it by 8 (extra
            # rotate of 3, mask bits 21-28) to form a table offset.
            # Byte sources per output word: top byte from s0,s1,s2,s3; next
            # from s3,s0,s1,s2; next from s2,s3,s0,s1; low byte from s1,s2,s3,s0.
776         rlwinm  $acc00,$s0,`32-24+3`,21,28
777         rlwinm  $acc01,$s1,`32-24+3`,21,28
778         rlwinm  $acc02,$s2,`32-24+3`,21,28
779         rlwinm  $acc03,$s3,`32-24+3`,21,28
780         lwz     $t0,0($key)             # next round key, loaded early
781         lwz     $t1,4($key)
782         rlwinm  $acc04,$s3,`32-16+3`,21,28
783         rlwinm  $acc05,$s0,`32-16+3`,21,28
784         lwz     $t2,8($key)
785         lwz     $t3,12($key)
786         rlwinm  $acc06,$s1,`32-16+3`,21,28
787         rlwinm  $acc07,$s2,`32-16+3`,21,28
788         lwzx    $acc00,$Tbl0,$acc00
789         lwzx    $acc01,$Tbl0,$acc01
790         rlwinm  $acc08,$s2,`32-8+3`,21,28
791         rlwinm  $acc09,$s3,`32-8+3`,21,28
792         lwzx    $acc02,$Tbl0,$acc02
793         lwzx    $acc03,$Tbl0,$acc03
794         rlwinm  $acc10,$s0,`32-8+3`,21,28
795         rlwinm  $acc11,$s1,`32-8+3`,21,28
796         lwzx    $acc04,$Tbl1,$acc04
797         lwzx    $acc05,$Tbl1,$acc05
798         rlwinm  $acc12,$s1,`0+3`,21,28
799         rlwinm  $acc13,$s2,`0+3`,21,28
800         lwzx    $acc06,$Tbl1,$acc06
801         lwzx    $acc07,$Tbl1,$acc07
802         rlwinm  $acc14,$s3,`0+3`,21,28
803         rlwinm  $acc15,$s0,`0+3`,21,28
804         lwzx    $acc08,$Tbl2,$acc08
805         lwzx    $acc09,$Tbl2,$acc09
806         xor     $t0,$t0,$acc00          # fold the four lookups into the round key
807         xor     $t1,$t1,$acc01
808         lwzx    $acc10,$Tbl2,$acc10
809         lwzx    $acc11,$Tbl2,$acc11
810         xor     $t2,$t2,$acc02
811         xor     $t3,$t3,$acc03
812         lwzx    $acc12,$Tbl3,$acc12
813         lwzx    $acc13,$Tbl3,$acc13
814         xor     $t0,$t0,$acc04
815         xor     $t1,$t1,$acc05
816         lwzx    $acc14,$Tbl3,$acc14
817         lwzx    $acc15,$Tbl3,$acc15
818         xor     $t2,$t2,$acc06
819         xor     $t3,$t3,$acc07
820         xor     $t0,$t0,$acc08
821         xor     $t1,$t1,$acc09
822         xor     $t2,$t2,$acc10
823         xor     $t3,$t3,$acc11
824         xor     $s0,$t0,$acc12          # new state for the next round
825         xor     $s1,$t1,$acc13
826         xor     $s2,$t2,$acc14
827         xor     $s3,$t3,$acc15
828         addi    $key,$key,16
829         bdnz-   Ldec_loop
830
            # Last round: byte-wide lookups (lbzx) in the table at Tbl0+2048,
            # reassembled into words and xored with the last round key.
            # The eight lwz from 2048+0 through 2048+224 below are discarded
            # (acc08-acc15 are overwritten before being read); they only touch
            # the 256-byte table once every 32 bytes.
831         addi    $Tbl2,$Tbl0,2048
832         nop
833         lwz     $t0,0($key)
834         lwz     $t1,4($key)
835         rlwinm  $acc00,$s0,`32-24`,24,31
836         rlwinm  $acc01,$s1,`32-24`,24,31
837         lwz     $t2,8($key)
838         lwz     $t3,12($key)
839         rlwinm  $acc02,$s2,`32-24`,24,31
840         rlwinm  $acc03,$s3,`32-24`,24,31
841         lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Td4
842         lwz     $acc09,`2048+32`($Tbl0)
843         rlwinm  $acc04,$s3,`32-16`,24,31
844         rlwinm  $acc05,$s0,`32-16`,24,31
845         lwz     $acc10,`2048+64`($Tbl0)
846         lwz     $acc11,`2048+96`($Tbl0)
847         lbzx    $acc00,$Tbl2,$acc00
848         lbzx    $acc01,$Tbl2,$acc01
849         lwz     $acc12,`2048+128`($Tbl0)
850         lwz     $acc13,`2048+160`($Tbl0)
851         rlwinm  $acc06,$s1,`32-16`,24,31
852         rlwinm  $acc07,$s2,`32-16`,24,31
853         lwz     $acc14,`2048+192`($Tbl0)
854         lwz     $acc15,`2048+224`($Tbl0)
855         rlwinm  $acc08,$s2,`32-8`,24,31
856         rlwinm  $acc09,$s3,`32-8`,24,31
857         lbzx    $acc02,$Tbl2,$acc02
858         lbzx    $acc03,$Tbl2,$acc03
859         rlwinm  $acc10,$s0,`32-8`,24,31
860         rlwinm  $acc11,$s1,`32-8`,24,31
861         lbzx    $acc04,$Tbl2,$acc04
862         lbzx    $acc05,$Tbl2,$acc05
863         rlwinm  $acc12,$s1,`0`,24,31
864         rlwinm  $acc13,$s2,`0`,24,31
865         lbzx    $acc06,$Tbl2,$acc06
866         lbzx    $acc07,$Tbl2,$acc07
867         rlwinm  $acc14,$s3,`0`,24,31
868         rlwinm  $acc15,$s0,`0`,24,31
869         lbzx    $acc08,$Tbl2,$acc08
870         lbzx    $acc09,$Tbl2,$acc09
871         rlwinm  $s0,$acc00,24,0,7       # looked-up byte becomes the top byte
872         rlwinm  $s1,$acc01,24,0,7
873         lbzx    $acc10,$Tbl2,$acc10
874         lbzx    $acc11,$Tbl2,$acc11
875         rlwinm  $s2,$acc02,24,0,7
876         rlwinm  $s3,$acc03,24,0,7
877         lbzx    $acc12,$Tbl2,$acc12
878         lbzx    $acc13,$Tbl2,$acc13
879         rlwimi  $s0,$acc04,16,8,15      # insert second byte
880         rlwimi  $s1,$acc05,16,8,15
881         lbzx    $acc14,$Tbl2,$acc14
882         lbzx    $acc15,$Tbl2,$acc15
883         rlwimi  $s2,$acc06,16,8,15
884         rlwimi  $s3,$acc07,16,8,15
885         rlwimi  $s0,$acc08,8,16,23      # insert third byte
886         rlwimi  $s1,$acc09,8,16,23
887         rlwimi  $s2,$acc10,8,16,23
888         rlwimi  $s3,$acc11,8,16,23
889         or      $s0,$s0,$acc12          # low byte
890         or      $s1,$s1,$acc13
891         or      $s2,$s2,$acc14
892         or      $s3,$s3,$acc15
893         xor     $s0,$s0,$t0             # xor with the last round key
894         xor     $s1,$s1,$t1
895         xor     $s2,$s2,$t2
896         xor     $s3,$s3,$t3
897         blr
            # NOTE(review): the two data lines below look like a traceback
            # table for this routine - confirm against the ABI in use.
898         .long   0
899         .byte   0,12,0x14,0,0,0,0,0
900
# Lppc_AES_decrypt_compact: AES block decryption that performs all lookups
# in the byte-wide table at Tbl0+2048 and does the column mixing with plain
# arithmetic (internal subroutine).
#   In:  s0-s3 hold the four state words, key points at the key schedule,
#        Tbl0 points at the lookup table.
#   Out: s0-s3 hold the result. t0-t3, acc00-acc15, Tbl1, mask80, mask1b,
#        key and CTR are overwritten.
# This part is the prologue plus the first half of the loop body, up to the
# bdz that leaves the loop on the last round.
901 .align  4
902 Lppc_AES_decrypt_compact:
903         lwz     $acc00,240($key)        # loop count
904         lwz     $t0,0($key)
905         lwz     $t1,4($key)
906         lwz     $t2,8($key)
907         lwz     $t3,12($key)
908         addi    $Tbl1,$Tbl0,2048        # base of the byte-wide table
909         lis     $mask80,0x8080
910         lis     $mask1b,0x1b1b
911         addi    $key,$key,16
912         ori     $mask80,$mask80,0x8080  # 0x80808080
913         ori     $mask1b,$mask1b,0x1b1b  # 0x1b1b1b1b
914 ___
# 64-bit builds only: copy the low 32 bits of each mask into the upper half
# of the register, giving 64-bit wide versions of the same byte patterns.
915 $code.=<<___ if ($SIZE_T==8);
916         insrdi  $mask80,$mask80,32,0
917         insrdi  $mask1b,$mask1b,32,0
918 ___
919 $code.=<<___;
920         mtctr   $acc00                  # full count: the loop exits via bdz mid-body
921 .align  4
922 Ldec_compact_loop:
923         xor     $s0,$s0,$t0             # xor state with the current round key
924         xor     $s1,$s1,$t1
925         xor     $s2,$s2,$t2
926         xor     $s3,$s3,$t3
            # Byte sources per output word: top byte from s0,s1,s2,s3; next
            # from s3,s0,s1,s2; next from s2,s3,s0,s1; low byte from s1,s2,s3,s0.
            # Each byte is used unscaled as an index for lbzx.
927         rlwinm  $acc00,$s0,`32-24`,24,31
928         rlwinm  $acc01,$s1,`32-24`,24,31
929         rlwinm  $acc02,$s2,`32-24`,24,31
930         rlwinm  $acc03,$s3,`32-24`,24,31
931         rlwinm  $acc04,$s3,`32-16`,24,31
932         rlwinm  $acc05,$s0,`32-16`,24,31
933         rlwinm  $acc06,$s1,`32-16`,24,31
934         rlwinm  $acc07,$s2,`32-16`,24,31
935         lbzx    $acc00,$Tbl1,$acc00
936         lbzx    $acc01,$Tbl1,$acc01
937         rlwinm  $acc08,$s2,`32-8`,24,31
938         rlwinm  $acc09,$s3,`32-8`,24,31
939         lbzx    $acc02,$Tbl1,$acc02
940         lbzx    $acc03,$Tbl1,$acc03
941         rlwinm  $acc10,$s0,`32-8`,24,31
942         rlwinm  $acc11,$s1,`32-8`,24,31
943         lbzx    $acc04,$Tbl1,$acc04
944         lbzx    $acc05,$Tbl1,$acc05
945         rlwinm  $acc12,$s1,`0`,24,31
946         rlwinm  $acc13,$s2,`0`,24,31
947         lbzx    $acc06,$Tbl1,$acc06
948         lbzx    $acc07,$Tbl1,$acc07
949         rlwinm  $acc14,$s3,`0`,24,31
950         rlwinm  $acc15,$s0,`0`,24,31
951         lbzx    $acc08,$Tbl1,$acc08
952         lbzx    $acc09,$Tbl1,$acc09
953         rlwinm  $s0,$acc00,24,0,7       # looked-up byte becomes the top byte
954         rlwinm  $s1,$acc01,24,0,7
955         lbzx    $acc10,$Tbl1,$acc10
956         lbzx    $acc11,$Tbl1,$acc11
957         rlwinm  $s2,$acc02,24,0,7
958         rlwinm  $s3,$acc03,24,0,7
959         lbzx    $acc12,$Tbl1,$acc12
960         lbzx    $acc13,$Tbl1,$acc13
961         rlwimi  $s0,$acc04,16,8,15      # insert second byte
962         rlwimi  $s1,$acc05,16,8,15
963         lbzx    $acc14,$Tbl1,$acc14
964         lbzx    $acc15,$Tbl1,$acc15
965         rlwimi  $s2,$acc06,16,8,15
966         rlwimi  $s3,$acc07,16,8,15
967         rlwimi  $s0,$acc08,8,16,23      # insert third byte
968         rlwimi  $s1,$acc09,8,16,23
969         rlwimi  $s2,$acc10,8,16,23
970         rlwimi  $s3,$acc11,8,16,23
971         lwz     $t0,0($key)             # next round key
972         lwz     $t1,4($key)
973         or      $s0,$s0,$acc12          # low byte
974         or      $s1,$s1,$acc13
975         lwz     $t2,8($key)
976         lwz     $t3,12($key)
977         or      $s2,$s2,$acc14
978         or      $s3,$s3,$acc15
979
980         addi    $key,$key,16
981         bdz     Ldec_compact_done       # last round: skip the column mixing below
982 ___
# 64-bit builds only: middle part of the Ldec_compact_loop body.
# s1 is packed into the upper half of s0 and s3 into the upper half of s2,
# so two state words are processed per register. Three successive per-byte
# doublings are computed (r2 from r0, r4 from r2, r8 from r4): the low seven
# bits of each byte are shifted left by one, and 0x1b is xored into every
# byte whose top bit was set. On exit acc00/acc02 hold r2^r0, acc04/acc06
# hold r4^r0 and acc08/acc10 hold r8; the extrdi group copies the upper
# halves into the odd-numbered acc registers, so the 32-bit code that
# follows sees one word per register.
983 $code.=<<___ if ($SIZE_T==8);
984         # vectorized permutation improves decrypt performance by 10%
985         insrdi  $s0,$s1,32,0
986         insrdi  $s2,$s3,32,0
987
988         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
989         and     $acc02,$s2,$mask80
990         srdi    $acc04,$acc00,7         # r1>>7
991         srdi    $acc06,$acc02,7
992         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
993         andc    $acc10,$s2,$mask80
994         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
995         sub     $acc02,$acc02,$acc06
996         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
997         add     $acc10,$acc10,$acc10
998         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
999         and     $acc02,$acc02,$mask1b
1000         xor     $acc00,$acc00,$acc08    # r2
1001         xor     $acc02,$acc02,$acc10
1002
1003         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1004         and     $acc06,$acc02,$mask80
1005         srdi    $acc08,$acc04,7         # r1>>7
1006         srdi    $acc10,$acc06,7
1007         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1008         andc    $acc14,$acc02,$mask80
1009         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1010         sub     $acc06,$acc06,$acc10
1011         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1012         add     $acc14,$acc14,$acc14
1013         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1014         and     $acc06,$acc06,$mask1b
1015         xor     $acc04,$acc04,$acc12    # r4
1016         xor     $acc06,$acc06,$acc14
1017
1018         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1019         and     $acc10,$acc06,$mask80
1020         srdi    $acc12,$acc08,7         # r1>>7
1021         srdi    $acc14,$acc10,7
1022         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1023         sub     $acc10,$acc10,$acc14
1024         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1025         andc    $acc14,$acc06,$mask80
1026         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1027         add     $acc14,$acc14,$acc14
1028         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1029         and     $acc10,$acc10,$mask1b
1030         xor     $acc08,$acc08,$acc12    # r8
1031         xor     $acc10,$acc10,$acc14
1032
1033         xor     $acc00,$acc00,$s0       # r2^r0
1034         xor     $acc02,$acc02,$s2
1035         xor     $acc04,$acc04,$s0       # r4^r0
1036         xor     $acc06,$acc06,$s2
1037
1038         extrdi  $acc01,$acc00,32,0      # upper halves out to the odd registers
1039         extrdi  $acc03,$acc02,32,0
1040         extrdi  $acc05,$acc04,32,0
1041         extrdi  $acc07,$acc06,32,0
1042         extrdi  $acc09,$acc08,32,0
1043         extrdi  $acc11,$acc10,32,0
1044 ___
# 32-bit builds only: middle part of the Ldec_compact_loop body.
# For each of the four state words (r0 = s0..s3) three successive per-byte
# doublings are computed (r2 from r0, r4 from r2, r8 from r4): the low seven
# bits of each byte are shifted left by one, and 0x1b is xored into every
# byte whose top bit was set. On exit acc00-acc03 hold r2^r0, acc04-acc07
# hold r4^r0 and acc08-acc11 hold r8; acc12-acc15 are scratch.
1045 $code.=<<___ if ($SIZE_T==4);
1046         and     $acc00,$s0,$mask80      # r1=r0&0x80808080
1047         and     $acc01,$s1,$mask80
1048         and     $acc02,$s2,$mask80
1049         and     $acc03,$s3,$mask80
1050         srwi    $acc04,$acc00,7         # r1>>7
1051         srwi    $acc05,$acc01,7
1052         srwi    $acc06,$acc02,7
1053         srwi    $acc07,$acc03,7
1054         andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
1055         andc    $acc09,$s1,$mask80
1056         andc    $acc10,$s2,$mask80
1057         andc    $acc11,$s3,$mask80
1058         sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
1059         sub     $acc01,$acc01,$acc05
1060         sub     $acc02,$acc02,$acc06
1061         sub     $acc03,$acc03,$acc07
1062         add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
1063         add     $acc09,$acc09,$acc09
1064         add     $acc10,$acc10,$acc10
1065         add     $acc11,$acc11,$acc11
1066         and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1067         and     $acc01,$acc01,$mask1b
1068         and     $acc02,$acc02,$mask1b
1069         and     $acc03,$acc03,$mask1b
1070         xor     $acc00,$acc00,$acc08    # r2
1071         xor     $acc01,$acc01,$acc09
1072         xor     $acc02,$acc02,$acc10
1073         xor     $acc03,$acc03,$acc11
1074
1075         and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
1076         and     $acc05,$acc01,$mask80
1077         and     $acc06,$acc02,$mask80
1078         and     $acc07,$acc03,$mask80
1079         srwi    $acc08,$acc04,7         # r1>>7
1080         srwi    $acc09,$acc05,7
1081         srwi    $acc10,$acc06,7
1082         srwi    $acc11,$acc07,7
1083         andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
1084         andc    $acc13,$acc01,$mask80
1085         andc    $acc14,$acc02,$mask80
1086         andc    $acc15,$acc03,$mask80
1087         sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
1088         sub     $acc05,$acc05,$acc09
1089         sub     $acc06,$acc06,$acc10
1090         sub     $acc07,$acc07,$acc11
1091         add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
1092         add     $acc13,$acc13,$acc13
1093         add     $acc14,$acc14,$acc14
1094         add     $acc15,$acc15,$acc15
1095         and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1096         and     $acc05,$acc05,$mask1b
1097         and     $acc06,$acc06,$mask1b
1098         and     $acc07,$acc07,$mask1b
1099         xor     $acc04,$acc04,$acc12    # r4
1100         xor     $acc05,$acc05,$acc13
1101         xor     $acc06,$acc06,$acc14
1102         xor     $acc07,$acc07,$acc15
1103
1104         and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
1105         and     $acc09,$acc05,$mask80
1106         and     $acc10,$acc06,$mask80
1107         and     $acc11,$acc07,$mask80
1108         srwi    $acc12,$acc08,7         # r1>>7
1109         srwi    $acc13,$acc09,7
1110         srwi    $acc14,$acc10,7
1111         srwi    $acc15,$acc11,7
1112         sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
1113         sub     $acc09,$acc09,$acc13
1114         sub     $acc10,$acc10,$acc14
1115         sub     $acc11,$acc11,$acc15
1116         andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
1117         andc    $acc13,$acc05,$mask80
1118         andc    $acc14,$acc06,$mask80
1119         andc    $acc15,$acc07,$mask80
1120         add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
1121         add     $acc13,$acc13,$acc13
1122         add     $acc14,$acc14,$acc14
1123         add     $acc15,$acc15,$acc15
1124         and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
1125         and     $acc09,$acc09,$mask1b
1126         and     $acc10,$acc10,$mask1b
1127         and     $acc11,$acc11,$mask1b
1128         xor     $acc08,$acc08,$acc12    # r8
1129         xor     $acc09,$acc09,$acc13
1130         xor     $acc10,$acc10,$acc14
1131         xor     $acc11,$acc11,$acc15
1132
1133         xor     $acc00,$acc00,$s0       # r2^r0
1134         xor     $acc01,$acc01,$s1
1135         xor     $acc02,$acc02,$s2
1136         xor     $acc03,$acc03,$s3
1137         xor     $acc04,$acc04,$s0       # r4^r0
1138         xor     $acc05,$acc05,$s1
1139         xor     $acc06,$acc06,$s2
1140         xor     $acc07,$acc07,$s3
1141 ___
# Common tail of the Ldec_compact_loop body, emitted for both word sizes.
# On entry s0-s3 hold r0, acc00-acc03 hold r2^r0, acc04-acc07 hold r4^r0 and
# acc08-acc11 hold r8. Each state word becomes
#   ROTATE(r0,8) ^ (r2^r0) ^ (r4^r0) ^ r8
#     ^ ROTATE(r8^r2^r0,24) ^ ROTATE(r8^r4^r0,16) ^ ROTATE(r8,8)
# where ROTATE(x,n) is what rotrwi x,n produces. The code then branches back
# to the top of the loop. Ldec_compact_done is the loop exit: it xors the
# state with the round key already loaded into t0-t3 and returns.
# The heredoc ends with the module identification string.
1142 $code.=<<___;
1143         rotrwi  $s0,$s0,8               # = ROTATE(r0,8)
1144         rotrwi  $s1,$s1,8
1145         rotrwi  $s2,$s2,8
1146         rotrwi  $s3,$s3,8
1147         xor     $s0,$s0,$acc00          # ^= r2^r0
1148         xor     $s1,$s1,$acc01
1149         xor     $s2,$s2,$acc02
1150         xor     $s3,$s3,$acc03
1151         xor     $acc00,$acc00,$acc08
1152         xor     $acc01,$acc01,$acc09
1153         xor     $acc02,$acc02,$acc10
1154         xor     $acc03,$acc03,$acc11
1155         xor     $s0,$s0,$acc04          # ^= r4^r0
1156         xor     $s1,$s1,$acc05
1157         xor     $s2,$s2,$acc06
1158         xor     $s3,$s3,$acc07
1159         rotrwi  $acc00,$acc00,24
1160         rotrwi  $acc01,$acc01,24
1161         rotrwi  $acc02,$acc02,24
1162         rotrwi  $acc03,$acc03,24
1163         xor     $acc04,$acc04,$acc08
1164         xor     $acc05,$acc05,$acc09
1165         xor     $acc06,$acc06,$acc10
1166         xor     $acc07,$acc07,$acc11
1167         xor     $s0,$s0,$acc08          # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1168         xor     $s1,$s1,$acc09
1169         xor     $s2,$s2,$acc10
1170         xor     $s3,$s3,$acc11
1171         rotrwi  $acc04,$acc04,16
1172         rotrwi  $acc05,$acc05,16
1173         rotrwi  $acc06,$acc06,16
1174         rotrwi  $acc07,$acc07,16
1175         xor     $s0,$s0,$acc00          # ^= ROTATE(r8^r2^r0,24)
1176         xor     $s1,$s1,$acc01
1177         xor     $s2,$s2,$acc02
1178         xor     $s3,$s3,$acc03
1179         rotrwi  $acc08,$acc08,8
1180         rotrwi  $acc09,$acc09,8
1181         rotrwi  $acc10,$acc10,8
1182         rotrwi  $acc11,$acc11,8
1183         xor     $s0,$s0,$acc04          # ^= ROTATE(r8^r4^r0,16)
1184         xor     $s1,$s1,$acc05
1185         xor     $s2,$s2,$acc06
1186         xor     $s3,$s3,$acc07
1187         xor     $s0,$s0,$acc08          # ^= ROTATE(r8,8)       
1188         xor     $s1,$s1,$acc09  
1189         xor     $s2,$s2,$acc10  
1190         xor     $s3,$s3,$acc11  
1191
1192         b       Ldec_compact_loop
1193 .align  4
1194 Ldec_compact_done:
1195         xor     $s0,$s0,$t0             # xor with the last round key
1196         xor     $s1,$s1,$t1
1197         xor     $s2,$s2,$t2
1198         xor     $s3,$s3,$t3
1199         blr
            # NOTE(review): the two data lines below look like a traceback
            # table for this routine - confirm against the ABI in use.
1200         .long   0
1201         .byte   0,12,0x14,0,0,0,0,0
1202
1203 .asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1204 .align  7
1205 ___
1206
# Replace every backtick-quoted expression in the generated text (frame
# offsets, rotate counts, table displacements) with the value Perl computes
# for it, then write the finished assembly to STDOUT.
1207 $code =~ s/\`([^\`]*)\`/eval $1/gem;
1208 print $code;
# Buffered write errors, and a failing process if STDOUT is a pipe, only
# surface when the handle is closed. Make that fatal so the build cannot
# continue with truncated or missing assembly output.
1209 close STDOUT or die "error closing STDOUT: $!";