3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # Needs more work: key setup, CBC routine...
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17 # at 1/3 of ppc_AES_decrypt.
21 # Rescheduling instructions to favour Power6 pipeline gave 10%
22 # performance improvement on the platform in question (and marginal
23 # improvement even on others). It should be noted that Power6 fails
24 # to process byte in 18 cycles, only in 23, because it fails to issue
25 # 4 load instructions in two cycles, only in 3. As a result non-compact
26 # block subroutines are 25% slower than one would expect. Compact
27 # functions scale better, because they have pure computational part,
28 # which scales perfectly with clock frequency. To be specific
29 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
34 if ($flavour =~ /64/) {
40 } elsif ($flavour =~ /32/) {
46 } else { die "nonsense $flavour"; }
49 if ($flavour =~ /le$/) {
50 die "little-endian is 64-bit only: $flavour" if ($SIZE_T == 4);
54 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;  # $dir: leading part of this script's path, up to and including the last / or \
55 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or  # first choice: translator sitting next to this script
56 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or  # fallback: the perlasm directory two levels up
57 die "can't locate ppc-xlate.pl";
59 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";  # pipe everything printed to STDOUT through the translator; low-precedence "or" applies to open() itself (with "||" it bound to the always-true command string, so a failed open was never reported)
65 while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
77 $Tbl3=$out; # stay away from "r2"; $out is offloaded to stack
85 $t1="r0"; # stay away from "r13";
120 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
121 addi $Tbl0,$Tbl0,`128-8`
125 .byte 0,12,0x14,0,0,0,0,0
130 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
131 addi $Tbl0,$Tbl0,`128-64-8+2048+256`
135 .byte 0,12,0x14,0,0,0,0,0
139 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
140 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
141 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
142 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
143 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
144 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
145 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
146 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
147 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
148 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
149 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
150 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
151 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
152 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
153 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
154 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
155 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
156 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
157 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
158 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
159 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
160 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
161 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
162 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
163 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
164 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
165 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
166 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
167 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
168 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
169 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
170 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
171 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
172 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
173 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
174 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
175 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
176 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
177 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
178 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
179 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
180 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
181 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
182 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
183 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
184 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
185 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
186 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
187 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
188 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
189 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
190 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
191 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
192 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
193 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
194 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
195 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
196 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
197 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
198 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
199 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
200 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
201 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
202 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
204 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
205 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
206 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
207 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
208 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
209 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
210 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
211 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
212 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
213 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
214 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
215 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
216 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
217 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
218 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
219 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
220 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
221 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
222 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
223 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
224 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
225 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
226 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
227 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
228 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
229 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
230 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
231 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
232 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
233 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
234 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
235 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
238 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
239 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
240 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
241 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
242 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
243 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
244 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
245 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
246 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
247 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
248 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
249 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
250 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
251 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
252 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
253 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
254 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
255 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
256 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
257 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
258 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
259 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
260 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
261 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
262 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
263 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
264 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
265 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
266 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
267 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
268 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
269 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
270 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
271 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
272 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
273 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
274 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
275 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
276 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
277 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
278 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
279 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
280 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
281 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
282 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
283 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
284 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
285 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
286 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
287 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
288 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
289 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
290 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
291 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
292 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
293 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
294 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
295 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
296 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
297 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
298 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
299 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
300 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
301 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
303 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
304 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
305 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
306 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
307 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
308 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
309 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
310 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
311 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
312 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
313 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
314 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
315 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
316 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
317 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
318 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
319 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
320 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
321 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
322 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
323 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
324 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
325 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
326 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
327 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
328 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
329 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
330 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
331 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
332 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
333 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
334 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
340 $STU $sp,-$FRAME($sp)
343 $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
344 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
345 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
346 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
347 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
348 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
349 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
350 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
351 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
352 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
353 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
354 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
355 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
356 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
357 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
358 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
359 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
360 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
361 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
362 $PUSH r0,`$FRAME+$LRSAVE`($sp)
371 $code.=<<___ if (!$LITTLE_ENDIAN);
377 $code.=<<___ if ($LITTLE_ENDIAN);
386 rlwimi $s0,$t0,24,0,7
387 rlwimi $s1,$t1,24,0,7
388 rlwimi $s2,$t2,24,0,7
389 rlwimi $s3,$t3,24,0,7
390 rlwimi $s0,$t0,24,16,23
391 rlwimi $s1,$t1,24,16,23
392 rlwimi $s2,$t2,24,16,23
393 rlwimi $s3,$t3,24,16,23
397 bl Lppc_AES_encrypt_compact
398 $POP $out,`$FRAME-$SIZE_T*19`($sp)
400 $code.=<<___ if ($LITTLE_ENDIAN);
405 rlwimi $t0,$s0,24,0,7
406 rlwimi $t1,$s1,24,0,7
407 rlwimi $t2,$s2,24,0,7
408 rlwimi $t3,$s3,24,0,7
409 rlwimi $t0,$s0,24,16,23
410 rlwimi $t1,$s1,24,16,23
411 rlwimi $t2,$s2,24,16,23
412 rlwimi $t3,$s3,24,16,23
418 $code.=<<___ if (!$LITTLE_ENDIAN);
430 andi. $t0,$t0,4096-16
432 andi. $t1,$t1,4096-16
433 bne Lenc_unaligned_ok
447 insrwi $s0,$acc00,8,0
449 insrwi $s1,$acc04,8,0
451 insrwi $s0,$acc01,8,8
453 insrwi $s1,$acc05,8,8
455 insrwi $s0,$acc02,8,16
457 insrwi $s1,$acc06,8,16
458 insrwi $s2,$acc08,8,0
459 insrwi $s3,$acc12,8,0
460 insrwi $s2,$acc09,8,8
461 insrwi $s3,$acc13,8,8
462 insrwi $s2,$acc10,8,16
463 insrwi $s3,$acc14,8,16
466 bl Lppc_AES_encrypt_compact
467 $POP $out,`$FRAME-$SIZE_T*19`($sp)
469 extrwi $acc00,$s0,8,0
470 extrwi $acc01,$s0,8,8
472 extrwi $acc02,$s0,8,16
475 extrwi $acc04,$s1,8,0
477 extrwi $acc05,$s1,8,8
479 extrwi $acc06,$s1,8,16
482 extrwi $acc08,$s2,8,0
484 extrwi $acc09,$s2,8,8
486 extrwi $acc10,$s2,8,16
489 extrwi $acc12,$s3,8,0
491 extrwi $acc13,$s3,8,8
493 extrwi $acc14,$s3,8,16
499 $POP r0,`$FRAME+$LRSAVE`($sp)
500 $POP r14,`$FRAME-$SIZE_T*18`($sp)
501 $POP r15,`$FRAME-$SIZE_T*17`($sp)
502 $POP r16,`$FRAME-$SIZE_T*16`($sp)
503 $POP r17,`$FRAME-$SIZE_T*15`($sp)
504 $POP r18,`$FRAME-$SIZE_T*14`($sp)
505 $POP r19,`$FRAME-$SIZE_T*13`($sp)
506 $POP r20,`$FRAME-$SIZE_T*12`($sp)
507 $POP r21,`$FRAME-$SIZE_T*11`($sp)
508 $POP r22,`$FRAME-$SIZE_T*10`($sp)
509 $POP r23,`$FRAME-$SIZE_T*9`($sp)
510 $POP r24,`$FRAME-$SIZE_T*8`($sp)
511 $POP r25,`$FRAME-$SIZE_T*7`($sp)
512 $POP r26,`$FRAME-$SIZE_T*6`($sp)
513 $POP r27,`$FRAME-$SIZE_T*5`($sp)
514 $POP r28,`$FRAME-$SIZE_T*4`($sp)
515 $POP r29,`$FRAME-$SIZE_T*3`($sp)
516 $POP r30,`$FRAME-$SIZE_T*2`($sp)
517 $POP r31,`$FRAME-$SIZE_T*1`($sp)
522 .byte 0,12,4,1,0x80,18,3,0
534 addi $acc00,$acc00,-1
544 rlwinm $acc00,$s0,`32-24+3`,21,28
545 rlwinm $acc01,$s1,`32-24+3`,21,28
546 rlwinm $acc02,$s2,`32-24+3`,21,28
547 rlwinm $acc03,$s3,`32-24+3`,21,28
549 rlwinm $acc04,$s1,`32-16+3`,21,28
551 rlwinm $acc05,$s2,`32-16+3`,21,28
553 rlwinm $acc06,$s3,`32-16+3`,21,28
555 rlwinm $acc07,$s0,`32-16+3`,21,28
556 lwzx $acc00,$Tbl0,$acc00
557 rlwinm $acc08,$s2,`32-8+3`,21,28
558 lwzx $acc01,$Tbl0,$acc01
559 rlwinm $acc09,$s3,`32-8+3`,21,28
560 lwzx $acc02,$Tbl0,$acc02
561 rlwinm $acc10,$s0,`32-8+3`,21,28
562 lwzx $acc03,$Tbl0,$acc03
563 rlwinm $acc11,$s1,`32-8+3`,21,28
564 lwzx $acc04,$Tbl1,$acc04
565 rlwinm $acc12,$s3,`0+3`,21,28
566 lwzx $acc05,$Tbl1,$acc05
567 rlwinm $acc13,$s0,`0+3`,21,28
568 lwzx $acc06,$Tbl1,$acc06
569 rlwinm $acc14,$s1,`0+3`,21,28
570 lwzx $acc07,$Tbl1,$acc07
571 rlwinm $acc15,$s2,`0+3`,21,28
572 lwzx $acc08,$Tbl2,$acc08
574 lwzx $acc09,$Tbl2,$acc09
576 lwzx $acc10,$Tbl2,$acc10
578 lwzx $acc11,$Tbl2,$acc11
580 lwzx $acc12,$Tbl3,$acc12
582 lwzx $acc13,$Tbl3,$acc13
584 lwzx $acc14,$Tbl3,$acc14
586 lwzx $acc15,$Tbl3,$acc15
599 addi $Tbl2,$Tbl0,2048
602 rlwinm $acc00,$s0,`32-24`,24,31
604 rlwinm $acc01,$s1,`32-24`,24,31
606 rlwinm $acc02,$s2,`32-24`,24,31
608 rlwinm $acc03,$s3,`32-24`,24,31
609 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
610 rlwinm $acc04,$s1,`32-16`,24,31
611 lwz $acc09,`2048+32`($Tbl0)
612 rlwinm $acc05,$s2,`32-16`,24,31
613 lwz $acc10,`2048+64`($Tbl0)
614 rlwinm $acc06,$s3,`32-16`,24,31
615 lwz $acc11,`2048+96`($Tbl0)
616 rlwinm $acc07,$s0,`32-16`,24,31
617 lwz $acc12,`2048+128`($Tbl0)
618 rlwinm $acc08,$s2,`32-8`,24,31
619 lwz $acc13,`2048+160`($Tbl0)
620 rlwinm $acc09,$s3,`32-8`,24,31
621 lwz $acc14,`2048+192`($Tbl0)
622 rlwinm $acc10,$s0,`32-8`,24,31
623 lwz $acc15,`2048+224`($Tbl0)
624 rlwinm $acc11,$s1,`32-8`,24,31
625 lbzx $acc00,$Tbl2,$acc00
626 rlwinm $acc12,$s3,`0`,24,31
627 lbzx $acc01,$Tbl2,$acc01
628 rlwinm $acc13,$s0,`0`,24,31
629 lbzx $acc02,$Tbl2,$acc02
630 rlwinm $acc14,$s1,`0`,24,31
631 lbzx $acc03,$Tbl2,$acc03
632 rlwinm $acc15,$s2,`0`,24,31
633 lbzx $acc04,$Tbl2,$acc04
634 rlwinm $s0,$acc00,24,0,7
635 lbzx $acc05,$Tbl2,$acc05
636 rlwinm $s1,$acc01,24,0,7
637 lbzx $acc06,$Tbl2,$acc06
638 rlwinm $s2,$acc02,24,0,7
639 lbzx $acc07,$Tbl2,$acc07
640 rlwinm $s3,$acc03,24,0,7
641 lbzx $acc08,$Tbl2,$acc08
642 rlwimi $s0,$acc04,16,8,15
643 lbzx $acc09,$Tbl2,$acc09
644 rlwimi $s1,$acc05,16,8,15
645 lbzx $acc10,$Tbl2,$acc10
646 rlwimi $s2,$acc06,16,8,15
647 lbzx $acc11,$Tbl2,$acc11
648 rlwimi $s3,$acc07,16,8,15
649 lbzx $acc12,$Tbl2,$acc12
650 rlwimi $s0,$acc08,8,16,23
651 lbzx $acc13,$Tbl2,$acc13
652 rlwimi $s1,$acc09,8,16,23
653 lbzx $acc14,$Tbl2,$acc14
654 rlwimi $s2,$acc10,8,16,23
655 lbzx $acc15,$Tbl2,$acc15
656 rlwimi $s3,$acc11,8,16,23
667 .byte 0,12,0x14,0,0,0,0,0
670 Lppc_AES_encrypt_compact:
672 addi $Tbl1,$Tbl0,2048
678 ori $mask80,$mask80,0x8080
680 ori $mask1b,$mask1b,0x1b1b
687 rlwinm $acc00,$s0,`32-24`,24,31
689 rlwinm $acc01,$s1,`32-24`,24,31
691 rlwinm $acc02,$s2,`32-24`,24,31
692 rlwinm $acc03,$s3,`32-24`,24,31
693 rlwinm $acc04,$s1,`32-16`,24,31
694 rlwinm $acc05,$s2,`32-16`,24,31
695 rlwinm $acc06,$s3,`32-16`,24,31
696 rlwinm $acc07,$s0,`32-16`,24,31
697 lbzx $acc00,$Tbl1,$acc00
698 rlwinm $acc08,$s2,`32-8`,24,31
699 lbzx $acc01,$Tbl1,$acc01
700 rlwinm $acc09,$s3,`32-8`,24,31
701 lbzx $acc02,$Tbl1,$acc02
702 rlwinm $acc10,$s0,`32-8`,24,31
703 lbzx $acc03,$Tbl1,$acc03
704 rlwinm $acc11,$s1,`32-8`,24,31
705 lbzx $acc04,$Tbl1,$acc04
706 rlwinm $acc12,$s3,`0`,24,31
707 lbzx $acc05,$Tbl1,$acc05
708 rlwinm $acc13,$s0,`0`,24,31
709 lbzx $acc06,$Tbl1,$acc06
710 rlwinm $acc14,$s1,`0`,24,31
711 lbzx $acc07,$Tbl1,$acc07
712 rlwinm $acc15,$s2,`0`,24,31
713 lbzx $acc08,$Tbl1,$acc08
714 rlwinm $s0,$acc00,24,0,7
715 lbzx $acc09,$Tbl1,$acc09
716 rlwinm $s1,$acc01,24,0,7
717 lbzx $acc10,$Tbl1,$acc10
718 rlwinm $s2,$acc02,24,0,7
719 lbzx $acc11,$Tbl1,$acc11
720 rlwinm $s3,$acc03,24,0,7
721 lbzx $acc12,$Tbl1,$acc12
722 rlwimi $s0,$acc04,16,8,15
723 lbzx $acc13,$Tbl1,$acc13
724 rlwimi $s1,$acc05,16,8,15
725 lbzx $acc14,$Tbl1,$acc14
726 rlwimi $s2,$acc06,16,8,15
727 lbzx $acc15,$Tbl1,$acc15
728 rlwimi $s3,$acc07,16,8,15
729 rlwimi $s0,$acc08,8,16,23
730 rlwimi $s1,$acc09,8,16,23
731 rlwimi $s2,$acc10,8,16,23
732 rlwimi $s3,$acc11,8,16,23
743 bdz Lenc_compact_done
745 and $acc00,$s0,$mask80 # r1=r0&0x80808080
746 and $acc01,$s1,$mask80
747 and $acc02,$s2,$mask80
748 and $acc03,$s3,$mask80
749 srwi $acc04,$acc00,7 # r1>>7
750 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
752 andc $acc09,$s1,$mask80
754 andc $acc10,$s2,$mask80
756 andc $acc11,$s3,$mask80
757 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
758 sub $acc01,$acc01,$acc05
759 sub $acc02,$acc02,$acc06
760 sub $acc03,$acc03,$acc07
761 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
762 add $acc09,$acc09,$acc09
763 add $acc10,$acc10,$acc10
764 add $acc11,$acc11,$acc11
765 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
766 and $acc01,$acc01,$mask1b
767 and $acc02,$acc02,$mask1b
768 and $acc03,$acc03,$mask1b
769 xor $acc00,$acc00,$acc08 # r2
770 xor $acc01,$acc01,$acc09
771 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
772 xor $acc02,$acc02,$acc10
774 xor $acc03,$acc03,$acc11
777 xor $s0,$s0,$acc00 # r0^r2
780 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
785 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
790 rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
792 rotlwi $acc09,$acc13,8
794 rotlwi $acc10,$acc14,8
796 rotlwi $acc11,$acc15,8
812 .byte 0,12,0x14,0,0,0,0,0
813 .size .AES_encrypt,.-.AES_encrypt
818 $STU $sp,-$FRAME($sp)
821 $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
822 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
823 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
824 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
825 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
826 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
827 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
828 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
829 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
830 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
831 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
832 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
833 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
834 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
835 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
836 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
837 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
838 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
839 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
840 $PUSH r0,`$FRAME+$LRSAVE`($sp)
849 $code.=<<___ if (!$LITTLE_ENDIAN);
855 $code.=<<___ if ($LITTLE_ENDIAN);
864 rlwimi $s0,$t0,24,0,7
865 rlwimi $s1,$t1,24,0,7
866 rlwimi $s2,$t2,24,0,7
867 rlwimi $s3,$t3,24,0,7
868 rlwimi $s0,$t0,24,16,23
869 rlwimi $s1,$t1,24,16,23
870 rlwimi $s2,$t2,24,16,23
871 rlwimi $s3,$t3,24,16,23
875 bl Lppc_AES_decrypt_compact
876 $POP $out,`$FRAME-$SIZE_T*19`($sp)
878 $code.=<<___ if ($LITTLE_ENDIAN);
883 rlwimi $t0,$s0,24,0,7
884 rlwimi $t1,$s1,24,0,7
885 rlwimi $t2,$s2,24,0,7
886 rlwimi $t3,$s3,24,0,7
887 rlwimi $t0,$s0,24,16,23
888 rlwimi $t1,$s1,24,16,23
889 rlwimi $t2,$s2,24,16,23
890 rlwimi $t3,$s3,24,16,23
896 $code.=<<___ if (!$LITTLE_ENDIAN);
908 andi. $t0,$t0,4096-16
910 andi. $t1,$t1,4096-16
911 bne Ldec_unaligned_ok
925 insrwi $s0,$acc00,8,0
927 insrwi $s1,$acc04,8,0
929 insrwi $s0,$acc01,8,8
931 insrwi $s1,$acc05,8,8
933 insrwi $s0,$acc02,8,16
935 insrwi $s1,$acc06,8,16
936 insrwi $s2,$acc08,8,0
937 insrwi $s3,$acc12,8,0
938 insrwi $s2,$acc09,8,8
939 insrwi $s3,$acc13,8,8
940 insrwi $s2,$acc10,8,16
941 insrwi $s3,$acc14,8,16
944 bl Lppc_AES_decrypt_compact
945 $POP $out,`$FRAME-$SIZE_T*19`($sp)
947 extrwi $acc00,$s0,8,0
948 extrwi $acc01,$s0,8,8
950 extrwi $acc02,$s0,8,16
953 extrwi $acc04,$s1,8,0
955 extrwi $acc05,$s1,8,8
957 extrwi $acc06,$s1,8,16
960 extrwi $acc08,$s2,8,0
962 extrwi $acc09,$s2,8,8
964 extrwi $acc10,$s2,8,16
967 extrwi $acc12,$s3,8,0
969 extrwi $acc13,$s3,8,8
971 extrwi $acc14,$s3,8,16
977 $POP r0,`$FRAME+$LRSAVE`($sp)
978 $POP r14,`$FRAME-$SIZE_T*18`($sp)
979 $POP r15,`$FRAME-$SIZE_T*17`($sp)
980 $POP r16,`$FRAME-$SIZE_T*16`($sp)
981 $POP r17,`$FRAME-$SIZE_T*15`($sp)
982 $POP r18,`$FRAME-$SIZE_T*14`($sp)
983 $POP r19,`$FRAME-$SIZE_T*13`($sp)
984 $POP r20,`$FRAME-$SIZE_T*12`($sp)
985 $POP r21,`$FRAME-$SIZE_T*11`($sp)
986 $POP r22,`$FRAME-$SIZE_T*10`($sp)
987 $POP r23,`$FRAME-$SIZE_T*9`($sp)
988 $POP r24,`$FRAME-$SIZE_T*8`($sp)
989 $POP r25,`$FRAME-$SIZE_T*7`($sp)
990 $POP r26,`$FRAME-$SIZE_T*6`($sp)
991 $POP r27,`$FRAME-$SIZE_T*5`($sp)
992 $POP r28,`$FRAME-$SIZE_T*4`($sp)
993 $POP r29,`$FRAME-$SIZE_T*3`($sp)
994 $POP r30,`$FRAME-$SIZE_T*2`($sp)
995 $POP r31,`$FRAME-$SIZE_T*1`($sp)
1000 .byte 0,12,4,1,0x80,18,3,0
1005 lwz $acc00,240($key)
1012 addi $acc00,$acc00,-1
1022 rlwinm $acc00,$s0,`32-24+3`,21,28
1023 rlwinm $acc01,$s1,`32-24+3`,21,28
1024 rlwinm $acc02,$s2,`32-24+3`,21,28
1025 rlwinm $acc03,$s3,`32-24+3`,21,28
1027 rlwinm $acc04,$s3,`32-16+3`,21,28
1029 rlwinm $acc05,$s0,`32-16+3`,21,28
1031 rlwinm $acc06,$s1,`32-16+3`,21,28
1033 rlwinm $acc07,$s2,`32-16+3`,21,28
1034 lwzx $acc00,$Tbl0,$acc00
1035 rlwinm $acc08,$s2,`32-8+3`,21,28
1036 lwzx $acc01,$Tbl0,$acc01
1037 rlwinm $acc09,$s3,`32-8+3`,21,28
1038 lwzx $acc02,$Tbl0,$acc02
1039 rlwinm $acc10,$s0,`32-8+3`,21,28
1040 lwzx $acc03,$Tbl0,$acc03
1041 rlwinm $acc11,$s1,`32-8+3`,21,28
1042 lwzx $acc04,$Tbl1,$acc04
1043 rlwinm $acc12,$s1,`0+3`,21,28
1044 lwzx $acc05,$Tbl1,$acc05
1045 rlwinm $acc13,$s2,`0+3`,21,28
1046 lwzx $acc06,$Tbl1,$acc06
1047 rlwinm $acc14,$s3,`0+3`,21,28
1048 lwzx $acc07,$Tbl1,$acc07
1049 rlwinm $acc15,$s0,`0+3`,21,28
1050 lwzx $acc08,$Tbl2,$acc08
1052 lwzx $acc09,$Tbl2,$acc09
1054 lwzx $acc10,$Tbl2,$acc10
1056 lwzx $acc11,$Tbl2,$acc11
1058 lwzx $acc12,$Tbl3,$acc12
1060 lwzx $acc13,$Tbl3,$acc13
1062 lwzx $acc14,$Tbl3,$acc14
1064 lwzx $acc15,$Tbl3,$acc15
1077 addi $Tbl2,$Tbl0,2048
1080 rlwinm $acc00,$s0,`32-24`,24,31
1082 rlwinm $acc01,$s1,`32-24`,24,31
1084 rlwinm $acc02,$s2,`32-24`,24,31
1086 rlwinm $acc03,$s3,`32-24`,24,31
1087 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
1088 rlwinm $acc04,$s3,`32-16`,24,31
1089 lwz $acc09,`2048+32`($Tbl0)
1090 rlwinm $acc05,$s0,`32-16`,24,31
1091 lwz $acc10,`2048+64`($Tbl0)
1092 lbzx $acc00,$Tbl2,$acc00
1093 lwz $acc11,`2048+96`($Tbl0)
1094 lbzx $acc01,$Tbl2,$acc01
1095 lwz $acc12,`2048+128`($Tbl0)
1096 rlwinm $acc06,$s1,`32-16`,24,31
1097 lwz $acc13,`2048+160`($Tbl0)
1098 rlwinm $acc07,$s2,`32-16`,24,31
1099 lwz $acc14,`2048+192`($Tbl0)
1100 rlwinm $acc08,$s2,`32-8`,24,31
1101 lwz $acc15,`2048+224`($Tbl0)
1102 rlwinm $acc09,$s3,`32-8`,24,31
1103 lbzx $acc02,$Tbl2,$acc02
1104 rlwinm $acc10,$s0,`32-8`,24,31
1105 lbzx $acc03,$Tbl2,$acc03
1106 rlwinm $acc11,$s1,`32-8`,24,31
1107 lbzx $acc04,$Tbl2,$acc04
1108 rlwinm $acc12,$s1,`0`,24,31
1109 lbzx $acc05,$Tbl2,$acc05
1110 rlwinm $acc13,$s2,`0`,24,31
1111 lbzx $acc06,$Tbl2,$acc06
1112 rlwinm $acc14,$s3,`0`,24,31
1113 lbzx $acc07,$Tbl2,$acc07
1114 rlwinm $acc15,$s0,`0`,24,31
1115 lbzx $acc08,$Tbl2,$acc08
1116 rlwinm $s0,$acc00,24,0,7
1117 lbzx $acc09,$Tbl2,$acc09
1118 rlwinm $s1,$acc01,24,0,7
1119 lbzx $acc10,$Tbl2,$acc10
1120 rlwinm $s2,$acc02,24,0,7
1121 lbzx $acc11,$Tbl2,$acc11
1122 rlwinm $s3,$acc03,24,0,7
1123 lbzx $acc12,$Tbl2,$acc12
1124 rlwimi $s0,$acc04,16,8,15
1125 lbzx $acc13,$Tbl2,$acc13
1126 rlwimi $s1,$acc05,16,8,15
1127 lbzx $acc14,$Tbl2,$acc14
1128 rlwimi $s2,$acc06,16,8,15
1129 lbzx $acc15,$Tbl2,$acc15
1130 rlwimi $s3,$acc07,16,8,15
1131 rlwimi $s0,$acc08,8,16,23
1132 rlwimi $s1,$acc09,8,16,23
1133 rlwimi $s2,$acc10,8,16,23
1134 rlwimi $s3,$acc11,8,16,23
1145 .byte 0,12,0x14,0,0,0,0,0
1148 Lppc_AES_decrypt_compact:
1149 lwz $acc00,240($key)
1150 addi $Tbl1,$Tbl0,2048
1156 ori $mask80,$mask80,0x8080
1158 ori $mask1b,$mask1b,0x1b1b
1161 $code.=<<___ if ($SIZE_T==8);
1162 insrdi $mask80,$mask80,32,0
1163 insrdi $mask1b,$mask1b,32,0
1171 rlwinm $acc00,$s0,`32-24`,24,31
1173 rlwinm $acc01,$s1,`32-24`,24,31
1175 rlwinm $acc02,$s2,`32-24`,24,31
1176 rlwinm $acc03,$s3,`32-24`,24,31
1177 rlwinm $acc04,$s3,`32-16`,24,31
1178 rlwinm $acc05,$s0,`32-16`,24,31
1179 rlwinm $acc06,$s1,`32-16`,24,31
1180 rlwinm $acc07,$s2,`32-16`,24,31
1181 lbzx $acc00,$Tbl1,$acc00
1182 rlwinm $acc08,$s2,`32-8`,24,31
1183 lbzx $acc01,$Tbl1,$acc01
1184 rlwinm $acc09,$s3,`32-8`,24,31
1185 lbzx $acc02,$Tbl1,$acc02
1186 rlwinm $acc10,$s0,`32-8`,24,31
1187 lbzx $acc03,$Tbl1,$acc03
1188 rlwinm $acc11,$s1,`32-8`,24,31
1189 lbzx $acc04,$Tbl1,$acc04
1190 rlwinm $acc12,$s1,`0`,24,31
1191 lbzx $acc05,$Tbl1,$acc05
1192 rlwinm $acc13,$s2,`0`,24,31
1193 lbzx $acc06,$Tbl1,$acc06
1194 rlwinm $acc14,$s3,`0`,24,31
1195 lbzx $acc07,$Tbl1,$acc07
1196 rlwinm $acc15,$s0,`0`,24,31
1197 lbzx $acc08,$Tbl1,$acc08
1198 rlwinm $s0,$acc00,24,0,7
1199 lbzx $acc09,$Tbl1,$acc09
1200 rlwinm $s1,$acc01,24,0,7
1201 lbzx $acc10,$Tbl1,$acc10
1202 rlwinm $s2,$acc02,24,0,7
1203 lbzx $acc11,$Tbl1,$acc11
1204 rlwinm $s3,$acc03,24,0,7
1205 lbzx $acc12,$Tbl1,$acc12
1206 rlwimi $s0,$acc04,16,8,15
1207 lbzx $acc13,$Tbl1,$acc13
1208 rlwimi $s1,$acc05,16,8,15
1209 lbzx $acc14,$Tbl1,$acc14
1210 rlwimi $s2,$acc06,16,8,15
1211 lbzx $acc15,$Tbl1,$acc15
1212 rlwimi $s3,$acc07,16,8,15
1213 rlwimi $s0,$acc08,8,16,23
1214 rlwimi $s1,$acc09,8,16,23
1215 rlwimi $s2,$acc10,8,16,23
1216 rlwimi $s3,$acc11,8,16,23
1227 bdz Ldec_compact_done
1229 $code.=<<___ if ($SIZE_T==8);
1230 # vectorized permutation improves decrypt performance by 10%
1234 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1235 and $acc02,$s2,$mask80
1236 srdi $acc04,$acc00,7 # r1>>7
1237 srdi $acc06,$acc02,7
1238 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1239 andc $acc10,$s2,$mask80
1240 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1241 sub $acc02,$acc02,$acc06
1242 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1243 add $acc10,$acc10,$acc10
1244 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1245 and $acc02,$acc02,$mask1b
1246 xor $acc00,$acc00,$acc08 # r2
1247 xor $acc02,$acc02,$acc10
1249 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1250 and $acc06,$acc02,$mask80
1251 srdi $acc08,$acc04,7 # r1>>7
1252 srdi $acc10,$acc06,7
1253 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1254 andc $acc14,$acc02,$mask80
1255 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1256 sub $acc06,$acc06,$acc10
1257 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1258 add $acc14,$acc14,$acc14
1259 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1260 and $acc06,$acc06,$mask1b
1261 xor $acc04,$acc04,$acc12 # r4
1262 xor $acc06,$acc06,$acc14
1264 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1265 and $acc10,$acc06,$mask80
1266 srdi $acc12,$acc08,7 # r1>>7
1267 srdi $acc14,$acc10,7
1268 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1269 sub $acc10,$acc10,$acc14
1270 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1271 andc $acc14,$acc06,$mask80
1272 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1273 add $acc14,$acc14,$acc14
1274 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1275 and $acc10,$acc10,$mask1b
1276 xor $acc08,$acc08,$acc12 # r8
1277 xor $acc10,$acc10,$acc14
1279 xor $acc00,$acc00,$s0 # r2^r0
1280 xor $acc02,$acc02,$s2
1281 xor $acc04,$acc04,$s0 # r4^r0
1282 xor $acc06,$acc06,$s2
1284 extrdi $acc01,$acc00,32,0
1285 extrdi $acc03,$acc02,32,0
1286 extrdi $acc05,$acc04,32,0
1287 extrdi $acc07,$acc06,32,0
1288 extrdi $acc09,$acc08,32,0
1289 extrdi $acc11,$acc10,32,0
1291 $code.=<<___ if ($SIZE_T==4);
1292 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1293 and $acc01,$s1,$mask80
1294 and $acc02,$s2,$mask80
1295 and $acc03,$s3,$mask80
1296 srwi $acc04,$acc00,7 # r1>>7
1297 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1298 srwi $acc05,$acc01,7
1299 andc $acc09,$s1,$mask80
1300 srwi $acc06,$acc02,7
1301 andc $acc10,$s2,$mask80
1302 srwi $acc07,$acc03,7
1303 andc $acc11,$s3,$mask80
1304 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1305 sub $acc01,$acc01,$acc05
1306 sub $acc02,$acc02,$acc06
1307 sub $acc03,$acc03,$acc07
1308 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1309 add $acc09,$acc09,$acc09
1310 add $acc10,$acc10,$acc10
1311 add $acc11,$acc11,$acc11
1312 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1313 and $acc01,$acc01,$mask1b
1314 and $acc02,$acc02,$mask1b
1315 and $acc03,$acc03,$mask1b
1316 xor $acc00,$acc00,$acc08 # r2
1317 xor $acc01,$acc01,$acc09
1318 xor $acc02,$acc02,$acc10
1319 xor $acc03,$acc03,$acc11
1321 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1322 and $acc05,$acc01,$mask80
1323 and $acc06,$acc02,$mask80
1324 and $acc07,$acc03,$mask80
1325 srwi $acc08,$acc04,7 # r1>>7
1326 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1327 srwi $acc09,$acc05,7
1328 andc $acc13,$acc01,$mask80
1329 srwi $acc10,$acc06,7
1330 andc $acc14,$acc02,$mask80
1331 srwi $acc11,$acc07,7
1332 andc $acc15,$acc03,$mask80
1333 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1334 sub $acc05,$acc05,$acc09
1335 sub $acc06,$acc06,$acc10
1336 sub $acc07,$acc07,$acc11
1337 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1338 add $acc13,$acc13,$acc13
1339 add $acc14,$acc14,$acc14
1340 add $acc15,$acc15,$acc15
1341 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1342 and $acc05,$acc05,$mask1b
1343 and $acc06,$acc06,$mask1b
1344 and $acc07,$acc07,$mask1b
1345 xor $acc04,$acc04,$acc12 # r4
1346 xor $acc05,$acc05,$acc13
1347 xor $acc06,$acc06,$acc14
1348 xor $acc07,$acc07,$acc15
1350 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1351 and $acc09,$acc05,$mask80
1352 srwi $acc12,$acc08,7 # r1>>7
1353 and $acc10,$acc06,$mask80
1354 srwi $acc13,$acc09,7
1355 and $acc11,$acc07,$mask80
1356 srwi $acc14,$acc10,7
1357 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1358 srwi $acc15,$acc11,7
1359 sub $acc09,$acc09,$acc13
1360 sub $acc10,$acc10,$acc14
1361 sub $acc11,$acc11,$acc15
1362 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1363 andc $acc13,$acc05,$mask80
1364 andc $acc14,$acc06,$mask80
1365 andc $acc15,$acc07,$mask80
1366 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1367 add $acc13,$acc13,$acc13
1368 add $acc14,$acc14,$acc14
1369 add $acc15,$acc15,$acc15
1370 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1371 and $acc09,$acc09,$mask1b
1372 and $acc10,$acc10,$mask1b
1373 and $acc11,$acc11,$mask1b
1374 xor $acc08,$acc08,$acc12 # r8
1375 xor $acc09,$acc09,$acc13
1376 xor $acc10,$acc10,$acc14
1377 xor $acc11,$acc11,$acc15
1379 xor $acc00,$acc00,$s0 # r2^r0
1380 xor $acc01,$acc01,$s1
1381 xor $acc02,$acc02,$s2
1382 xor $acc03,$acc03,$s3
1383 xor $acc04,$acc04,$s0 # r4^r0
1384 xor $acc05,$acc05,$s1
1385 xor $acc06,$acc06,$s2
1386 xor $acc07,$acc07,$s3
1389 rotrwi $s0,$s0,8 # = ROTATE(r0,8)
1391 xor $s0,$s0,$acc00 # ^= r2^r0
1397 xor $acc00,$acc00,$acc08
1398 xor $acc01,$acc01,$acc09
1399 xor $acc02,$acc02,$acc10
1400 xor $acc03,$acc03,$acc11
1401 xor $s0,$s0,$acc04 # ^= r4^r0
1402 rotrwi $acc00,$acc00,24
1404 rotrwi $acc01,$acc01,24
1406 rotrwi $acc02,$acc02,24
1408 rotrwi $acc03,$acc03,24
1409 xor $acc04,$acc04,$acc08
1410 xor $acc05,$acc05,$acc09
1411 xor $acc06,$acc06,$acc10
1412 xor $acc07,$acc07,$acc11
1413 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1414 rotrwi $acc04,$acc04,16
1416 rotrwi $acc05,$acc05,16
1418 rotrwi $acc06,$acc06,16
1420 rotrwi $acc07,$acc07,16
1421 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
1422 rotrwi $acc08,$acc08,8
1424 rotrwi $acc09,$acc09,8
1426 rotrwi $acc10,$acc10,8
1428 rotrwi $acc11,$acc11,8
1429 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
1433 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
1447 .byte 0,12,0x14,0,0,0,0,0
1448 .size .AES_decrypt,.-.AES_decrypt
1450 .asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1454 $code =~ s/\`([^\`]*)\`/eval $1/gem;  # replace every `...` span in $code with the result of eval'ing its contents as Perl (/g: all occurrences, /e: replacement is code)