3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # Needs more work: key setup, CBC routine...
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17 # at 1/3 of ppc_AES_decrypt.
21 # Rescheduling instructions to favour Power6 pipeline gave 10%
22 # performance improvement on the platform in question (and marginal
23 # improvement even on others). It should be noted that Power6 fails
24 # to process a byte in 18 cycles, only in 23, because it fails to issue
25 # 4 load instructions in two cycles, only in 3. As a result non-compact
26 # block subroutines are 25% slower than one would expect. Compact
27 # functions scale better, because they have pure computational part,
28 # which scales perfectly with clock frequency. To be specific
29 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
34 if ($flavour =~ /64/) {
40 } elsif ($flavour =~ /32/) {
46 } else { die "nonsense $flavour"; }
48 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # $dir = leading directory part of $0, including its trailing / or \
49 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or # first candidate: ppc-xlate.pl in that same directory
50 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or # second candidate: ../../perlasm/ relative to that directory
51 die "can't locate ppc-xlate.pl"; # neither candidate path is a plain file
53 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!"; # pipe all output through ppc-xlate.pl; low-precedence 'or' makes die guard open() itself ('||' bound to the always-true command string, so a failed open went unnoticed)
59 while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; } # append each remaining argument to $code twice, as one ".long" line holding two 8-digit hex words
71 $Tbl3=$out; # stay away from "r2"; $out is offloaded to stack
79 $t1="r0"; # stay away from "r13";
114 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
115 addi $Tbl0,$Tbl0,`128-8`
119 .byte 0,12,0x14,0,0,0,0,0
124 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
125 addi $Tbl0,$Tbl0,`128-64-8+2048+256`
129 .byte 0,12,0x14,0,0,0,0,0
133 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
134 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
135 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
136 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
137 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
138 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
139 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
140 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
141 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
142 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
143 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
144 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
145 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
146 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
147 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
148 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
149 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
150 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
151 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
152 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
153 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
154 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
155 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
156 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
157 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
158 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
159 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
160 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
161 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
162 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
163 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
164 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
165 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
166 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
167 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
168 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
169 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
170 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
171 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
172 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
173 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
174 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
175 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
176 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
177 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
178 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
179 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
180 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
181 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
182 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
183 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
184 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
185 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
186 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
187 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
188 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
189 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
190 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
191 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
192 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
193 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
194 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
195 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
196 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
198 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
199 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
200 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
201 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
202 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
203 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
204 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
205 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
206 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
207 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
208 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
209 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
210 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
211 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
212 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
213 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
214 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
215 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
216 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
217 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
218 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
219 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
220 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
221 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
222 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
223 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
224 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
225 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
226 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
227 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
228 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
229 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
232 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
233 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
234 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
235 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
236 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
237 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
238 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
239 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
240 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
241 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
242 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
243 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
244 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
245 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
246 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
247 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
248 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
249 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
250 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
251 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
252 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
253 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
254 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
255 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
256 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
257 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
258 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
259 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
260 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
261 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
262 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
263 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
264 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
265 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
266 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
267 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
268 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
269 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
270 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
271 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
272 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
273 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
274 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
275 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
276 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
277 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
278 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
279 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
280 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
281 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
282 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
283 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
284 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
285 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
286 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
287 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
288 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
289 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
290 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
291 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
292 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
293 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
294 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
295 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
297 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
298 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
299 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
300 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
301 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
302 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
303 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
304 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
305 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
306 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
307 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
308 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
309 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
310 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
311 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
312 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
313 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
314 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
315 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
316 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
317 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
318 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
319 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
320 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
321 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
322 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
323 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
324 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
325 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
326 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
327 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
328 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
334 $STU $sp,-$FRAME($sp)
337 $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
338 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
339 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
340 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
341 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
342 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
343 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
344 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
345 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
346 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
347 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
348 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
349 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
350 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
351 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
352 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
353 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
354 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
355 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
356 $PUSH r0,`$FRAME+$LRSAVE`($sp)
369 bl Lppc_AES_encrypt_compact
370 $POP $out,`$FRAME-$SIZE_T*19`($sp)
380 andi. $t0,$t0,4096-16
382 andi. $t1,$t1,4096-16
383 bne Lenc_unaligned_ok
397 insrwi $s0,$acc00,8,0
399 insrwi $s1,$acc04,8,0
401 insrwi $s0,$acc01,8,8
403 insrwi $s1,$acc05,8,8
405 insrwi $s0,$acc02,8,16
407 insrwi $s1,$acc06,8,16
408 insrwi $s2,$acc08,8,0
409 insrwi $s3,$acc12,8,0
410 insrwi $s2,$acc09,8,8
411 insrwi $s3,$acc13,8,8
412 insrwi $s2,$acc10,8,16
413 insrwi $s3,$acc14,8,16
416 bl Lppc_AES_encrypt_compact
417 $POP $out,`$FRAME-$SIZE_T*19`($sp)
419 extrwi $acc00,$s0,8,0
420 extrwi $acc01,$s0,8,8
422 extrwi $acc02,$s0,8,16
425 extrwi $acc04,$s1,8,0
427 extrwi $acc05,$s1,8,8
429 extrwi $acc06,$s1,8,16
432 extrwi $acc08,$s2,8,0
434 extrwi $acc09,$s2,8,8
436 extrwi $acc10,$s2,8,16
439 extrwi $acc12,$s3,8,0
441 extrwi $acc13,$s3,8,8
443 extrwi $acc14,$s3,8,16
449 $POP r0,`$FRAME+$LRSAVE`($sp)
450 $POP r14,`$FRAME-$SIZE_T*18`($sp)
451 $POP r15,`$FRAME-$SIZE_T*17`($sp)
452 $POP r16,`$FRAME-$SIZE_T*16`($sp)
453 $POP r17,`$FRAME-$SIZE_T*15`($sp)
454 $POP r18,`$FRAME-$SIZE_T*14`($sp)
455 $POP r19,`$FRAME-$SIZE_T*13`($sp)
456 $POP r20,`$FRAME-$SIZE_T*12`($sp)
457 $POP r21,`$FRAME-$SIZE_T*11`($sp)
458 $POP r22,`$FRAME-$SIZE_T*10`($sp)
459 $POP r23,`$FRAME-$SIZE_T*9`($sp)
460 $POP r24,`$FRAME-$SIZE_T*8`($sp)
461 $POP r25,`$FRAME-$SIZE_T*7`($sp)
462 $POP r26,`$FRAME-$SIZE_T*6`($sp)
463 $POP r27,`$FRAME-$SIZE_T*5`($sp)
464 $POP r28,`$FRAME-$SIZE_T*4`($sp)
465 $POP r29,`$FRAME-$SIZE_T*3`($sp)
466 $POP r30,`$FRAME-$SIZE_T*2`($sp)
467 $POP r31,`$FRAME-$SIZE_T*1`($sp)
472 .byte 0,12,4,1,0x80,18,3,0
474 .size .AES_encrypt,.-.AES_encrypt
485 addi $acc00,$acc00,-1
495 rlwinm $acc00,$s0,`32-24+3`,21,28
496 rlwinm $acc01,$s1,`32-24+3`,21,28
497 rlwinm $acc02,$s2,`32-24+3`,21,28
498 rlwinm $acc03,$s3,`32-24+3`,21,28
500 rlwinm $acc04,$s1,`32-16+3`,21,28
502 rlwinm $acc05,$s2,`32-16+3`,21,28
504 rlwinm $acc06,$s3,`32-16+3`,21,28
506 rlwinm $acc07,$s0,`32-16+3`,21,28
507 lwzx $acc00,$Tbl0,$acc00
508 rlwinm $acc08,$s2,`32-8+3`,21,28
509 lwzx $acc01,$Tbl0,$acc01
510 rlwinm $acc09,$s3,`32-8+3`,21,28
511 lwzx $acc02,$Tbl0,$acc02
512 rlwinm $acc10,$s0,`32-8+3`,21,28
513 lwzx $acc03,$Tbl0,$acc03
514 rlwinm $acc11,$s1,`32-8+3`,21,28
515 lwzx $acc04,$Tbl1,$acc04
516 rlwinm $acc12,$s3,`0+3`,21,28
517 lwzx $acc05,$Tbl1,$acc05
518 rlwinm $acc13,$s0,`0+3`,21,28
519 lwzx $acc06,$Tbl1,$acc06
520 rlwinm $acc14,$s1,`0+3`,21,28
521 lwzx $acc07,$Tbl1,$acc07
522 rlwinm $acc15,$s2,`0+3`,21,28
523 lwzx $acc08,$Tbl2,$acc08
525 lwzx $acc09,$Tbl2,$acc09
527 lwzx $acc10,$Tbl2,$acc10
529 lwzx $acc11,$Tbl2,$acc11
531 lwzx $acc12,$Tbl3,$acc12
533 lwzx $acc13,$Tbl3,$acc13
535 lwzx $acc14,$Tbl3,$acc14
537 lwzx $acc15,$Tbl3,$acc15
550 addi $Tbl2,$Tbl0,2048
553 rlwinm $acc00,$s0,`32-24`,24,31
555 rlwinm $acc01,$s1,`32-24`,24,31
557 rlwinm $acc02,$s2,`32-24`,24,31
559 rlwinm $acc03,$s3,`32-24`,24,31
560 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
561 rlwinm $acc04,$s1,`32-16`,24,31
562 lwz $acc09,`2048+32`($Tbl0)
563 rlwinm $acc05,$s2,`32-16`,24,31
564 lwz $acc10,`2048+64`($Tbl0)
565 rlwinm $acc06,$s3,`32-16`,24,31
566 lwz $acc11,`2048+96`($Tbl0)
567 rlwinm $acc07,$s0,`32-16`,24,31
568 lwz $acc12,`2048+128`($Tbl0)
569 rlwinm $acc08,$s2,`32-8`,24,31
570 lwz $acc13,`2048+160`($Tbl0)
571 rlwinm $acc09,$s3,`32-8`,24,31
572 lwz $acc14,`2048+192`($Tbl0)
573 rlwinm $acc10,$s0,`32-8`,24,31
574 lwz $acc15,`2048+224`($Tbl0)
575 rlwinm $acc11,$s1,`32-8`,24,31
576 lbzx $acc00,$Tbl2,$acc00
577 rlwinm $acc12,$s3,`0`,24,31
578 lbzx $acc01,$Tbl2,$acc01
579 rlwinm $acc13,$s0,`0`,24,31
580 lbzx $acc02,$Tbl2,$acc02
581 rlwinm $acc14,$s1,`0`,24,31
582 lbzx $acc03,$Tbl2,$acc03
583 rlwinm $acc15,$s2,`0`,24,31
584 lbzx $acc04,$Tbl2,$acc04
585 rlwinm $s0,$acc00,24,0,7
586 lbzx $acc05,$Tbl2,$acc05
587 rlwinm $s1,$acc01,24,0,7
588 lbzx $acc06,$Tbl2,$acc06
589 rlwinm $s2,$acc02,24,0,7
590 lbzx $acc07,$Tbl2,$acc07
591 rlwinm $s3,$acc03,24,0,7
592 lbzx $acc08,$Tbl2,$acc08
593 rlwimi $s0,$acc04,16,8,15
594 lbzx $acc09,$Tbl2,$acc09
595 rlwimi $s1,$acc05,16,8,15
596 lbzx $acc10,$Tbl2,$acc10
597 rlwimi $s2,$acc06,16,8,15
598 lbzx $acc11,$Tbl2,$acc11
599 rlwimi $s3,$acc07,16,8,15
600 lbzx $acc12,$Tbl2,$acc12
601 rlwimi $s0,$acc08,8,16,23
602 lbzx $acc13,$Tbl2,$acc13
603 rlwimi $s1,$acc09,8,16,23
604 lbzx $acc14,$Tbl2,$acc14
605 rlwimi $s2,$acc10,8,16,23
606 lbzx $acc15,$Tbl2,$acc15
607 rlwimi $s3,$acc11,8,16,23
618 .byte 0,12,0x14,0,0,0,0,0
621 Lppc_AES_encrypt_compact:
623 addi $Tbl1,$Tbl0,2048
629 ori $mask80,$mask80,0x8080
631 ori $mask1b,$mask1b,0x1b1b
638 rlwinm $acc00,$s0,`32-24`,24,31
640 rlwinm $acc01,$s1,`32-24`,24,31
642 rlwinm $acc02,$s2,`32-24`,24,31
643 rlwinm $acc03,$s3,`32-24`,24,31
644 rlwinm $acc04,$s1,`32-16`,24,31
645 rlwinm $acc05,$s2,`32-16`,24,31
646 rlwinm $acc06,$s3,`32-16`,24,31
647 rlwinm $acc07,$s0,`32-16`,24,31
648 lbzx $acc00,$Tbl1,$acc00
649 rlwinm $acc08,$s2,`32-8`,24,31
650 lbzx $acc01,$Tbl1,$acc01
651 rlwinm $acc09,$s3,`32-8`,24,31
652 lbzx $acc02,$Tbl1,$acc02
653 rlwinm $acc10,$s0,`32-8`,24,31
654 lbzx $acc03,$Tbl1,$acc03
655 rlwinm $acc11,$s1,`32-8`,24,31
656 lbzx $acc04,$Tbl1,$acc04
657 rlwinm $acc12,$s3,`0`,24,31
658 lbzx $acc05,$Tbl1,$acc05
659 rlwinm $acc13,$s0,`0`,24,31
660 lbzx $acc06,$Tbl1,$acc06
661 rlwinm $acc14,$s1,`0`,24,31
662 lbzx $acc07,$Tbl1,$acc07
663 rlwinm $acc15,$s2,`0`,24,31
664 lbzx $acc08,$Tbl1,$acc08
665 rlwinm $s0,$acc00,24,0,7
666 lbzx $acc09,$Tbl1,$acc09
667 rlwinm $s1,$acc01,24,0,7
668 lbzx $acc10,$Tbl1,$acc10
669 rlwinm $s2,$acc02,24,0,7
670 lbzx $acc11,$Tbl1,$acc11
671 rlwinm $s3,$acc03,24,0,7
672 lbzx $acc12,$Tbl1,$acc12
673 rlwimi $s0,$acc04,16,8,15
674 lbzx $acc13,$Tbl1,$acc13
675 rlwimi $s1,$acc05,16,8,15
676 lbzx $acc14,$Tbl1,$acc14
677 rlwimi $s2,$acc06,16,8,15
678 lbzx $acc15,$Tbl1,$acc15
679 rlwimi $s3,$acc07,16,8,15
680 rlwimi $s0,$acc08,8,16,23
681 rlwimi $s1,$acc09,8,16,23
682 rlwimi $s2,$acc10,8,16,23
683 rlwimi $s3,$acc11,8,16,23
694 bdz Lenc_compact_done
696 and $acc00,$s0,$mask80 # r1=r0&0x80808080
697 and $acc01,$s1,$mask80
698 and $acc02,$s2,$mask80
699 and $acc03,$s3,$mask80
700 srwi $acc04,$acc00,7 # r1>>7
701 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
703 andc $acc09,$s1,$mask80
705 andc $acc10,$s2,$mask80
707 andc $acc11,$s3,$mask80
708 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
709 sub $acc01,$acc01,$acc05
710 sub $acc02,$acc02,$acc06
711 sub $acc03,$acc03,$acc07
712 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
713 add $acc09,$acc09,$acc09
714 add $acc10,$acc10,$acc10
715 add $acc11,$acc11,$acc11
716 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
717 and $acc01,$acc01,$mask1b
718 and $acc02,$acc02,$mask1b
719 and $acc03,$acc03,$mask1b
720 xor $acc00,$acc00,$acc08 # r2
721 xor $acc01,$acc01,$acc09
722 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
723 xor $acc02,$acc02,$acc10
725 xor $acc03,$acc03,$acc11
728 xor $s0,$s0,$acc00 # r0^r2
731 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
736 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
741 rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
743 rotlwi $acc09,$acc13,8
745 rotlwi $acc10,$acc14,8
747 rotlwi $acc11,$acc15,8
763 .byte 0,12,0x14,0,0,0,0,0
768 $STU $sp,-$FRAME($sp)
771 $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
772 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
773 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
774 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
775 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
776 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
777 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
778 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
779 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
780 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
781 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
782 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
783 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
784 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
785 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
786 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
787 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
788 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
789 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
790 $PUSH r0,`$FRAME+$LRSAVE`($sp)
803 bl Lppc_AES_decrypt_compact
804 $POP $out,`$FRAME-$SIZE_T*19`($sp)
814 andi. $t0,$t0,4096-16
816 andi. $t1,$t1,4096-16
817 bne Ldec_unaligned_ok
831 insrwi $s0,$acc00,8,0
833 insrwi $s1,$acc04,8,0
835 insrwi $s0,$acc01,8,8
837 insrwi $s1,$acc05,8,8
839 insrwi $s0,$acc02,8,16
841 insrwi $s1,$acc06,8,16
842 insrwi $s2,$acc08,8,0
843 insrwi $s3,$acc12,8,0
844 insrwi $s2,$acc09,8,8
845 insrwi $s3,$acc13,8,8
846 insrwi $s2,$acc10,8,16
847 insrwi $s3,$acc14,8,16
850 bl Lppc_AES_decrypt_compact
851 $POP $out,`$FRAME-$SIZE_T*19`($sp)
853 extrwi $acc00,$s0,8,0
854 extrwi $acc01,$s0,8,8
856 extrwi $acc02,$s0,8,16
859 extrwi $acc04,$s1,8,0
861 extrwi $acc05,$s1,8,8
863 extrwi $acc06,$s1,8,16
866 extrwi $acc08,$s2,8,0
868 extrwi $acc09,$s2,8,8
870 extrwi $acc10,$s2,8,16
873 extrwi $acc12,$s3,8,0
875 extrwi $acc13,$s3,8,8
877 extrwi $acc14,$s3,8,16
883 $POP r0,`$FRAME+$LRSAVE`($sp)
884 $POP r14,`$FRAME-$SIZE_T*18`($sp)
885 $POP r15,`$FRAME-$SIZE_T*17`($sp)
886 $POP r16,`$FRAME-$SIZE_T*16`($sp)
887 $POP r17,`$FRAME-$SIZE_T*15`($sp)
888 $POP r18,`$FRAME-$SIZE_T*14`($sp)
889 $POP r19,`$FRAME-$SIZE_T*13`($sp)
890 $POP r20,`$FRAME-$SIZE_T*12`($sp)
891 $POP r21,`$FRAME-$SIZE_T*11`($sp)
892 $POP r22,`$FRAME-$SIZE_T*10`($sp)
893 $POP r23,`$FRAME-$SIZE_T*9`($sp)
894 $POP r24,`$FRAME-$SIZE_T*8`($sp)
895 $POP r25,`$FRAME-$SIZE_T*7`($sp)
896 $POP r26,`$FRAME-$SIZE_T*6`($sp)
897 $POP r27,`$FRAME-$SIZE_T*5`($sp)
898 $POP r28,`$FRAME-$SIZE_T*4`($sp)
899 $POP r29,`$FRAME-$SIZE_T*3`($sp)
900 $POP r30,`$FRAME-$SIZE_T*2`($sp)
901 $POP r31,`$FRAME-$SIZE_T*1`($sp)
906 .byte 0,12,4,1,0x80,18,3,0
908 .size .AES_decrypt,.-.AES_decrypt
919 addi $acc00,$acc00,-1
929 rlwinm $acc00,$s0,`32-24+3`,21,28
930 rlwinm $acc01,$s1,`32-24+3`,21,28
931 rlwinm $acc02,$s2,`32-24+3`,21,28
932 rlwinm $acc03,$s3,`32-24+3`,21,28
934 rlwinm $acc04,$s3,`32-16+3`,21,28
936 rlwinm $acc05,$s0,`32-16+3`,21,28
938 rlwinm $acc06,$s1,`32-16+3`,21,28
940 rlwinm $acc07,$s2,`32-16+3`,21,28
941 lwzx $acc00,$Tbl0,$acc00
942 rlwinm $acc08,$s2,`32-8+3`,21,28
943 lwzx $acc01,$Tbl0,$acc01
944 rlwinm $acc09,$s3,`32-8+3`,21,28
945 lwzx $acc02,$Tbl0,$acc02
946 rlwinm $acc10,$s0,`32-8+3`,21,28
947 lwzx $acc03,$Tbl0,$acc03
948 rlwinm $acc11,$s1,`32-8+3`,21,28
949 lwzx $acc04,$Tbl1,$acc04
950 rlwinm $acc12,$s1,`0+3`,21,28
951 lwzx $acc05,$Tbl1,$acc05
952 rlwinm $acc13,$s2,`0+3`,21,28
953 lwzx $acc06,$Tbl1,$acc06
954 rlwinm $acc14,$s3,`0+3`,21,28
955 lwzx $acc07,$Tbl1,$acc07
956 rlwinm $acc15,$s0,`0+3`,21,28
957 lwzx $acc08,$Tbl2,$acc08
959 lwzx $acc09,$Tbl2,$acc09
961 lwzx $acc10,$Tbl2,$acc10
963 lwzx $acc11,$Tbl2,$acc11
965 lwzx $acc12,$Tbl3,$acc12
967 lwzx $acc13,$Tbl3,$acc13
969 lwzx $acc14,$Tbl3,$acc14
971 lwzx $acc15,$Tbl3,$acc15
984 addi $Tbl2,$Tbl0,2048
987 rlwinm $acc00,$s0,`32-24`,24,31
989 rlwinm $acc01,$s1,`32-24`,24,31
991 rlwinm $acc02,$s2,`32-24`,24,31
993 rlwinm $acc03,$s3,`32-24`,24,31
994 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
995 rlwinm $acc04,$s3,`32-16`,24,31
996 lwz $acc09,`2048+32`($Tbl0)
997 rlwinm $acc05,$s0,`32-16`,24,31
998 lwz $acc10,`2048+64`($Tbl0)
999 lbzx $acc00,$Tbl2,$acc00
1000 lwz $acc11,`2048+96`($Tbl0)
1001 lbzx $acc01,$Tbl2,$acc01
1002 lwz $acc12,`2048+128`($Tbl0)
1003 rlwinm $acc06,$s1,`32-16`,24,31
1004 lwz $acc13,`2048+160`($Tbl0)
1005 rlwinm $acc07,$s2,`32-16`,24,31
1006 lwz $acc14,`2048+192`($Tbl0)
1007 rlwinm $acc08,$s2,`32-8`,24,31
1008 lwz $acc15,`2048+224`($Tbl0)
1009 rlwinm $acc09,$s3,`32-8`,24,31
1010 lbzx $acc02,$Tbl2,$acc02
1011 rlwinm $acc10,$s0,`32-8`,24,31
1012 lbzx $acc03,$Tbl2,$acc03
1013 rlwinm $acc11,$s1,`32-8`,24,31
1014 lbzx $acc04,$Tbl2,$acc04
1015 rlwinm $acc12,$s1,`0`,24,31
1016 lbzx $acc05,$Tbl2,$acc05
1017 rlwinm $acc13,$s2,`0`,24,31
1018 lbzx $acc06,$Tbl2,$acc06
1019 rlwinm $acc14,$s3,`0`,24,31
1020 lbzx $acc07,$Tbl2,$acc07
1021 rlwinm $acc15,$s0,`0`,24,31
1022 lbzx $acc08,$Tbl2,$acc08
1023 rlwinm $s0,$acc00,24,0,7
1024 lbzx $acc09,$Tbl2,$acc09
1025 rlwinm $s1,$acc01,24,0,7
1026 lbzx $acc10,$Tbl2,$acc10
1027 rlwinm $s2,$acc02,24,0,7
1028 lbzx $acc11,$Tbl2,$acc11
1029 rlwinm $s3,$acc03,24,0,7
1030 lbzx $acc12,$Tbl2,$acc12
1031 rlwimi $s0,$acc04,16,8,15
1032 lbzx $acc13,$Tbl2,$acc13
1033 rlwimi $s1,$acc05,16,8,15
1034 lbzx $acc14,$Tbl2,$acc14
1035 rlwimi $s2,$acc06,16,8,15
1036 lbzx $acc15,$Tbl2,$acc15
1037 rlwimi $s3,$acc07,16,8,15
1038 rlwimi $s0,$acc08,8,16,23
1039 rlwimi $s1,$acc09,8,16,23
1040 rlwimi $s2,$acc10,8,16,23
1041 rlwimi $s3,$acc11,8,16,23
1052 .byte 0,12,0x14,0,0,0,0,0
1055 Lppc_AES_decrypt_compact:
1056 lwz $acc00,240($key)
1057 addi $Tbl1,$Tbl0,2048
1063 ori $mask80,$mask80,0x8080
1065 ori $mask1b,$mask1b,0x1b1b
1068 $code.=<<___ if ($SIZE_T==8);
1069 insrdi $mask80,$mask80,32,0
1070 insrdi $mask1b,$mask1b,32,0
1078 rlwinm $acc00,$s0,`32-24`,24,31
1080 rlwinm $acc01,$s1,`32-24`,24,31
1082 rlwinm $acc02,$s2,`32-24`,24,31
1083 rlwinm $acc03,$s3,`32-24`,24,31
1084 rlwinm $acc04,$s3,`32-16`,24,31
1085 rlwinm $acc05,$s0,`32-16`,24,31
1086 rlwinm $acc06,$s1,`32-16`,24,31
1087 rlwinm $acc07,$s2,`32-16`,24,31
1088 lbzx $acc00,$Tbl1,$acc00
1089 rlwinm $acc08,$s2,`32-8`,24,31
1090 lbzx $acc01,$Tbl1,$acc01
1091 rlwinm $acc09,$s3,`32-8`,24,31
1092 lbzx $acc02,$Tbl1,$acc02
1093 rlwinm $acc10,$s0,`32-8`,24,31
1094 lbzx $acc03,$Tbl1,$acc03
1095 rlwinm $acc11,$s1,`32-8`,24,31
1096 lbzx $acc04,$Tbl1,$acc04
1097 rlwinm $acc12,$s1,`0`,24,31
1098 lbzx $acc05,$Tbl1,$acc05
1099 rlwinm $acc13,$s2,`0`,24,31
1100 lbzx $acc06,$Tbl1,$acc06
1101 rlwinm $acc14,$s3,`0`,24,31
1102 lbzx $acc07,$Tbl1,$acc07
1103 rlwinm $acc15,$s0,`0`,24,31
1104 lbzx $acc08,$Tbl1,$acc08
1105 rlwinm $s0,$acc00,24,0,7
1106 lbzx $acc09,$Tbl1,$acc09
1107 rlwinm $s1,$acc01,24,0,7
1108 lbzx $acc10,$Tbl1,$acc10
1109 rlwinm $s2,$acc02,24,0,7
1110 lbzx $acc11,$Tbl1,$acc11
1111 rlwinm $s3,$acc03,24,0,7
1112 lbzx $acc12,$Tbl1,$acc12
1113 rlwimi $s0,$acc04,16,8,15
1114 lbzx $acc13,$Tbl1,$acc13
1115 rlwimi $s1,$acc05,16,8,15
1116 lbzx $acc14,$Tbl1,$acc14
1117 rlwimi $s2,$acc06,16,8,15
1118 lbzx $acc15,$Tbl1,$acc15
1119 rlwimi $s3,$acc07,16,8,15
1120 rlwimi $s0,$acc08,8,16,23
1121 rlwimi $s1,$acc09,8,16,23
1122 rlwimi $s2,$acc10,8,16,23
1123 rlwimi $s3,$acc11,8,16,23
1134 bdz Ldec_compact_done
1136 $code.=<<___ if ($SIZE_T==8);
1137 # vectorized permutation improves decrypt performance by 10%
1141 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1142 and $acc02,$s2,$mask80
1143 srdi $acc04,$acc00,7 # r1>>7
1144 srdi $acc06,$acc02,7
1145 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1146 andc $acc10,$s2,$mask80
1147 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1148 sub $acc02,$acc02,$acc06
1149 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1150 add $acc10,$acc10,$acc10
1151 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1152 and $acc02,$acc02,$mask1b
1153 xor $acc00,$acc00,$acc08 # r2
1154 xor $acc02,$acc02,$acc10
1156 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1157 and $acc06,$acc02,$mask80
1158 srdi $acc08,$acc04,7 # r1>>7
1159 srdi $acc10,$acc06,7
1160 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1161 andc $acc14,$acc02,$mask80
1162 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1163 sub $acc06,$acc06,$acc10
1164 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1165 add $acc14,$acc14,$acc14
1166 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1167 and $acc06,$acc06,$mask1b
1168 xor $acc04,$acc04,$acc12 # r4
1169 xor $acc06,$acc06,$acc14
1171 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1172 and $acc10,$acc06,$mask80
1173 srdi $acc12,$acc08,7 # r1>>7
1174 srdi $acc14,$acc10,7
1175 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1176 sub $acc10,$acc10,$acc14
1177 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1178 andc $acc14,$acc06,$mask80
1179 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1180 add $acc14,$acc14,$acc14
1181 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1182 and $acc10,$acc10,$mask1b
1183 xor $acc08,$acc08,$acc12 # r8
1184 xor $acc10,$acc10,$acc14
1186 xor $acc00,$acc00,$s0 # r2^r0
1187 xor $acc02,$acc02,$s2
1188 xor $acc04,$acc04,$s0 # r4^r0
1189 xor $acc06,$acc06,$s2
1191 extrdi $acc01,$acc00,32,0
1192 extrdi $acc03,$acc02,32,0
1193 extrdi $acc05,$acc04,32,0
1194 extrdi $acc07,$acc06,32,0
1195 extrdi $acc09,$acc08,32,0
1196 extrdi $acc11,$acc10,32,0
1198 $code.=<<___ if ($SIZE_T==4);
1199 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1200 and $acc01,$s1,$mask80
1201 and $acc02,$s2,$mask80
1202 and $acc03,$s3,$mask80
1203 srwi $acc04,$acc00,7 # r1>>7
1204 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1205 srwi $acc05,$acc01,7
1206 andc $acc09,$s1,$mask80
1207 srwi $acc06,$acc02,7
1208 andc $acc10,$s2,$mask80
1209 srwi $acc07,$acc03,7
1210 andc $acc11,$s3,$mask80
1211 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1212 sub $acc01,$acc01,$acc05
1213 sub $acc02,$acc02,$acc06
1214 sub $acc03,$acc03,$acc07
1215 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1216 add $acc09,$acc09,$acc09
1217 add $acc10,$acc10,$acc10
1218 add $acc11,$acc11,$acc11
1219 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1220 and $acc01,$acc01,$mask1b
1221 and $acc02,$acc02,$mask1b
1222 and $acc03,$acc03,$mask1b
1223 xor $acc00,$acc00,$acc08 # r2
1224 xor $acc01,$acc01,$acc09
1225 xor $acc02,$acc02,$acc10
1226 xor $acc03,$acc03,$acc11
1228 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1229 and $acc05,$acc01,$mask80
1230 and $acc06,$acc02,$mask80
1231 and $acc07,$acc03,$mask80
1232 srwi $acc08,$acc04,7 # r1>>7
1233 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1234 srwi $acc09,$acc05,7
1235 andc $acc13,$acc01,$mask80
1236 srwi $acc10,$acc06,7
1237 andc $acc14,$acc02,$mask80
1238 srwi $acc11,$acc07,7
1239 andc $acc15,$acc03,$mask80
1240 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1241 sub $acc05,$acc05,$acc09
1242 sub $acc06,$acc06,$acc10
1243 sub $acc07,$acc07,$acc11
1244 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1245 add $acc13,$acc13,$acc13
1246 add $acc14,$acc14,$acc14
1247 add $acc15,$acc15,$acc15
1248 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1249 and $acc05,$acc05,$mask1b
1250 and $acc06,$acc06,$mask1b
1251 and $acc07,$acc07,$mask1b
1252 xor $acc04,$acc04,$acc12 # r4
1253 xor $acc05,$acc05,$acc13
1254 xor $acc06,$acc06,$acc14
1255 xor $acc07,$acc07,$acc15
1257 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1258 and $acc09,$acc05,$mask80
1259 srwi $acc12,$acc08,7 # r1>>7
1260 and $acc10,$acc06,$mask80
1261 srwi $acc13,$acc09,7
1262 and $acc11,$acc07,$mask80
1263 srwi $acc14,$acc10,7
1264 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1265 srwi $acc15,$acc11,7
1266 sub $acc09,$acc09,$acc13
1267 sub $acc10,$acc10,$acc14
1268 sub $acc11,$acc11,$acc15
1269 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1270 andc $acc13,$acc05,$mask80
1271 andc $acc14,$acc06,$mask80
1272 andc $acc15,$acc07,$mask80
1273 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1274 add $acc13,$acc13,$acc13
1275 add $acc14,$acc14,$acc14
1276 add $acc15,$acc15,$acc15
1277 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1278 and $acc09,$acc09,$mask1b
1279 and $acc10,$acc10,$mask1b
1280 and $acc11,$acc11,$mask1b
1281 xor $acc08,$acc08,$acc12 # r8
1282 xor $acc09,$acc09,$acc13
1283 xor $acc10,$acc10,$acc14
1284 xor $acc11,$acc11,$acc15
1286 xor $acc00,$acc00,$s0 # r2^r0
1287 xor $acc01,$acc01,$s1
1288 xor $acc02,$acc02,$s2
1289 xor $acc03,$acc03,$s3
1290 xor $acc04,$acc04,$s0 # r4^r0
1291 xor $acc05,$acc05,$s1
1292 xor $acc06,$acc06,$s2
1293 xor $acc07,$acc07,$s3
1296 rotrwi $s0,$s0,8 # = ROTATE(r0,8)
1298 xor $s0,$s0,$acc00 # ^= r2^r0
1304 xor $acc00,$acc00,$acc08
1305 xor $acc01,$acc01,$acc09
1306 xor $acc02,$acc02,$acc10
1307 xor $acc03,$acc03,$acc11
1308 xor $s0,$s0,$acc04 # ^= r4^r0
1309 rotrwi $acc00,$acc00,24
1311 rotrwi $acc01,$acc01,24
1313 rotrwi $acc02,$acc02,24
1315 rotrwi $acc03,$acc03,24
1316 xor $acc04,$acc04,$acc08
1317 xor $acc05,$acc05,$acc09
1318 xor $acc06,$acc06,$acc10
1319 xor $acc07,$acc07,$acc11
1320 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1321 rotrwi $acc04,$acc04,16
1323 rotrwi $acc05,$acc05,16
1325 rotrwi $acc06,$acc06,16
1327 rotrwi $acc07,$acc07,16
1328 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
1329 rotrwi $acc08,$acc08,8
1331 rotrwi $acc09,$acc09,8
1333 rotrwi $acc10,$acc10,8
1335 rotrwi $acc11,$acc11,8
1336 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
1340 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
1354 .byte 0,12,0x14,0,0,0,0,0
1356 .asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1360 $code =~ s/\`([^\`]*)\`/eval $1/gem; # replace every backtick-quoted expression in $code with the result of eval'ing it