3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # Needs more work: key setup, CBC routine...
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17 # at 1/3 of ppc_AES_decrypt.
21 # Rescheduling instructions to favour Power6 pipeline gave 10%
22 # performance improvement on the platform in question (and marginal
23 # improvement even on others). It should be noted that Power6 fails
24 # to process byte in 18 cycles, only in 23, because it fails to issue
25 # 4 load instructions in two cycles, only in 3. As a result non-compact
26 # block subroutines are 25% slower than one would expect. Compact
27 # functions scale better, because they have pure computational part,
28 # which scales perfectly with clock frequency. To be specific
29 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
# Choose build parameters from $flavour: the first branch handles flavours
# whose name contains "64", the second those containing "32" (tested in that
# order); any other flavour aborts generation with a "nonsense" message.
# NOTE(review): the branch bodies are not visible in this excerpt.
34 if ($flavour =~ /64/) {
40 } elsif ($flavour =~ /32/) {
46 } else { die "nonsense $flavour"; }
# Locate the ppc-xlate.pl translator relative to this script.
# $dir receives the directory part of $0, including the trailing "/" or "\".
# The translator is looked for first next to this script, then under
# ../../perlasm/; generation stops if neither candidate is an existing file.
# NOTE(review): if $0 has no directory component the match fails and $dir is
# taken from whatever $1 held before - confirm this is acceptable.
48 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
49 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
50 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
51 die "can't locate ppc-xlate.pl";
# Redirect STDOUT into a pipe to "$^X $xlate $flavour <next argument>", so that
# everything this script prints is post-processed by ppc-xlate.pl.
# The failure check uses low-precedence `or`: with `||` the die was attached
# to the (always true) command string instead of to open(), so a failed
# open() was silently ignored.
53 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
# Consume the remaining arguments one at a time and append a ".long" line to
# $code for each, writing the value twice as 8-digit zero-padded hex (so every
# table entry is emitted in duplicate).
# NOTE(review): the enclosing scope is not visible in this excerpt, so whether
# `shift` drains @_ or @ARGV cannot be confirmed from here.
59 while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
103 # stay away from TLS pointer
# When $SIZE_T==8: the temporary $t1, expected to be r13, is moved to r0.
# Otherwise: $Tbl3, expected to be r2, takes over $t0's register and $t0
# itself is moved to r0.
# The bare `die`s are sanity checks: they fire if the register assignments
# (made outside this excerpt) no longer match what this remapping expects.
104 if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; }
105 else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; }
117 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
118 addi $Tbl0,$Tbl0,`128-8`
122 .byte 0,12,0x14,0,0,0,0,0
127 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
128 addi $Tbl0,$Tbl0,`128-64-8+2048+256`
132 .byte 0,12,0x14,0,0,0,0,0
136 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
137 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
138 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
139 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
140 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
141 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
142 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
143 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
144 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
145 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
146 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
147 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
148 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
149 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
150 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
151 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
152 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
153 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
154 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
155 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
156 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
157 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
158 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
159 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
160 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
161 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
162 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
163 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
164 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
165 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
166 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
167 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
168 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
169 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
170 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
171 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
172 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
173 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
174 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
175 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
176 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
177 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
178 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
179 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
180 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
181 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
182 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
183 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
184 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
185 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
186 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
187 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
188 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
189 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
190 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
191 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
192 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
193 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
194 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
195 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
196 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
197 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
198 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
199 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
201 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
202 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
203 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
204 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
205 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
206 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
207 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
208 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
209 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
210 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
211 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
212 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
213 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
214 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
215 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
216 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
217 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
218 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
219 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
220 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
221 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
222 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
223 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
224 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
225 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
226 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
227 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
228 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
229 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
230 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
231 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
232 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
235 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
236 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
237 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
238 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
239 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
240 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
241 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
242 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
243 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
244 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
245 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
246 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
247 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
248 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
249 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
250 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
251 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
252 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
253 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
254 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
255 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
256 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
257 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
258 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
259 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
260 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
261 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
262 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
263 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
264 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
265 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
266 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
267 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
268 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
269 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
270 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
271 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
272 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
273 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
274 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
275 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
276 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
277 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
278 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
279 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
280 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
281 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
282 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
283 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
284 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
285 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
286 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
287 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
288 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
289 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
290 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
291 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
292 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
293 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
294 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
295 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
296 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
297 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
298 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
300 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
301 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
302 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
303 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
304 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
305 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
306 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
307 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
308 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
309 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
310 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
311 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
312 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
313 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
314 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
315 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
316 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
317 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
318 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
319 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
320 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
321 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
322 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
323 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
324 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
325 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
326 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
327 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
328 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
329 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
330 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
331 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
337 $STU $sp,-$FRAME($sp)
340 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
341 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
342 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
343 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
344 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
345 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
346 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
347 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
348 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
349 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
350 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
351 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
352 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
353 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
354 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
355 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
356 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
357 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
358 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
359 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
360 $PUSH r0,`$FRAME+$LRSAVE`($sp)
373 bl Lppc_AES_encrypt_compact
383 andi. $t0,$t0,4096-16
385 andi. $t1,$t1,4096-16
386 bne Lenc_unaligned_ok
405 insrwi $s0,$acc00,8,0
406 insrwi $s1,$acc04,8,0
407 insrwi $s0,$acc01,8,8
408 insrwi $s1,$acc05,8,8
409 insrwi $s0,$acc02,8,16
410 insrwi $s1,$acc06,8,16
411 insrwi $s2,$acc08,8,0
412 insrwi $s3,$acc12,8,0
413 insrwi $s2,$acc09,8,8
414 insrwi $s3,$acc13,8,8
415 insrwi $s2,$acc10,8,16
416 insrwi $s3,$acc14,8,16
419 bl Lppc_AES_encrypt_compact
421 extrwi $acc00,$s0,8,0
422 extrwi $acc01,$s0,8,8
424 extrwi $acc02,$s0,8,16
427 extrwi $acc04,$s1,8,0
429 extrwi $acc05,$s1,8,8
431 extrwi $acc06,$s1,8,16
434 extrwi $acc08,$s2,8,0
436 extrwi $acc09,$s2,8,8
438 extrwi $acc10,$s2,8,16
441 extrwi $acc12,$s3,8,0
443 extrwi $acc13,$s3,8,8
445 extrwi $acc14,$s3,8,16
451 $POP r0,`$FRAME+$LRSAVE`($sp)
452 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
453 $POP r13,`$FRAME-$SIZE_T*19`($sp)
454 $POP r14,`$FRAME-$SIZE_T*18`($sp)
455 $POP r15,`$FRAME-$SIZE_T*17`($sp)
456 $POP r16,`$FRAME-$SIZE_T*16`($sp)
457 $POP r17,`$FRAME-$SIZE_T*15`($sp)
458 $POP r18,`$FRAME-$SIZE_T*14`($sp)
459 $POP r19,`$FRAME-$SIZE_T*13`($sp)
460 $POP r20,`$FRAME-$SIZE_T*12`($sp)
461 $POP r21,`$FRAME-$SIZE_T*11`($sp)
462 $POP r22,`$FRAME-$SIZE_T*10`($sp)
463 $POP r23,`$FRAME-$SIZE_T*9`($sp)
464 $POP r24,`$FRAME-$SIZE_T*8`($sp)
465 $POP r25,`$FRAME-$SIZE_T*7`($sp)
466 $POP r26,`$FRAME-$SIZE_T*6`($sp)
467 $POP r27,`$FRAME-$SIZE_T*5`($sp)
468 $POP r28,`$FRAME-$SIZE_T*4`($sp)
469 $POP r29,`$FRAME-$SIZE_T*3`($sp)
470 $POP r30,`$FRAME-$SIZE_T*2`($sp)
471 $POP r31,`$FRAME-$SIZE_T*1`($sp)
476 .byte 0,12,4,1,0x80,18,3,0
489 addi $acc00,$acc00,-1
498 rlwinm $acc00,$s0,`32-24+3`,21,28
499 rlwinm $acc01,$s1,`32-24+3`,21,28
500 rlwinm $acc02,$s2,`32-24+3`,21,28
501 rlwinm $acc03,$s3,`32-24+3`,21,28
504 rlwinm $acc04,$s1,`32-16+3`,21,28
505 rlwinm $acc05,$s2,`32-16+3`,21,28
508 rlwinm $acc06,$s3,`32-16+3`,21,28
509 rlwinm $acc07,$s0,`32-16+3`,21,28
510 lwzx $acc00,$Tbl0,$acc00
511 lwzx $acc01,$Tbl0,$acc01
512 rlwinm $acc08,$s2,`32-8+3`,21,28
513 rlwinm $acc09,$s3,`32-8+3`,21,28
514 lwzx $acc02,$Tbl0,$acc02
515 lwzx $acc03,$Tbl0,$acc03
516 rlwinm $acc10,$s0,`32-8+3`,21,28
517 rlwinm $acc11,$s1,`32-8+3`,21,28
518 lwzx $acc04,$Tbl1,$acc04
519 lwzx $acc05,$Tbl1,$acc05
520 rlwinm $acc12,$s3,`0+3`,21,28
521 rlwinm $acc13,$s0,`0+3`,21,28
522 lwzx $acc06,$Tbl1,$acc06
523 lwzx $acc07,$Tbl1,$acc07
524 rlwinm $acc14,$s1,`0+3`,21,28
525 rlwinm $acc15,$s2,`0+3`,21,28
526 lwzx $acc08,$Tbl2,$acc08
527 lwzx $acc09,$Tbl2,$acc09
530 lwzx $acc10,$Tbl2,$acc10
531 lwzx $acc11,$Tbl2,$acc11
534 lwzx $acc12,$Tbl3,$acc12
535 lwzx $acc13,$Tbl3,$acc13
538 lwzx $acc14,$Tbl3,$acc14
539 lwzx $acc15,$Tbl3,$acc15
553 addi $Tbl2,$Tbl0,2048
557 rlwinm $acc00,$s0,`32-24`,24,31
558 rlwinm $acc01,$s1,`32-24`,24,31
561 rlwinm $acc02,$s2,`32-24`,24,31
562 rlwinm $acc03,$s3,`32-24`,24,31
563 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
564 lwz $acc09,`2048+32`($Tbl0)
565 rlwinm $acc04,$s1,`32-16`,24,31
566 rlwinm $acc05,$s2,`32-16`,24,31
567 lwz $acc10,`2048+64`($Tbl0)
568 lwz $acc11,`2048+96`($Tbl0)
569 rlwinm $acc06,$s3,`32-16`,24,31
570 rlwinm $acc07,$s0,`32-16`,24,31
571 lwz $acc12,`2048+128`($Tbl0)
572 lwz $acc13,`2048+160`($Tbl0)
573 rlwinm $acc08,$s2,`32-8`,24,31
574 rlwinm $acc09,$s3,`32-8`,24,31
575 lwz $acc14,`2048+192`($Tbl0)
576 lwz $acc15,`2048+224`($Tbl0)
577 rlwinm $acc10,$s0,`32-8`,24,31
578 rlwinm $acc11,$s1,`32-8`,24,31
579 lbzx $acc00,$Tbl2,$acc00
580 lbzx $acc01,$Tbl2,$acc01
581 rlwinm $acc12,$s3,`0`,24,31
582 rlwinm $acc13,$s0,`0`,24,31
583 lbzx $acc02,$Tbl2,$acc02
584 lbzx $acc03,$Tbl2,$acc03
585 rlwinm $acc14,$s1,`0`,24,31
586 rlwinm $acc15,$s2,`0`,24,31
587 lbzx $acc04,$Tbl2,$acc04
588 lbzx $acc05,$Tbl2,$acc05
589 rlwinm $s0,$acc00,24,0,7
590 rlwinm $s1,$acc01,24,0,7
591 lbzx $acc06,$Tbl2,$acc06
592 lbzx $acc07,$Tbl2,$acc07
593 rlwinm $s2,$acc02,24,0,7
594 rlwinm $s3,$acc03,24,0,7
595 lbzx $acc08,$Tbl2,$acc08
596 lbzx $acc09,$Tbl2,$acc09
597 rlwimi $s0,$acc04,16,8,15
598 rlwimi $s1,$acc05,16,8,15
599 lbzx $acc10,$Tbl2,$acc10
600 lbzx $acc11,$Tbl2,$acc11
601 rlwimi $s2,$acc06,16,8,15
602 rlwimi $s3,$acc07,16,8,15
603 lbzx $acc12,$Tbl2,$acc12
604 lbzx $acc13,$Tbl2,$acc13
605 rlwimi $s0,$acc08,8,16,23
606 rlwimi $s1,$acc09,8,16,23
607 lbzx $acc14,$Tbl2,$acc14
608 lbzx $acc15,$Tbl2,$acc15
609 rlwimi $s2,$acc10,8,16,23
610 rlwimi $s3,$acc11,8,16,23
621 .byte 0,12,0x14,0,0,0,0,0
624 Lppc_AES_encrypt_compact:
630 addi $Tbl1,$Tbl0,2048
634 ori $mask80,$mask80,0x8080
635 ori $mask1b,$mask1b,0x1b1b
643 rlwinm $acc00,$s0,`32-24`,24,31
644 rlwinm $acc01,$s1,`32-24`,24,31
645 rlwinm $acc02,$s2,`32-24`,24,31
646 rlwinm $acc03,$s3,`32-24`,24,31
647 rlwinm $acc04,$s1,`32-16`,24,31
648 rlwinm $acc05,$s2,`32-16`,24,31
649 rlwinm $acc06,$s3,`32-16`,24,31
650 rlwinm $acc07,$s0,`32-16`,24,31
651 lbzx $acc00,$Tbl1,$acc00
652 lbzx $acc01,$Tbl1,$acc01
653 rlwinm $acc08,$s2,`32-8`,24,31
654 rlwinm $acc09,$s3,`32-8`,24,31
655 lbzx $acc02,$Tbl1,$acc02
656 lbzx $acc03,$Tbl1,$acc03
657 rlwinm $acc10,$s0,`32-8`,24,31
658 rlwinm $acc11,$s1,`32-8`,24,31
659 lbzx $acc04,$Tbl1,$acc04
660 lbzx $acc05,$Tbl1,$acc05
661 rlwinm $acc12,$s3,`0`,24,31
662 rlwinm $acc13,$s0,`0`,24,31
663 lbzx $acc06,$Tbl1,$acc06
664 lbzx $acc07,$Tbl1,$acc07
665 rlwinm $acc14,$s1,`0`,24,31
666 rlwinm $acc15,$s2,`0`,24,31
667 lbzx $acc08,$Tbl1,$acc08
668 lbzx $acc09,$Tbl1,$acc09
669 rlwinm $s0,$acc00,24,0,7
670 rlwinm $s1,$acc01,24,0,7
671 lbzx $acc10,$Tbl1,$acc10
672 lbzx $acc11,$Tbl1,$acc11
673 rlwinm $s2,$acc02,24,0,7
674 rlwinm $s3,$acc03,24,0,7
675 lbzx $acc12,$Tbl1,$acc12
676 lbzx $acc13,$Tbl1,$acc13
677 rlwimi $s0,$acc04,16,8,15
678 rlwimi $s1,$acc05,16,8,15
679 lbzx $acc14,$Tbl1,$acc14
680 lbzx $acc15,$Tbl1,$acc15
681 rlwimi $s2,$acc06,16,8,15
682 rlwimi $s3,$acc07,16,8,15
683 rlwimi $s0,$acc08,8,16,23
684 rlwimi $s1,$acc09,8,16,23
685 rlwimi $s2,$acc10,8,16,23
686 rlwimi $s3,$acc11,8,16,23
697 bdz Lenc_compact_done
699 and $acc00,$s0,$mask80 # r1=r0&0x80808080
700 and $acc01,$s1,$mask80
701 and $acc02,$s2,$mask80
702 and $acc03,$s3,$mask80
703 srwi $acc04,$acc00,7 # r1>>7
707 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
708 andc $acc09,$s1,$mask80
709 andc $acc10,$s2,$mask80
710 andc $acc11,$s3,$mask80
711 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
712 sub $acc01,$acc01,$acc05
713 sub $acc02,$acc02,$acc06
714 sub $acc03,$acc03,$acc07
715 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
716 add $acc09,$acc09,$acc09
717 add $acc10,$acc10,$acc10
718 add $acc11,$acc11,$acc11
719 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
720 and $acc01,$acc01,$mask1b
721 and $acc02,$acc02,$mask1b
722 and $acc03,$acc03,$mask1b
723 xor $acc00,$acc00,$acc08 # r2
724 xor $acc01,$acc01,$acc09
725 xor $acc02,$acc02,$acc10
726 xor $acc03,$acc03,$acc11
728 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
732 xor $s0,$s0,$acc00 # r0^r2
736 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
740 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
744 rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
745 rotlwi $acc09,$acc13,8
746 rotlwi $acc10,$acc14,8
747 rotlwi $acc11,$acc15,8
766 .byte 0,12,0x14,0,0,0,0,0
771 $STU $sp,-$FRAME($sp)
774 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
775 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
776 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
777 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
778 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
779 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
780 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
781 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
782 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
783 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
784 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
785 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
786 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
787 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
788 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
789 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
790 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
791 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
792 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
793 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
794 $PUSH r0,`$FRAME+$LRSAVE`($sp)
807 bl Lppc_AES_decrypt_compact
817 andi. $t0,$t0,4096-16
819 andi. $t1,$t1,4096-16
820 bne Ldec_unaligned_ok
839 insrwi $s0,$acc00,8,0
840 insrwi $s1,$acc04,8,0
841 insrwi $s0,$acc01,8,8
842 insrwi $s1,$acc05,8,8
843 insrwi $s0,$acc02,8,16
844 insrwi $s1,$acc06,8,16
845 insrwi $s2,$acc08,8,0
846 insrwi $s3,$acc12,8,0
847 insrwi $s2,$acc09,8,8
848 insrwi $s3,$acc13,8,8
849 insrwi $s2,$acc10,8,16
850 insrwi $s3,$acc14,8,16
853 bl Lppc_AES_decrypt_compact
855 extrwi $acc00,$s0,8,0
856 extrwi $acc01,$s0,8,8
858 extrwi $acc02,$s0,8,16
861 extrwi $acc04,$s1,8,0
863 extrwi $acc05,$s1,8,8
865 extrwi $acc06,$s1,8,16
868 extrwi $acc08,$s2,8,0
870 extrwi $acc09,$s2,8,8
872 extrwi $acc10,$s2,8,16
875 extrwi $acc12,$s3,8,0
877 extrwi $acc13,$s3,8,8
879 extrwi $acc14,$s3,8,16
885 $POP r0,`$FRAME+$LRSAVE`($sp)
886 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
887 $POP r13,`$FRAME-$SIZE_T*19`($sp)
888 $POP r14,`$FRAME-$SIZE_T*18`($sp)
889 $POP r15,`$FRAME-$SIZE_T*17`($sp)
890 $POP r16,`$FRAME-$SIZE_T*16`($sp)
891 $POP r17,`$FRAME-$SIZE_T*15`($sp)
892 $POP r18,`$FRAME-$SIZE_T*14`($sp)
893 $POP r19,`$FRAME-$SIZE_T*13`($sp)
894 $POP r20,`$FRAME-$SIZE_T*12`($sp)
895 $POP r21,`$FRAME-$SIZE_T*11`($sp)
896 $POP r22,`$FRAME-$SIZE_T*10`($sp)
897 $POP r23,`$FRAME-$SIZE_T*9`($sp)
898 $POP r24,`$FRAME-$SIZE_T*8`($sp)
899 $POP r25,`$FRAME-$SIZE_T*7`($sp)
900 $POP r26,`$FRAME-$SIZE_T*6`($sp)
901 $POP r27,`$FRAME-$SIZE_T*5`($sp)
902 $POP r28,`$FRAME-$SIZE_T*4`($sp)
903 $POP r29,`$FRAME-$SIZE_T*3`($sp)
904 $POP r30,`$FRAME-$SIZE_T*2`($sp)
905 $POP r31,`$FRAME-$SIZE_T*1`($sp)
910 .byte 0,12,4,1,0x80,18,3,0
923 addi $acc00,$acc00,-1
932 rlwinm $acc00,$s0,`32-24+3`,21,28
933 rlwinm $acc01,$s1,`32-24+3`,21,28
934 rlwinm $acc02,$s2,`32-24+3`,21,28
935 rlwinm $acc03,$s3,`32-24+3`,21,28
938 rlwinm $acc04,$s3,`32-16+3`,21,28
939 rlwinm $acc05,$s0,`32-16+3`,21,28
942 rlwinm $acc06,$s1,`32-16+3`,21,28
943 rlwinm $acc07,$s2,`32-16+3`,21,28
944 lwzx $acc00,$Tbl0,$acc00
945 lwzx $acc01,$Tbl0,$acc01
946 rlwinm $acc08,$s2,`32-8+3`,21,28
947 rlwinm $acc09,$s3,`32-8+3`,21,28
948 lwzx $acc02,$Tbl0,$acc02
949 lwzx $acc03,$Tbl0,$acc03
950 rlwinm $acc10,$s0,`32-8+3`,21,28
951 rlwinm $acc11,$s1,`32-8+3`,21,28
952 lwzx $acc04,$Tbl1,$acc04
953 lwzx $acc05,$Tbl1,$acc05
954 rlwinm $acc12,$s1,`0+3`,21,28
955 rlwinm $acc13,$s2,`0+3`,21,28
956 lwzx $acc06,$Tbl1,$acc06
957 lwzx $acc07,$Tbl1,$acc07
958 rlwinm $acc14,$s3,`0+3`,21,28
959 rlwinm $acc15,$s0,`0+3`,21,28
960 lwzx $acc08,$Tbl2,$acc08
961 lwzx $acc09,$Tbl2,$acc09
964 lwzx $acc10,$Tbl2,$acc10
965 lwzx $acc11,$Tbl2,$acc11
968 lwzx $acc12,$Tbl3,$acc12
969 lwzx $acc13,$Tbl3,$acc13
972 lwzx $acc14,$Tbl3,$acc14
973 lwzx $acc15,$Tbl3,$acc15
987 addi $Tbl2,$Tbl0,2048
991 rlwinm $acc00,$s0,`32-24`,24,31
992 rlwinm $acc01,$s1,`32-24`,24,31
995 rlwinm $acc02,$s2,`32-24`,24,31
996 rlwinm $acc03,$s3,`32-24`,24,31
997 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
998 lwz $acc09,`2048+32`($Tbl0)
999 rlwinm $acc04,$s3,`32-16`,24,31
1000 rlwinm $acc05,$s0,`32-16`,24,31
1001 lwz $acc10,`2048+64`($Tbl0)
1002 lwz $acc11,`2048+96`($Tbl0)
1003 lbzx $acc00,$Tbl2,$acc00
1004 lbzx $acc01,$Tbl2,$acc01
1005 lwz $acc12,`2048+128`($Tbl0)
1006 lwz $acc13,`2048+160`($Tbl0)
1007 rlwinm $acc06,$s1,`32-16`,24,31
1008 rlwinm $acc07,$s2,`32-16`,24,31
1009 lwz $acc14,`2048+192`($Tbl0)
1010 lwz $acc15,`2048+224`($Tbl0)
1011 rlwinm $acc08,$s2,`32-8`,24,31
1012 rlwinm $acc09,$s3,`32-8`,24,31
1013 lbzx $acc02,$Tbl2,$acc02
1014 lbzx $acc03,$Tbl2,$acc03
1015 rlwinm $acc10,$s0,`32-8`,24,31
1016 rlwinm $acc11,$s1,`32-8`,24,31
1017 lbzx $acc04,$Tbl2,$acc04
1018 lbzx $acc05,$Tbl2,$acc05
1019 rlwinm $acc12,$s1,`0`,24,31
1020 rlwinm $acc13,$s2,`0`,24,31
1021 lbzx $acc06,$Tbl2,$acc06
1022 lbzx $acc07,$Tbl2,$acc07
1023 rlwinm $acc14,$s3,`0`,24,31
1024 rlwinm $acc15,$s0,`0`,24,31
1025 lbzx $acc08,$Tbl2,$acc08
1026 lbzx $acc09,$Tbl2,$acc09
1027 rlwinm $s0,$acc00,24,0,7
1028 rlwinm $s1,$acc01,24,0,7
1029 lbzx $acc10,$Tbl2,$acc10
1030 lbzx $acc11,$Tbl2,$acc11
1031 rlwinm $s2,$acc02,24,0,7
1032 rlwinm $s3,$acc03,24,0,7
1033 lbzx $acc12,$Tbl2,$acc12
1034 lbzx $acc13,$Tbl2,$acc13
1035 rlwimi $s0,$acc04,16,8,15
1036 rlwimi $s1,$acc05,16,8,15
1037 lbzx $acc14,$Tbl2,$acc14
1038 lbzx $acc15,$Tbl2,$acc15
1039 rlwimi $s2,$acc06,16,8,15
1040 rlwimi $s3,$acc07,16,8,15
1041 rlwimi $s0,$acc08,8,16,23
1042 rlwimi $s1,$acc09,8,16,23
1043 rlwimi $s2,$acc10,8,16,23
1044 rlwimi $s3,$acc11,8,16,23
1055 .byte 0,12,0x14,0,0,0,0,0
1058 Lppc_AES_decrypt_compact:
1059 lwz $acc00,240($key)
1064 addi $Tbl1,$Tbl0,2048
1068 ori $mask80,$mask80,0x8080
1069 ori $mask1b,$mask1b,0x1b1b
1071 $code.=<<___ if ($SIZE_T==8);
1072 insrdi $mask80,$mask80,32,0
1073 insrdi $mask1b,$mask1b,32,0
1083 rlwinm $acc00,$s0,`32-24`,24,31
1084 rlwinm $acc01,$s1,`32-24`,24,31
1085 rlwinm $acc02,$s2,`32-24`,24,31
1086 rlwinm $acc03,$s3,`32-24`,24,31
1087 rlwinm $acc04,$s3,`32-16`,24,31
1088 rlwinm $acc05,$s0,`32-16`,24,31
1089 rlwinm $acc06,$s1,`32-16`,24,31
1090 rlwinm $acc07,$s2,`32-16`,24,31
1091 lbzx $acc00,$Tbl1,$acc00
1092 lbzx $acc01,$Tbl1,$acc01
1093 rlwinm $acc08,$s2,`32-8`,24,31
1094 rlwinm $acc09,$s3,`32-8`,24,31
1095 lbzx $acc02,$Tbl1,$acc02
1096 lbzx $acc03,$Tbl1,$acc03
1097 rlwinm $acc10,$s0,`32-8`,24,31
1098 rlwinm $acc11,$s1,`32-8`,24,31
1099 lbzx $acc04,$Tbl1,$acc04
1100 lbzx $acc05,$Tbl1,$acc05
1101 rlwinm $acc12,$s1,`0`,24,31
1102 rlwinm $acc13,$s2,`0`,24,31
1103 lbzx $acc06,$Tbl1,$acc06
1104 lbzx $acc07,$Tbl1,$acc07
1105 rlwinm $acc14,$s3,`0`,24,31
1106 rlwinm $acc15,$s0,`0`,24,31
1107 lbzx $acc08,$Tbl1,$acc08
1108 lbzx $acc09,$Tbl1,$acc09
1109 rlwinm $s0,$acc00,24,0,7
1110 rlwinm $s1,$acc01,24,0,7
1111 lbzx $acc10,$Tbl1,$acc10
1112 lbzx $acc11,$Tbl1,$acc11
1113 rlwinm $s2,$acc02,24,0,7
1114 rlwinm $s3,$acc03,24,0,7
1115 lbzx $acc12,$Tbl1,$acc12
1116 lbzx $acc13,$Tbl1,$acc13
1117 rlwimi $s0,$acc04,16,8,15
1118 rlwimi $s1,$acc05,16,8,15
1119 lbzx $acc14,$Tbl1,$acc14
1120 lbzx $acc15,$Tbl1,$acc15
1121 rlwimi $s2,$acc06,16,8,15
1122 rlwimi $s3,$acc07,16,8,15
1123 rlwimi $s0,$acc08,8,16,23
1124 rlwimi $s1,$acc09,8,16,23
1125 rlwimi $s2,$acc10,8,16,23
1126 rlwimi $s3,$acc11,8,16,23
1137 bdz Ldec_compact_done
1139 $code.=<<___ if ($SIZE_T==8);
1140 # vectorized permutation improves decrypt performance by 10%
1144 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1145 and $acc02,$s2,$mask80
1146 srdi $acc04,$acc00,7 # r1>>7
1147 srdi $acc06,$acc02,7
1148 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1149 andc $acc10,$s2,$mask80
1150 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1151 sub $acc02,$acc02,$acc06
1152 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1153 add $acc10,$acc10,$acc10
1154 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1155 and $acc02,$acc02,$mask1b
1156 xor $acc00,$acc00,$acc08 # r2
1157 xor $acc02,$acc02,$acc10
1159 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1160 and $acc06,$acc02,$mask80
1161 srdi $acc08,$acc04,7 # r1>>7
1162 srdi $acc10,$acc06,7
1163 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1164 andc $acc14,$acc02,$mask80
1165 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1166 sub $acc06,$acc06,$acc10
1167 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1168 add $acc14,$acc14,$acc14
1169 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1170 and $acc06,$acc06,$mask1b
1171 xor $acc04,$acc04,$acc12 # r4
1172 xor $acc06,$acc06,$acc14
1174 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1175 and $acc10,$acc06,$mask80
1176 srdi $acc12,$acc08,7 # r1>>7
1177 srdi $acc14,$acc10,7
1178 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1179 sub $acc10,$acc10,$acc14
1180 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1181 andc $acc14,$acc06,$mask80
1182 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1183 add $acc14,$acc14,$acc14
1184 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1185 and $acc10,$acc10,$mask1b
1186 xor $acc08,$acc08,$acc12 # r8
1187 xor $acc10,$acc10,$acc14
1189 xor $acc00,$acc00,$s0 # r2^r0
1190 xor $acc02,$acc02,$s2
1191 xor $acc04,$acc04,$s0 # r4^r0
1192 xor $acc06,$acc06,$s2
1194 extrdi $acc01,$acc00,32,0
1195 extrdi $acc03,$acc02,32,0
1196 extrdi $acc05,$acc04,32,0
1197 extrdi $acc07,$acc06,32,0
1198 extrdi $acc09,$acc08,32,0
1199 extrdi $acc11,$acc10,32,0
1201 $code.=<<___ if ($SIZE_T==4);
1202 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1203 and $acc01,$s1,$mask80
1204 and $acc02,$s2,$mask80
1205 and $acc03,$s3,$mask80
1206 srwi $acc04,$acc00,7 # r1>>7
1207 srwi $acc05,$acc01,7
1208 srwi $acc06,$acc02,7
1209 srwi $acc07,$acc03,7
1210 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1211 andc $acc09,$s1,$mask80
1212 andc $acc10,$s2,$mask80
1213 andc $acc11,$s3,$mask80
1214 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1215 sub $acc01,$acc01,$acc05
1216 sub $acc02,$acc02,$acc06
1217 sub $acc03,$acc03,$acc07
1218 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1219 add $acc09,$acc09,$acc09
1220 add $acc10,$acc10,$acc10
1221 add $acc11,$acc11,$acc11
1222 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1223 and $acc01,$acc01,$mask1b
1224 and $acc02,$acc02,$mask1b
1225 and $acc03,$acc03,$mask1b
1226 xor $acc00,$acc00,$acc08 # r2
1227 xor $acc01,$acc01,$acc09
1228 xor $acc02,$acc02,$acc10
1229 xor $acc03,$acc03,$acc11
1231 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1232 and $acc05,$acc01,$mask80
1233 and $acc06,$acc02,$mask80
1234 and $acc07,$acc03,$mask80
1235 srwi $acc08,$acc04,7 # r1>>7
1236 srwi $acc09,$acc05,7
1237 srwi $acc10,$acc06,7
1238 srwi $acc11,$acc07,7
1239 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1240 andc $acc13,$acc01,$mask80
1241 andc $acc14,$acc02,$mask80
1242 andc $acc15,$acc03,$mask80
1243 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1244 sub $acc05,$acc05,$acc09
1245 sub $acc06,$acc06,$acc10
1246 sub $acc07,$acc07,$acc11
1247 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1248 add $acc13,$acc13,$acc13
1249 add $acc14,$acc14,$acc14
1250 add $acc15,$acc15,$acc15
1251 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1252 and $acc05,$acc05,$mask1b
1253 and $acc06,$acc06,$mask1b
1254 and $acc07,$acc07,$mask1b
1255 xor $acc04,$acc04,$acc12 # r4
1256 xor $acc05,$acc05,$acc13
1257 xor $acc06,$acc06,$acc14
1258 xor $acc07,$acc07,$acc15
1260 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1261 and $acc09,$acc05,$mask80
1262 and $acc10,$acc06,$mask80
1263 and $acc11,$acc07,$mask80
1264 srwi $acc12,$acc08,7 # r1>>7
1265 srwi $acc13,$acc09,7
1266 srwi $acc14,$acc10,7
1267 srwi $acc15,$acc11,7
1268 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1269 sub $acc09,$acc09,$acc13
1270 sub $acc10,$acc10,$acc14
1271 sub $acc11,$acc11,$acc15
1272 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1273 andc $acc13,$acc05,$mask80
1274 andc $acc14,$acc06,$mask80
1275 andc $acc15,$acc07,$mask80
1276 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1277 add $acc13,$acc13,$acc13
1278 add $acc14,$acc14,$acc14
1279 add $acc15,$acc15,$acc15
1280 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1281 and $acc09,$acc09,$mask1b
1282 and $acc10,$acc10,$mask1b
1283 and $acc11,$acc11,$mask1b
1284 xor $acc08,$acc08,$acc12 # r8
1285 xor $acc09,$acc09,$acc13
1286 xor $acc10,$acc10,$acc14
1287 xor $acc11,$acc11,$acc15
1289 xor $acc00,$acc00,$s0 # r2^r0
1290 xor $acc01,$acc01,$s1
1291 xor $acc02,$acc02,$s2
1292 xor $acc03,$acc03,$s3
1293 xor $acc04,$acc04,$s0 # r4^r0
1294 xor $acc05,$acc05,$s1
1295 xor $acc06,$acc06,$s2
1296 xor $acc07,$acc07,$s3
1299 rotrwi $s0,$s0,8 # = ROTATE(r0,8)
1303 xor $s0,$s0,$acc00 # ^= r2^r0
1307 xor $acc00,$acc00,$acc08
1308 xor $acc01,$acc01,$acc09
1309 xor $acc02,$acc02,$acc10
1310 xor $acc03,$acc03,$acc11
1311 xor $s0,$s0,$acc04 # ^= r4^r0
1315 rotrwi $acc00,$acc00,24
1316 rotrwi $acc01,$acc01,24
1317 rotrwi $acc02,$acc02,24
1318 rotrwi $acc03,$acc03,24
1319 xor $acc04,$acc04,$acc08
1320 xor $acc05,$acc05,$acc09
1321 xor $acc06,$acc06,$acc10
1322 xor $acc07,$acc07,$acc11
1323 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1327 rotrwi $acc04,$acc04,16
1328 rotrwi $acc05,$acc05,16
1329 rotrwi $acc06,$acc06,16
1330 rotrwi $acc07,$acc07,16
1331 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
1335 rotrwi $acc08,$acc08,8
1336 rotrwi $acc09,$acc09,8
1337 rotrwi $acc10,$acc10,8
1338 rotrwi $acc11,$acc11,8
1339 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
1343 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
1357 .byte 0,12,0x14,0,0,0,0,0
1359 .asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
# Replace every `...` span in the generated text with the result of eval'ing
# its contents as Perl, e.g. folding constant arithmetic used in operands.
# /g handles all occurrences, /e evaluates the replacement, /m is multi-line.
1363 $code =~ s/\`([^\`]*)\`/eval $1/gem;