MIPS32R3 provides the EXT instruction to extract bits from
[openssl.git] / crypto / aes / asm / aes-mips.pl
1 #! /usr/bin/env perl
2 # Copyright 2010-2019 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the Apache License 2.0 (the "License").  You may not use
5 # this file except in compliance with the License.  You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16
17 # AES for MIPS
18
19 # October 2010
20 #
21 # Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
22 # spends ~68 cycles per byte processed with 128-bit key. This is ~16%
23 # faster than gcc-generated code, which is not very impressive. But
24 # recall that compressed S-box requires extra processing, namely
25 # additional rotations. Rotations are implemented with lwl/lwr pairs,
26 # which are normally used for loading unaligned data. Another cool
27 # thing about this module is its endian neutrality, which means that
28 # it processes data without ever changing byte order...
29
30 # September 2012
31 #
32 # Add MIPS32R2 (~10% fewer instructions) and SmartMIPS ASE (further
33 # ~25% fewer instructions) code. Note that there is no run-time switch,
34 # instead, code path is chosen at preprocessing time, pass -mips32r2
35 # or/and -msmartmips.
36
37 # February 2019
38 #
39 # Normalize MIPS32R2 AES table address calculation by always using EXT
40 # instruction. This reduces the standard codebase by another 10%. 
41
42 ######################################################################
43 # Several MIPS ABIs are in use, O32 and N32/64 being the most
44 # widespread. Then there is a newer contender: NUBI. It appears that
45 # by picking the latter, code can be arranged in an ABI-neutral
46 # manner. Therefore let's stick to the NUBI register layout:
47 #
48 ($zero,$at,$t0,$t1,$t2)=qw($0 $1 $2 $24 $25);
49 ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=qw($4 $5 $6 $7 $8 $9 $10 $11);
50 ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=qw($12 $13 $14 $15 $16 $17 $18 $19 $20 $21 $22 $23);
51 ($gp,$tp,$sp,$fp,$ra)=qw($3 $28 $29 $30 $31);
52 #
53 # The return value is placed in $a0. Following coding rules facilitate
54 # interoperability:
55 #
56 # - never ever touch $tp, "thread pointer", former $gp;
57 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
58 #   old code];
59 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
60 #
61 # For reference here is register layout for N32/64 MIPS ABIs:
62 #
63 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
64 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
65 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
66 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
67 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
68 #
69 $flavour = shift(@ARGV) || "o32";       # supported flavours are o32,n32,64,nubi32,nubi64
70
71 # Pointer-sized mnemonics, register save/load mnemonics and register
72 # width in bytes, all picked in one go according to the ABI flavour.
73 ($PTR_LA,$PTR_ADD,$PTR_SUB,$PTR_INS,$REG_S,$REG_L,$PTR_SLL,$SZREG)=
74         ($flavour =~ /64|n32/i)
75         # daddu, dsubu and dsll incidentally work even on n32
76         ? ("dla","daddu","dsubu","dins","sd","ld","dsll",8)
77         : ("la", "addu", "subu", "ins", "sw","lw","sll", 4);
78
79 # Register handed to .cpload/.cpsetup in the PIC prologues.
80 if ($flavour =~ /nubi/i)        { $pf = $t0; }
81 else                            { $pf = $t2; }
91 #
92 # <appro@openssl.org>
93 #
94 ######################################################################
95
96 # Byte order: ask the target compiler if CC is set (MIPSEB surviving
97 # preprocessing means it is not defined), else use this host's order.
98 $big_endian=(`echo MIPSEB | $ENV{CC} -E -`=~/MIPSEB/)?0:1 if ($ENV{CC});
99 if (!defined($big_endian))
100 {    $big_endian=(unpack('L',pack('N',1))==1);   }
101
102 # Redirect STDOUT once, to the first argument that looks like a file
103 # name; without one the generated code goes to the inherited STDOUT.
104 while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
105 if ($output) { open(STDOUT,'>',$output) or die "can't open $output: $!"; }
106
107 my ($MSB,$LSB)=(0,3);   # automatically converted to little-endian
108
109 $code.=<<___;           # preamble of the generated assembly source
110 #include "mips_arch.h"
111
112 .text
113 #if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
114 .option pic2
115 #endif
116 .set    noat
117 ___
118 \f
119 {{{
120 my $FRAMESIZE=16*$SZREG;        # stack frame: 16 register-sized slots
121 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";      # used as the .mask operand
122
123 my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);     # lexical $s0-$s3 alias $a4-$a7 and hide the file-level $s0-$s3
124 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);         # still the file-level $t0-$t2 here; the lexical ones are declared on the next line
125 my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));  # temporaries, registers 12 through 23
126 my ($key0,$cnt)=($gp,$fp);      # key pointer advanced by 16 per loop pass, and the loop counter
127
128 # instruction ordering is "stolen" from output from MIPSpro assembler
129 # invoked with -mips3 -O3 arguments...
130 $code.=<<___;           # template: _mips_AES_encrypt, then the AES_encrypt entry label and directives
131 .align  5
132 .ent    _mips_AES_encrypt
133 _mips_AES_encrypt:
134         .frame  $sp,0,$ra
135         .set    reorder
136         lw      $t0,0($key)
137         lw      $t1,4($key)
138         lw      $t2,8($key)
139         lw      $t3,12($key)
140         lw      $cnt,240($key)
141         $PTR_ADD $key0,$key,16
142
143         xor     $s0,$t0
144         xor     $s1,$t1
145         xor     $s2,$t2
146         xor     $s3,$t3
147
148         subu    $cnt,1
149 #if defined(__mips_smartmips)
150         ext     $i0,$s1,16,8
151 .Loop_enc:
152         ext     $i1,$s2,16,8
153         ext     $i2,$s3,16,8
154         ext     $i3,$s0,16,8
155         lwxs    $t0,$i0($Tbl)           # Te1[s1>>16]
156         ext     $i0,$s2,8,8
157         lwxs    $t1,$i1($Tbl)           # Te1[s2>>16]
158         ext     $i1,$s3,8,8
159         lwxs    $t2,$i2($Tbl)           # Te1[s3>>16]
160         ext     $i2,$s0,8,8
161         lwxs    $t3,$i3($Tbl)           # Te1[s0>>16]
162         ext     $i3,$s1,8,8
163
164         lwxs    $t4,$i0($Tbl)           # Te2[s2>>8]
165         ext     $i0,$s3,0,8
166         lwxs    $t5,$i1($Tbl)           # Te2[s3>>8]
167         ext     $i1,$s0,0,8
168         lwxs    $t6,$i2($Tbl)           # Te2[s0>>8]
169         ext     $i2,$s1,0,8
170         lwxs    $t7,$i3($Tbl)           # Te2[s1>>8]
171         ext     $i3,$s2,0,8
172
173         lwxs    $t8,$i0($Tbl)           # Te3[s3]
174         ext     $i0,$s0,24,8
175         lwxs    $t9,$i1($Tbl)           # Te3[s0]
176         ext     $i1,$s1,24,8
177         lwxs    $t10,$i2($Tbl)          # Te3[s1]
178         ext     $i2,$s2,24,8
179         lwxs    $t11,$i3($Tbl)          # Te3[s2]
180         ext     $i3,$s3,24,8
181
182         rotr    $t0,$t0,8
183         rotr    $t1,$t1,8
184         rotr    $t2,$t2,8
185         rotr    $t3,$t3,8
186
187         rotr    $t4,$t4,16
188         rotr    $t5,$t5,16
189         rotr    $t6,$t6,16
190         rotr    $t7,$t7,16
191
192         xor     $t0,$t4
193         lwxs    $t4,$i0($Tbl)           # Te0[s0>>24]
194         xor     $t1,$t5
195         lwxs    $t5,$i1($Tbl)           # Te0[s1>>24]
196         xor     $t2,$t6
197         lwxs    $t6,$i2($Tbl)           # Te0[s2>>24]
198         xor     $t3,$t7
199         lwxs    $t7,$i3($Tbl)           # Te0[s3>>24]
200
201         rotr    $t8,$t8,24
202         lw      $s0,0($key0)
203         rotr    $t9,$t9,24
204         lw      $s1,4($key0)
205         rotr    $t10,$t10,24
206         lw      $s2,8($key0)
207         rotr    $t11,$t11,24
208         lw      $s3,12($key0)
209
210         xor     $t0,$t8
211         xor     $t1,$t9
212         xor     $t2,$t10
213         xor     $t3,$t11
214
215         xor     $t0,$t4
216         xor     $t1,$t5
217         xor     $t2,$t6
218         xor     $t3,$t7
219
220         subu    $cnt,1
221         $PTR_ADD $key0,16
222         xor     $s0,$t0
223         xor     $s1,$t1
224         xor     $s2,$t2
225         xor     $s3,$t3
226         .set    noreorder
227         bnez    $cnt,.Loop_enc
228         ext     $i0,$s1,16,8
229
230         _xtr    $i0,$s1,16-2
231 #else
232 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
233         move    $i0,$Tbl
234         move    $i1,$Tbl
235         move    $i2,$Tbl
236         move    $i3,$Tbl
237         ext     $t0,$s1,16,8
238 .Loop_enc:
239         ext     $t1,$s2,16,8
240         ext     $t2,$s3,16,8
241         ext     $t3,$s0,16,8
242         $PTR_INS $i0,$t0,2,8
243         $PTR_INS $i1,$t1,2,8
244         $PTR_INS $i2,$t2,2,8
245         $PTR_INS $i3,$t3,2,8
246         lw      $t0,0($i0)              # Te1[s1>>16]
247         ext     $t4,$s2,8,8
248         lw      $t1,0($i1)              # Te1[s2>>16]
249         ext     $t5,$s3,8,8
250         lw      $t2,0($i2)              # Te1[s3>>16]
251         ext     $t6,$s0,8,8
252         lw      $t3,0($i3)              # Te1[s0>>16]
253         ext     $t7,$s1,8,8
254         $PTR_INS $i0,$t4,2,8
255         $PTR_INS $i1,$t5,2,8
256         $PTR_INS $i2,$t6,2,8
257         $PTR_INS $i3,$t7,2,8
258 #else
259         _xtr    $i0,$s1,16-2
260 .Loop_enc:
261         _xtr    $i1,$s2,16-2
262         _xtr    $i2,$s3,16-2
263         _xtr    $i3,$s0,16-2
264         and     $i0,0x3fc
265         and     $i1,0x3fc
266         and     $i2,0x3fc
267         and     $i3,0x3fc
268         $PTR_ADD $i0,$Tbl
269         $PTR_ADD $i1,$Tbl
270         $PTR_ADD $i2,$Tbl
271         $PTR_ADD $i3,$Tbl
272         lwl     $t0,3($i0)              # Te1[s1>>16]
273         lwl     $t1,3($i1)              # Te1[s2>>16]
274         lwl     $t2,3($i2)              # Te1[s3>>16]
275         lwl     $t3,3($i3)              # Te1[s0>>16]
276         lwr     $t0,2($i0)              # Te1[s1>>16]
277         _xtr    $i0,$s2,8-2
278         lwr     $t1,2($i1)              # Te1[s2>>16]
279         _xtr    $i1,$s3,8-2
280         lwr     $t2,2($i2)              # Te1[s3>>16]
281         _xtr    $i2,$s0,8-2
282         lwr     $t3,2($i3)              # Te1[s0>>16]
283         _xtr    $i3,$s1,8-2
284         and     $i0,0x3fc
285         and     $i1,0x3fc
286         and     $i2,0x3fc
287         and     $i3,0x3fc
288         $PTR_ADD $i0,$Tbl
289         $PTR_ADD $i1,$Tbl
290         $PTR_ADD $i2,$Tbl
291         $PTR_ADD $i3,$Tbl
292 #endif
293 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
294         rotr    $t0,$t0,8
295         rotr    $t1,$t1,8
296         rotr    $t2,$t2,8
297         rotr    $t3,$t3,8
298 # if defined(_MIPSEL)
299         lw      $t4,0($i0)              # Te2[s2>>8]
300         ext     $t8,$s3,0,8
301         lw      $t5,0($i1)              # Te2[s3>>8]
302         ext     $t9,$s0,0,8
303         lw      $t6,0($i2)              # Te2[s0>>8]
304         ext     $t10,$s1,0,8
305         lw      $t7,0($i3)              # Te2[s1>>8]
306         ext     $t11,$s2,0,8
307         $PTR_INS $i0,$t8,2,8
308         $PTR_INS $i1,$t9,2,8
309         $PTR_INS $i2,$t10,2,8
310         $PTR_INS $i3,$t11,2,8
311
312         lw      $t8,0($i0)              # Te3[s3]
313         $PTR_INS $i0,$s0,2,8
314         lw      $t9,0($i1)              # Te3[s0]
315         $PTR_INS $i1,$s1,2,8
316         lw      $t10,0($i2)             # Te3[s1]
317         $PTR_INS $i2,$s2,2,8
318         lw      $t11,0($i3)             # Te3[s2]
319         $PTR_INS $i3,$s3,2,8
320 # else
321         lw      $t4,0($i0)              # Te2[s2>>8]
322         $PTR_INS $i0,$s3,2,8
323         lw      $t5,0($i1)              # Te2[s3>>8]
324         $PTR_INS $i1,$s0,2,8
325         lw      $t6,0($i2)              # Te2[s0>>8]
326         $PTR_INS $i2,$s1,2,8
327         lw      $t7,0($i3)              # Te2[s1>>8]
328         $PTR_INS $i3,$s2,2,8
329
330         lw      $t8,0($i0)              # Te3[s3]
331         _xtr    $i0,$s0,24-2
332         lw      $t9,0($i1)              # Te3[s0]
333         _xtr    $i1,$s1,24-2
334         lw      $t10,0($i2)             # Te3[s1]
335         _xtr    $i2,$s2,24-2
336         lw      $t11,0($i3)             # Te3[s2]
337         _xtr    $i3,$s3,24-2
338
339         and     $i0,0x3fc
340         and     $i1,0x3fc
341         and     $i2,0x3fc
342         and     $i3,0x3fc
343         $PTR_ADD $i0,$Tbl
344         $PTR_ADD $i1,$Tbl
345         $PTR_ADD $i2,$Tbl
346         $PTR_ADD $i3,$Tbl
347 # endif
348         rotr    $t4,$t4,16
349         rotr    $t5,$t5,16
350         rotr    $t6,$t6,16
351         rotr    $t7,$t7,16
352
353         rotr    $t8,$t8,24
354         rotr    $t9,$t9,24
355         rotr    $t10,$t10,24
356         rotr    $t11,$t11,24
357 #else
358         lwl     $t4,2($i0)              # Te2[s2>>8]
359         lwl     $t5,2($i1)              # Te2[s3>>8]
360         lwl     $t6,2($i2)              # Te2[s0>>8]
361         lwl     $t7,2($i3)              # Te2[s1>>8]
362         lwr     $t4,1($i0)              # Te2[s2>>8]
363         _xtr    $i0,$s3,0-2
364         lwr     $t5,1($i1)              # Te2[s3>>8]
365         _xtr    $i1,$s0,0-2
366         lwr     $t6,1($i2)              # Te2[s0>>8]
367         _xtr    $i2,$s1,0-2
368         lwr     $t7,1($i3)              # Te2[s1>>8]
369         _xtr    $i3,$s2,0-2
370
371         and     $i0,0x3fc
372         and     $i1,0x3fc
373         and     $i2,0x3fc
374         and     $i3,0x3fc
375         $PTR_ADD $i0,$Tbl
376         $PTR_ADD $i1,$Tbl
377         $PTR_ADD $i2,$Tbl
378         $PTR_ADD $i3,$Tbl
379         lwl     $t8,1($i0)              # Te3[s3]
380         lwl     $t9,1($i1)              # Te3[s0]
381         lwl     $t10,1($i2)             # Te3[s1]
382         lwl     $t11,1($i3)             # Te3[s2]
383         lwr     $t8,0($i0)              # Te3[s3]
384         _xtr    $i0,$s0,24-2
385         lwr     $t9,0($i1)              # Te3[s0]
386         _xtr    $i1,$s1,24-2
387         lwr     $t10,0($i2)             # Te3[s1]
388         _xtr    $i2,$s2,24-2
389         lwr     $t11,0($i3)             # Te3[s2]
390         _xtr    $i3,$s3,24-2
391
392         and     $i0,0x3fc
393         and     $i1,0x3fc
394         and     $i2,0x3fc
395         and     $i3,0x3fc
396         $PTR_ADD $i0,$Tbl
397         $PTR_ADD $i1,$Tbl
398         $PTR_ADD $i2,$Tbl
399         $PTR_ADD $i3,$Tbl
400 #endif
401         xor     $t0,$t4
402         lw      $t4,0($i0)              # Te0[s0>>24]
403         xor     $t1,$t5
404         lw      $t5,0($i1)              # Te0[s1>>24]
405         xor     $t2,$t6
406         lw      $t6,0($i2)              # Te0[s2>>24]
407         xor     $t3,$t7
408         lw      $t7,0($i3)              # Te0[s3>>24]
409
410         xor     $t0,$t8
411         lw      $s0,0($key0)
412         xor     $t1,$t9
413         lw      $s1,4($key0)
414         xor     $t2,$t10
415         lw      $s2,8($key0)
416         xor     $t3,$t11
417         lw      $s3,12($key0)
418
419         xor     $t0,$t4
420         xor     $t1,$t5
421         xor     $t2,$t6
422         xor     $t3,$t7
423
424         subu    $cnt,1
425         $PTR_ADD $key0,16
426         xor     $s0,$t0
427         xor     $s1,$t1
428         xor     $s2,$t2
429         xor     $s3,$t3
430         .set    noreorder
431         bnez    $cnt,.Loop_enc
432 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
433         ext     $t0,$s1,16,8
434 #endif
435         _xtr    $i0,$s1,16-2
436 #endif
437
438         .set    reorder
439         _xtr    $i1,$s2,16-2
440         _xtr    $i2,$s3,16-2
441         _xtr    $i3,$s0,16-2
442         and     $i0,0x3fc
443         and     $i1,0x3fc
444         and     $i2,0x3fc
445         and     $i3,0x3fc
446         $PTR_ADD $i0,$Tbl
447         $PTR_ADD $i1,$Tbl
448         $PTR_ADD $i2,$Tbl
449         $PTR_ADD $i3,$Tbl
450         lbu     $t0,2($i0)              # Te4[s1>>16]
451         _xtr    $i0,$s2,8-2
452         lbu     $t1,2($i1)              # Te4[s2>>16]
453         _xtr    $i1,$s3,8-2
454         lbu     $t2,2($i2)              # Te4[s3>>16]
455         _xtr    $i2,$s0,8-2
456         lbu     $t3,2($i3)              # Te4[s0>>16]
457         _xtr    $i3,$s1,8-2
458
459         and     $i0,0x3fc
460         and     $i1,0x3fc
461         and     $i2,0x3fc
462         and     $i3,0x3fc
463         $PTR_ADD $i0,$Tbl
464         $PTR_ADD $i1,$Tbl
465         $PTR_ADD $i2,$Tbl
466         $PTR_ADD $i3,$Tbl
467 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
468 # if defined(_MIPSEL)
469         lbu     $t4,2($i0)              # Te4[s2>>8]
470         $PTR_INS $i0,$s0,2,8
471         lbu     $t5,2($i1)              # Te4[s3>>8]
472         $PTR_INS $i1,$s1,2,8
473         lbu     $t6,2($i2)              # Te4[s0>>8]
474         $PTR_INS $i2,$s2,2,8
475         lbu     $t7,2($i3)              # Te4[s1>>8]
476         $PTR_INS $i3,$s3,2,8
477
478         lbu     $t8,2($i0)              # Te4[s0>>24]
479         _xtr    $i0,$s3,0-2
480         lbu     $t9,2($i1)              # Te4[s1>>24]
481         _xtr    $i1,$s0,0-2
482         lbu     $t10,2($i2)             # Te4[s2>>24]
483         _xtr    $i2,$s1,0-2
484         lbu     $t11,2($i3)             # Te4[s3>>24]
485         _xtr    $i3,$s2,0-2
486
487         and     $i0,0x3fc
488         and     $i1,0x3fc
489         and     $i2,0x3fc
490         and     $i3,0x3fc
491         $PTR_ADD $i0,$Tbl
492         $PTR_ADD $i1,$Tbl
493         $PTR_ADD $i2,$Tbl
494         $PTR_ADD $i3,$Tbl
495 # else
496         lbu     $t4,2($i0)              # Te4[s2>>8]
497         _xtr    $i0,$s0,24-2
498         lbu     $t5,2($i1)              # Te4[s3>>8]
499         _xtr    $i1,$s1,24-2
500         lbu     $t6,2($i2)              # Te4[s0>>8]
501         _xtr    $i2,$s2,24-2
502         lbu     $t7,2($i3)              # Te4[s1>>8]
503         _xtr    $i3,$s3,24-2
504
505         and     $i0,0x3fc
506         and     $i1,0x3fc
507         and     $i2,0x3fc
508         and     $i3,0x3fc
509         $PTR_ADD $i0,$Tbl
510         $PTR_ADD $i1,$Tbl
511         $PTR_ADD $i2,$Tbl
512         $PTR_ADD $i3,$Tbl
513         lbu     $t8,2($i0)              # Te4[s0>>24]
514         $PTR_INS $i0,$s3,2,8
515         lbu     $t9,2($i1)              # Te4[s1>>24]
516         $PTR_INS $i1,$s0,2,8
517         lbu     $t10,2($i2)             # Te4[s2>>24]
518         $PTR_INS $i2,$s1,2,8
519         lbu     $t11,2($i3)             # Te4[s3>>24]
520         $PTR_INS $i3,$s2,2,8
521 # endif
522         _ins    $t0,16
523         _ins    $t1,16
524         _ins    $t2,16
525         _ins    $t3,16
526
527         _ins2   $t0,$t4,8
528         lbu     $t4,2($i0)              # Te4[s3]
529         _ins2   $t1,$t5,8
530         lbu     $t5,2($i1)              # Te4[s0]
531         _ins2   $t2,$t6,8
532         lbu     $t6,2($i2)              # Te4[s1]
533         _ins2   $t3,$t7,8
534         lbu     $t7,2($i3)              # Te4[s2]
535
536         _ins2   $t0,$t8,24
537         lw      $s0,0($key0)
538         _ins2   $t1,$t9,24
539         lw      $s1,4($key0)
540         _ins2   $t2,$t10,24
541         lw      $s2,8($key0)
542         _ins2   $t3,$t11,24
543         lw      $s3,12($key0)
544
545         _ins2   $t0,$t4,0
546         _ins2   $t1,$t5,0
547         _ins2   $t2,$t6,0
548         _ins2   $t3,$t7,0
549 #else
550         lbu     $t4,2($i0)              # Te4[s2>>8]
551         _xtr    $i0,$s0,24-2
552         lbu     $t5,2($i1)              # Te4[s3>>8]
553         _xtr    $i1,$s1,24-2
554         lbu     $t6,2($i2)              # Te4[s0>>8]
555         _xtr    $i2,$s2,24-2
556         lbu     $t7,2($i3)              # Te4[s1>>8]
557         _xtr    $i3,$s3,24-2
558
559         and     $i0,0x3fc
560         and     $i1,0x3fc
561         and     $i2,0x3fc
562         and     $i3,0x3fc
563         $PTR_ADD $i0,$Tbl
564         $PTR_ADD $i1,$Tbl
565         $PTR_ADD $i2,$Tbl
566         $PTR_ADD $i3,$Tbl
567         lbu     $t8,2($i0)              # Te4[s0>>24]
568         _xtr    $i0,$s3,0-2
569         lbu     $t9,2($i1)              # Te4[s1>>24]
570         _xtr    $i1,$s0,0-2
571         lbu     $t10,2($i2)             # Te4[s2>>24]
572         _xtr    $i2,$s1,0-2
573         lbu     $t11,2($i3)             # Te4[s3>>24]
574         _xtr    $i3,$s2,0-2
575
576         and     $i0,0x3fc
577         and     $i1,0x3fc
578         and     $i2,0x3fc
579         and     $i3,0x3fc
580         $PTR_ADD $i0,$Tbl
581         $PTR_ADD $i1,$Tbl
582         $PTR_ADD $i2,$Tbl
583         $PTR_ADD $i3,$Tbl
584
585         _ins    $t0,16
586         _ins    $t1,16
587         _ins    $t2,16
588         _ins    $t3,16
589
590         _ins    $t4,8
591         _ins    $t5,8
592         _ins    $t6,8
593         _ins    $t7,8
594
595         xor     $t0,$t4
596         lbu     $t4,2($i0)              # Te4[s3]
597         xor     $t1,$t5
598         lbu     $t5,2($i1)              # Te4[s0]
599         xor     $t2,$t6
600         lbu     $t6,2($i2)              # Te4[s1]
601         xor     $t3,$t7
602         lbu     $t7,2($i3)              # Te4[s2]
603
604         _ins    $t8,24
605         lw      $s0,0($key0)
606         _ins    $t9,24
607         lw      $s1,4($key0)
608         _ins    $t10,24
609         lw      $s2,8($key0)
610         _ins    $t11,24
611         lw      $s3,12($key0)
612
613         xor     $t0,$t8
614         xor     $t1,$t9
615         xor     $t2,$t10
616         xor     $t3,$t11
617
618         _ins    $t4,0
619         _ins    $t5,0
620         _ins    $t6,0
621         _ins    $t7,0
622
623         xor     $t0,$t4
624         xor     $t1,$t5
625         xor     $t2,$t6
626         xor     $t3,$t7
627 #endif
628         xor     $s0,$t0
629         xor     $s1,$t1
630         xor     $s2,$t2
631         xor     $s3,$t3
632
633         jr      $ra
634 .end    _mips_AES_encrypt
635
636 .align  5
637 .globl  AES_encrypt
638 .ent    AES_encrypt
639 AES_encrypt:
640         .frame  $sp,$FRAMESIZE,$ra
641         .mask   $SAVED_REGS_MASK,-$SZREG
642         .set    noreorder
643 ___
644 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
645         .cpload $pf
646 ___
647 $code.=<<___;           # prologue: allocate the frame, save return address, frame pointer and the file-level s4-s11
648         $PTR_SUB $sp,$FRAMESIZE
649         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
650         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
651         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
652         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
653         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
654         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
655         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
656         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
657         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
658         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
659 ___
660 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
661         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
662         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
663         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
664         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
665         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
666 ___
667 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
668         .cplocal        $Tbl
669         .cpsetup        $pf,$zero,AES_encrypt
670 ___
671 $code.=<<___;           # load table address and input block, call _mips_AES_encrypt, store the result, begin restoring registers
672         .set    reorder
673         $PTR_LA $Tbl,AES_Te             # PIC-ified 'load address'
674
675 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
676         lw      $s0,0($inp)
677         lw      $s1,4($inp)
678         lw      $s2,8($inp)
679         lw      $s3,12($inp)
680 #else
681         lwl     $s0,0+$MSB($inp)
682         lwl     $s1,4+$MSB($inp)
683         lwl     $s2,8+$MSB($inp)
684         lwl     $s3,12+$MSB($inp)
685         lwr     $s0,0+$LSB($inp)
686         lwr     $s1,4+$LSB($inp)
687         lwr     $s2,8+$LSB($inp)
688         lwr     $s3,12+$LSB($inp)
689 #endif
690
691         bal     _mips_AES_encrypt
692
693 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
694         sw      $s0,0($out)
695         sw      $s1,4($out)
696         sw      $s2,8($out)
697         sw      $s3,12($out)
698 #else
699         swr     $s0,0+$LSB($out)
700         swr     $s1,4+$LSB($out)
701         swr     $s2,8+$LSB($out)
702         swr     $s3,12+$LSB($out)
703         swl     $s0,0+$MSB($out)
704         swl     $s1,4+$MSB($out)
705         swl     $s2,8+$MSB($out)
706         swl     $s3,12+$MSB($out)
707 #endif
708
709         .set    noreorder
710         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
711         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
712         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
713         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
714         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
715         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
716         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
717         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
718         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
719         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
720 ___
721 $code.=<<___ if ($flavour =~ /nubi/i);  # nubi only: restore the extra registers saved by the nubi prologue
722         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
723         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
724         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
725         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
726         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
727 ___
728 $code.=<<___;           # return; noreorder is still in effect, so the frame release follows jr directly
729         jr      $ra
730         $PTR_ADD $sp,$FRAMESIZE
731 .end    AES_encrypt
732 ___
733 \f
734 $code.=<<___;
735 .align  5
736 .ent    _mips_AES_decrypt
737 _mips_AES_decrypt:
738         .frame  $sp,0,$ra
739         .set    reorder
740         lw      $t0,0($key)
741         lw      $t1,4($key)
742         lw      $t2,8($key)
743         lw      $t3,12($key)
744         lw      $cnt,240($key)
745         $PTR_ADD $key0,$key,16
746
747         xor     $s0,$t0
748         xor     $s1,$t1
749         xor     $s2,$t2
750         xor     $s3,$t3
751
752         subu    $cnt,1
753 #if defined(__mips_smartmips)
754         ext     $i0,$s3,16,8
755 .Loop_dec:
756         ext     $i1,$s0,16,8
757         ext     $i2,$s1,16,8
758         ext     $i3,$s2,16,8
759         lwxs    $t0,$i0($Tbl)           # Td1[s3>>16]
760         ext     $i0,$s2,8,8
761         lwxs    $t1,$i1($Tbl)           # Td1[s0>>16]
762         ext     $i1,$s3,8,8
763         lwxs    $t2,$i2($Tbl)           # Td1[s1>>16]
764         ext     $i2,$s0,8,8
765         lwxs    $t3,$i3($Tbl)           # Td1[s2>>16]
766         ext     $i3,$s1,8,8
767
768         lwxs    $t4,$i0($Tbl)           # Td2[s2>>8]
769         ext     $i0,$s1,0,8
770         lwxs    $t5,$i1($Tbl)           # Td2[s3>>8]
771         ext     $i1,$s2,0,8
772         lwxs    $t6,$i2($Tbl)           # Td2[s0>>8]
773         ext     $i2,$s3,0,8
774         lwxs    $t7,$i3($Tbl)           # Td2[s1>>8]
775         ext     $i3,$s0,0,8
776
777         lwxs    $t8,$i0($Tbl)           # Td3[s1]
778         ext     $i0,$s0,24,8
779         lwxs    $t9,$i1($Tbl)           # Td3[s2]
780         ext     $i1,$s1,24,8
781         lwxs    $t10,$i2($Tbl)          # Td3[s3]
782         ext     $i2,$s2,24,8
783         lwxs    $t11,$i3($Tbl)          # Td3[s0]
784         ext     $i3,$s3,24,8
785
786         rotr    $t0,$t0,8
787         rotr    $t1,$t1,8
788         rotr    $t2,$t2,8
789         rotr    $t3,$t3,8
790
791         rotr    $t4,$t4,16
792         rotr    $t5,$t5,16
793         rotr    $t6,$t6,16
794         rotr    $t7,$t7,16
795
796         xor     $t0,$t4
797         lwxs    $t4,$i0($Tbl)           # Td0[s0>>24]
798         xor     $t1,$t5
799         lwxs    $t5,$i1($Tbl)           # Td0[s1>>24]
800         xor     $t2,$t6
801         lwxs    $t6,$i2($Tbl)           # Td0[s2>>24]
802         xor     $t3,$t7
803         lwxs    $t7,$i3($Tbl)           # Td0[s3>>24]
804
805         rotr    $t8,$t8,24
806         lw      $s0,0($key0)
807         rotr    $t9,$t9,24
808         lw      $s1,4($key0)
809         rotr    $t10,$t10,24
810         lw      $s2,8($key0)
811         rotr    $t11,$t11,24
812         lw      $s3,12($key0)
813
814         xor     $t0,$t8
815         xor     $t1,$t9
816         xor     $t2,$t10
817         xor     $t3,$t11
818
819         xor     $t0,$t4
820         xor     $t1,$t5
821         xor     $t2,$t6
822         xor     $t3,$t7
823
824         subu    $cnt,1
825         $PTR_ADD $key0,16
826         xor     $s0,$t0
827         xor     $s1,$t1
828         xor     $s2,$t2
829         xor     $s3,$t3
830         .set    noreorder
831         bnez    $cnt,.Loop_dec
832         ext     $i0,$s3,16,8
833
834         _xtr    $i0,$s3,16-2
835 #else
836 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
837         move    $i0,$Tbl
838         move    $i1,$Tbl
839         move    $i2,$Tbl
840         move    $i3,$Tbl
841         ext     $t0,$s3,16,8
842 .Loop_dec:
843         ext     $t1,$s0,16,8
844         ext     $t2,$s1,16,8
845         ext     $t3,$s2,16,8
846         $PTR_INS $i0,$t0,2,8
847         $PTR_INS $i1,$t1,2,8
848         $PTR_INS $i2,$t2,2,8
849         $PTR_INS $i3,$t3,2,8
850         lw      $t0,0($i0)              # Td1[s3>>16]
851         ext     $t4,$s2,8,8
852         lw      $t1,0($i1)              # Td1[s0>>16]
853         ext     $t5,$s3,8,8
854         lw      $t2,0($i2)              # Td1[s1>>16]
855         ext     $t6,$s0,8,8
856         lw      $t3,0($i3)              # Td1[s2>>16]
857         ext     $t7,$s1,8,8
858         $PTR_INS $i0,$t4,2,8
859         $PTR_INS $i1,$t5,2,8
860         $PTR_INS $i2,$t6,2,8
861         $PTR_INS $i3,$t7,2,8
862 #else
863         _xtr    $i0,$s3,16-2
864 .Loop_dec:
865         _xtr    $i1,$s0,16-2
866         _xtr    $i2,$s1,16-2
867         _xtr    $i3,$s2,16-2
868         and     $i0,0x3fc
869         and     $i1,0x3fc
870         and     $i2,0x3fc
871         and     $i3,0x3fc
872         $PTR_ADD $i0,$Tbl
873         $PTR_ADD $i1,$Tbl
874         $PTR_ADD $i2,$Tbl
875         $PTR_ADD $i3,$Tbl
876         lwl     $t0,3($i0)              # Td1[s3>>16]
877         lwl     $t1,3($i1)              # Td1[s0>>16]
878         lwl     $t2,3($i2)              # Td1[s1>>16]
879         lwl     $t3,3($i3)              # Td1[s2>>16]
880         lwr     $t0,2($i0)              # Td1[s3>>16]
881         _xtr    $i0,$s2,8-2
882         lwr     $t1,2($i1)              # Td1[s0>>16]
883         _xtr    $i1,$s3,8-2
884         lwr     $t2,2($i2)              # Td1[s1>>16]
885         _xtr    $i2,$s0,8-2
886         lwr     $t3,2($i3)              # Td1[s2>>16]
887         _xtr    $i3,$s1,8-2
888         and     $i0,0x3fc
889         and     $i1,0x3fc
890         and     $i2,0x3fc
891         and     $i3,0x3fc
892         $PTR_ADD $i0,$Tbl
893         $PTR_ADD $i1,$Tbl
894         $PTR_ADD $i2,$Tbl
895         $PTR_ADD $i3,$Tbl
896 #endif
897 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
898         rotr    $t0,$t0,8
899         rotr    $t1,$t1,8
900         rotr    $t2,$t2,8
901         rotr    $t3,$t3,8
902 # if defined(_MIPSEL)
903         lw      $t4,0($i0)              # Td2[s2>>8]
904         ext     $t8,$s1,0,8
905         lw      $t5,0($i1)              # Td2[s3>>8]
906         ext     $t9,$s2,0,8
907         lw      $t6,0($i2)              # Td2[s0>>8]
908         ext     $t10,$s3,0,8
909         lw      $t7,0($i3)              # Td2[s1>>8]
910         ext     $t11,$s0,0,8
911         $PTR_INS $i0,$t8,2,8
912         $PTR_INS $i1,$t9,2,8
913         $PTR_INS $i2,$t10,2,8
914         $PTR_INS $i3,$t11,2,8
915         lw      $t8,0($i0)              # Td3[s1]
916         $PTR_INS $i0,$s0,2,8
917         lw      $t9,0($i1)              # Td3[s2]
918         $PTR_INS $i1,$s1,2,8
919         lw      $t10,0($i2)             # Td3[s3]
920         $PTR_INS $i2,$s2,2,8
921         lw      $t11,0($i3)             # Td3[s0]
922         $PTR_INS $i3,$s3,2,8
923 #else
924         lw      $t4,0($i0)              # Td2[s2>>8]
925         $PTR_INS $i0,$s1,2,8
926         lw      $t5,0($i1)              # Td2[s3>>8]
927         $PTR_INS $i1,$s2,2,8
928         lw      $t6,0($i2)              # Td2[s0>>8]
929         $PTR_INS $i2,$s3,2,8
930         lw      $t7,0($i3)              # Td2[s1>>8]
931         $PTR_INS $i3,$s0,2,8
932
933         lw      $t8,0($i0)              # Td3[s1]
934         _xtr    $i0,$s0,24-2
935         lw      $t9,0($i1)              # Td3[s2]
936         _xtr    $i1,$s1,24-2
937         lw      $t10,0($i2)             # Td3[s3]
938         _xtr    $i2,$s2,24-2
939         lw      $t11,0($i3)             # Td3[s0]
940         _xtr    $i3,$s3,24-2
941
942         and     $i0,0x3fc
943         and     $i1,0x3fc
944         and     $i2,0x3fc
945         and     $i3,0x3fc
946         $PTR_ADD $i0,$Tbl
947         $PTR_ADD $i1,$Tbl
948         $PTR_ADD $i2,$Tbl
949         $PTR_ADD $i3,$Tbl
950 #endif
951         rotr    $t4,$t4,16
952         rotr    $t5,$t5,16
953         rotr    $t6,$t6,16
954         rotr    $t7,$t7,16
955
956         rotr    $t8,$t8,24
957         rotr    $t9,$t9,24
958         rotr    $t10,$t10,24
959         rotr    $t11,$t11,24
960 #else
961         lwl     $t4,2($i0)              # Td2[s2>>8]
962         lwl     $t5,2($i1)              # Td2[s3>>8]
963         lwl     $t6,2($i2)              # Td2[s0>>8]
964         lwl     $t7,2($i3)              # Td2[s1>>8]
965         lwr     $t4,1($i0)              # Td2[s2>>8]
966         _xtr    $i0,$s1,0-2
967         lwr     $t5,1($i1)              # Td2[s3>>8]
968         _xtr    $i1,$s2,0-2
969         lwr     $t6,1($i2)              # Td2[s0>>8]
970         _xtr    $i2,$s3,0-2
971         lwr     $t7,1($i3)              # Td2[s1>>8]
972         _xtr    $i3,$s0,0-2
973
974         and     $i0,0x3fc
975         and     $i1,0x3fc
976         and     $i2,0x3fc
977         and     $i3,0x3fc
978         $PTR_ADD $i0,$Tbl
979         $PTR_ADD $i1,$Tbl
980         $PTR_ADD $i2,$Tbl
981         $PTR_ADD $i3,$Tbl
982         lwl     $t8,1($i0)              # Td3[s1]
983         lwl     $t9,1($i1)              # Td3[s2]
984         lwl     $t10,1($i2)             # Td3[s3]
985         lwl     $t11,1($i3)             # Td3[s0]
986         lwr     $t8,0($i0)              # Td3[s1]
987         _xtr    $i0,$s0,24-2
988         lwr     $t9,0($i1)              # Td3[s2]
989         _xtr    $i1,$s1,24-2
990         lwr     $t10,0($i2)             # Td3[s3]
991         _xtr    $i2,$s2,24-2
992         lwr     $t11,0($i3)             # Td3[s0]
993         _xtr    $i3,$s3,24-2
994
995         and     $i0,0x3fc
996         and     $i1,0x3fc
997         and     $i2,0x3fc
998         and     $i3,0x3fc
999         $PTR_ADD $i0,$Tbl
1000         $PTR_ADD $i1,$Tbl
1001         $PTR_ADD $i2,$Tbl
1002         $PTR_ADD $i3,$Tbl
1003 #endif
1004
1005         xor     $t0,$t4
1006         lw      $t4,0($i0)              # Td0[s0>>24]
1007         xor     $t1,$t5
1008         lw      $t5,0($i1)              # Td0[s1>>24]
1009         xor     $t2,$t6
1010         lw      $t6,0($i2)              # Td0[s2>>24]
1011         xor     $t3,$t7
1012         lw      $t7,0($i3)              # Td0[s3>>24]
1013
1014         xor     $t0,$t8
1015         lw      $s0,0($key0)
1016         xor     $t1,$t9
1017         lw      $s1,4($key0)
1018         xor     $t2,$t10
1019         lw      $s2,8($key0)
1020         xor     $t3,$t11
1021         lw      $s3,12($key0)
1022
1023         xor     $t0,$t4
1024         xor     $t1,$t5
1025         xor     $t2,$t6
1026         xor     $t3,$t7
1027
1028         subu    $cnt,1
1029         $PTR_ADD $key0,16
1030         xor     $s0,$t0
1031         xor     $s1,$t1
1032         xor     $s2,$t2
1033         xor     $s3,$t3
1034         .set    noreorder
1035         bnez    $cnt,.Loop_dec
1036 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1037         ext     $t0,$s3,16,8
1038 #endif
1039
1040         _xtr    $i0,$s3,16-2
1041 #endif
1042
1043         .set    reorder
1044         lw      $t4,1024($Tbl)          # prefetch Td4
1045         _xtr    $i0,$s3,16
1046         lw      $t5,1024+32($Tbl)
1047         _xtr    $i1,$s0,16
1048         lw      $t6,1024+64($Tbl)
1049         _xtr    $i2,$s1,16
1050         lw      $t7,1024+96($Tbl)
1051         _xtr    $i3,$s2,16
1052         lw      $t8,1024+128($Tbl)
1053         and     $i0,0xff
1054         lw      $t9,1024+160($Tbl)
1055         and     $i1,0xff
1056         lw      $t10,1024+192($Tbl)
1057         and     $i2,0xff
1058         lw      $t11,1024+224($Tbl)
1059         and     $i3,0xff
1060
1061         $PTR_ADD $i0,$Tbl
1062         $PTR_ADD $i1,$Tbl
1063         $PTR_ADD $i2,$Tbl
1064         $PTR_ADD $i3,$Tbl
1065         lbu     $t0,1024($i0)           # Td4[s3>>16]
1066         _xtr    $i0,$s2,8
1067         lbu     $t1,1024($i1)           # Td4[s0>>16]
1068         _xtr    $i1,$s3,8
1069         lbu     $t2,1024($i2)           # Td4[s1>>16]
1070         _xtr    $i2,$s0,8
1071         lbu     $t3,1024($i3)           # Td4[s2>>16]
1072         _xtr    $i3,$s1,8
1073
1074         and     $i0,0xff
1075         and     $i1,0xff
1076         and     $i2,0xff
1077         and     $i3,0xff
1078         $PTR_ADD $i0,$Tbl
1079         $PTR_ADD $i1,$Tbl
1080         $PTR_ADD $i2,$Tbl
1081         $PTR_ADD $i3,$Tbl
1082 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1083 # if defined(_MIPSEL)
1084         lbu     $t4,1024($i0)           # Td4[s2>>8]
1085         $PTR_INS $i0,$s0,0,8
1086         lbu     $t5,1024($i1)           # Td4[s3>>8]
1087         $PTR_INS $i1,$s1,0,8
1088         lbu     $t6,1024($i2)           # Td4[s0>>8]
1089         $PTR_INS $i2,$s2,0,8
1090         lbu     $t7,1024($i3)           # Td4[s1>>8]
1091         $PTR_INS $i3,$s3,0,8
1092
1093         lbu     $t8,1024($i0)           # Td4[s0>>24]
1094         _xtr    $i0,$s1,0
1095         lbu     $t9,1024($i1)           # Td4[s1>>24]
1096         _xtr    $i1,$s2,0
1097         lbu     $t10,1024($i2)          # Td4[s2>>24]
1098         _xtr    $i2,$s3,0
1099         lbu     $t11,1024($i3)          # Td4[s3>>24]
1100         _xtr    $i3,$s0,0
1101
1102         $PTR_ADD $i0,$Tbl
1103         $PTR_ADD $i1,$Tbl
1104         $PTR_ADD $i2,$Tbl
1105         $PTR_ADD $i3,$Tbl
1106 # else
1107         lbu     $t4,1024($i0)           # Td4[s2>>8]
1108         _xtr    $i0,$s0,24
1109         lbu     $t5,1024($i1)           # Td4[s3>>8]
1110         _xtr    $i1,$s1,24
1111         lbu     $t6,1024($i2)           # Td4[s0>>8]
1112         _xtr    $i2,$s2,24
1113         lbu     $t7,1024($i3)           # Td4[s1>>8]
1114         _xtr    $i3,$s3,24
1115
1116         $PTR_ADD $i0,$Tbl
1117         $PTR_ADD $i1,$Tbl
1118         $PTR_ADD $i2,$Tbl
1119         $PTR_ADD $i3,$Tbl
1120         lbu     $t8,1024($i0)           # Td4[s0>>24]
1121         $PTR_INS $i0,$s1,0,8
1122         lbu     $t9,1024($i1)           # Td4[s1>>24]
1123         $PTR_INS $i1,$s2,0,8
1124         lbu     $t10,1024($i2)          # Td4[s2>>24]
1125         $PTR_INS $i2,$s3,0,8
1126         lbu     $t11,1024($i3)          # Td4[s3>>24]
1127         $PTR_INS $i3,$s0,0,8
1128 # endif
1129         _ins    $t0,16
1130         _ins    $t1,16
1131         _ins    $t2,16
1132         _ins    $t3,16
1133
1134         _ins2   $t0,$t4,8
1135         lbu     $t4,1024($i0)           # Td4[s1]
1136         _ins2   $t1,$t5,8
1137         lbu     $t5,1024($i1)           # Td4[s2]
1138         _ins2   $t2,$t6,8
1139         lbu     $t6,1024($i2)           # Td4[s3]
1140         _ins2   $t3,$t7,8
1141         lbu     $t7,1024($i3)           # Td4[s0]
1142
1143         _ins2   $t0,$t8,24
1144         lw      $s0,0($key0)
1145         _ins2   $t1,$t9,24
1146         lw      $s1,4($key0)
1147         _ins2   $t2,$t10,24
1148         lw      $s2,8($key0)
1149         _ins2   $t3,$t11,24
1150         lw      $s3,12($key0)
1151
1152         _ins2   $t0,$t4,0
1153         _ins2   $t1,$t5,0
1154         _ins2   $t2,$t6,0
1155         _ins2   $t3,$t7,0
1156 #else
1157         lbu     $t4,1024($i0)           # Td4[s2>>8]
1158         _xtr    $i0,$s0,24
1159         lbu     $t5,1024($i1)           # Td4[s3>>8]
1160         _xtr    $i1,$s1,24
1161         lbu     $t6,1024($i2)           # Td4[s0>>8]
1162         _xtr    $i2,$s2,24
1163         lbu     $t7,1024($i3)           # Td4[s1>>8]
1164         _xtr    $i3,$s3,24
1165
1166         $PTR_ADD $i0,$Tbl
1167         $PTR_ADD $i1,$Tbl
1168         $PTR_ADD $i2,$Tbl
1169         $PTR_ADD $i3,$Tbl
1170         lbu     $t8,1024($i0)           # Td4[s0>>24]
1171         _xtr    $i0,$s1,0
1172         lbu     $t9,1024($i1)           # Td4[s1>>24]
1173         _xtr    $i1,$s2,0
1174         lbu     $t10,1024($i2)          # Td4[s2>>24]
1175         _xtr    $i2,$s3,0
1176         lbu     $t11,1024($i3)          # Td4[s3>>24]
1177         _xtr    $i3,$s0,0
1178
1179         $PTR_ADD $i0,$Tbl
1180         $PTR_ADD $i1,$Tbl
1181         $PTR_ADD $i2,$Tbl
1182         $PTR_ADD $i3,$Tbl
1183
1184         _ins    $t0,16
1185         _ins    $t1,16
1186         _ins    $t2,16
1187         _ins    $t3,16
1188
1189         _ins    $t4,8
1190         _ins    $t5,8
1191         _ins    $t6,8
1192         _ins    $t7,8
1193
1194         xor     $t0,$t4
1195         lbu     $t4,1024($i0)           # Td4[s1]
1196         xor     $t1,$t5
1197         lbu     $t5,1024($i1)           # Td4[s2]
1198         xor     $t2,$t6
1199         lbu     $t6,1024($i2)           # Td4[s3]
1200         xor     $t3,$t7
1201         lbu     $t7,1024($i3)           # Td4[s0]
1202
1203         _ins    $t8,24
1204         lw      $s0,0($key0)
1205         _ins    $t9,24
1206         lw      $s1,4($key0)
1207         _ins    $t10,24
1208         lw      $s2,8($key0)
1209         _ins    $t11,24
1210         lw      $s3,12($key0)
1211
1212         xor     $t0,$t8
1213         xor     $t1,$t9
1214         xor     $t2,$t10
1215         xor     $t3,$t11
1216
1217         _ins    $t4,0
1218         _ins    $t5,0
1219         _ins    $t6,0
1220         _ins    $t7,0
1221
1222         xor     $t0,$t4
1223         xor     $t1,$t5
1224         xor     $t2,$t6
1225         xor     $t3,$t7
1226 #endif
1227
1228         xor     $s0,$t0
1229         xor     $s1,$t1
1230         xor     $s2,$t2
1231         xor     $s3,$t3
1232
1233         jr      $ra
1234 .end    _mips_AES_decrypt
1235
# AES_decrypt: public single-block entry point.  Reads four words from inp
# into s0-s3, calls _mips_AES_decrypt with Tbl pointing at AES_Td and writes
# s0-s3 back to out.
1236 .align  5
1237 .globl  AES_decrypt
1238 .ent    AES_decrypt
1239 AES_decrypt:
1240         .frame  $sp,$FRAMESIZE,$ra
1241         .mask   $SAVED_REGS_MASK,-$SZREG
1242         .set    noreorder
1243 ___
1244 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1245         .cpload $pf
1246 ___
1247 $code.=<<___;
1248         $PTR_SUB $sp,$FRAMESIZE                 # allocate the stack frame
1249         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)    # save ra, fp and s4-s11
1250         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1251         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
1252         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
1253         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
1254         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
1255         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
1256         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
1257         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
1258         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
1259 ___
1260 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1261         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)  # nubi only: five extra saves
1262         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
1263         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
1264         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
1265         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
1266 ___
1267 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1268         .cplocal        $Tbl
1269         .cpsetup        $pf,$zero,AES_decrypt
1270 ___
1271 $code.=<<___;
1272         .set    reorder
1273         $PTR_LA $Tbl,AES_Td             # PIC-ified 'load address'
1274
1275 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1276         lw      $s0,0($inp)             # R6 builds: plain word loads
1277         lw      $s1,4($inp)
1278         lw      $s2,8($inp)
1279         lw      $s3,12($inp)
1280 #else
             # other builds: lwl/lwr pairs, normally used for unaligned data
1281         lwl     $s0,0+$MSB($inp)
1282         lwl     $s1,4+$MSB($inp)
1283         lwl     $s2,8+$MSB($inp)
1284         lwl     $s3,12+$MSB($inp)
1285         lwr     $s0,0+$LSB($inp)
1286         lwr     $s1,4+$LSB($inp)
1287         lwr     $s2,8+$LSB($inp)
1288         lwr     $s3,12+$LSB($inp)
1289 #endif
1290
1291         bal     _mips_AES_decrypt       # transforms s0-s3 in place
1292
1293 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1294         sw      $s0,0($out)             # R6 builds: plain word stores
1295         sw      $s1,4($out)
1296         sw      $s2,8($out)
1297         sw      $s3,12($out)
1298 #else
             # other builds: swr/swl pairs mirror the lwl/lwr loads above
1299         swr     $s0,0+$LSB($out)
1300         swr     $s1,4+$LSB($out)
1301         swr     $s2,8+$LSB($out)
1302         swr     $s3,12+$LSB($out)
1303         swl     $s0,0+$MSB($out)
1304         swl     $s1,4+$MSB($out)
1305         swl     $s2,8+$MSB($out)
1306         swl     $s3,12+$MSB($out)
1307 #endif
1308
1309         .set    noreorder
1310         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)    # restore from the prologue slots
1311         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1312         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
1313         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
1314         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
1315         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
1316         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
1317         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
1318         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
1319         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
1320 ___
1321 $code.=<<___ if ($flavour =~ /nubi/i);
1322         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
1323         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
1324         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
1325         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
1326         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
1327 ___
1328 $code.=<<___;
1329         jr      $ra
1330         $PTR_ADD $sp,$FRAMESIZE         # (delay slot) release the frame
1331 .end    AES_decrypt
1332 ___
1333 }}}
1334 \f
1335 {{{
# Frame layout and register aliases shared by _mips_AES_set_encrypt_key,
# AES_set_encrypt_key and AES_set_decrypt_key below.
1336 my $FRAMESIZE=8*$SZREG;    # frame allocated by the two public key-setup entry points
1337 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc000f008" : "0xc0000000";    # emitted in their .mask directives
1338
1339 my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);    # user key, key length in bits, schedule, table base
1340 my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);    # working round-key words
1341 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);    # table-lookup scratch; $i1 shares $t0 with the status code
1342 my ($rcon,$cnt)=($gp,$fp);    # pointer to the words at $Tbl+256, and the loop counter
1343
1343
# _mips_AES_set_encrypt_key: key-expansion core, reached with bal from
# AES_set_encrypt_key and AES_set_decrypt_key after they point Tbl at AES_Te4.
#   in:  inp = user key bytes, bits = 128/192/256, key = schedule to fill
#   out: t0  = 0 on success, -1 if inp or key is null, -2 for any other bits
#        cnt = 10/12/14 on success, also stored at byte 240 of the schedule
#        key = rewound to the start of the schedule on success
# The i0-i3 scratch registers alias at and t0-t2 (see the my() list above),
# so t0 only carries a meaningful status once a final li has executed.
1344 $code.=<<___;
1345 .align  5
1346 .ent    _mips_AES_set_encrypt_key
1347 _mips_AES_set_encrypt_key:
1348         .frame  $sp,0,$ra               # no stack frame of its own
1349         .set    noreorder
1350         beqz    $inp,.Lekey_done        # null user key
1351         li      $t0,-1                  # (delay slot) preset status -1
1352         beqz    $key,.Lekey_done        # null schedule pointer
1353         $PTR_ADD $rcon,$Tbl,256         # (delay slot) rcon = Tbl+256
1354
1355         .set    reorder
1356 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1357         lw      $rk0,0($inp)            # load 128 bits
1358         lw      $rk1,4($inp)
1359         lw      $rk2,8($inp)
1360         lw      $rk3,12($inp)
1361 #else
1362         lwl     $rk0,0+$MSB($inp)       # load 128 bits
1363         lwl     $rk1,4+$MSB($inp)
1364         lwl     $rk2,8+$MSB($inp)
1365         lwl     $rk3,12+$MSB($inp)
1366         lwr     $rk0,0+$LSB($inp)
1367         lwr     $rk1,4+$LSB($inp)
1368         lwr     $rk2,8+$LSB($inp)
1369         lwr     $rk3,12+$LSB($inp)
1370 #endif
1371         li      $at,128
1372         .set    noreorder
1373         beq     $bits,$at,.L128bits
1374         li      $cnt,10                 # (delay slot) loop count for 128-bit keys
1375
1376         .set    reorder
1377 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1378         lw      $rk4,16($inp)           # load 192 bits
1379         lw      $rk5,20($inp)
1380 #else
1381         lwl     $rk4,16+$MSB($inp)      # load 192 bits
1382         lwl     $rk5,20+$MSB($inp)
1383         lwr     $rk4,16+$LSB($inp)
1384         lwr     $rk5,20+$LSB($inp)
1385 #endif
1386         li      $at,192
1387         .set    noreorder
1388         beq     $bits,$at,.L192bits
1389         li      $cnt,8                  # (delay slot) loop count for 192-bit keys
1390
1391         .set    reorder
1392 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1393         lw      $rk6,24($inp)           # load 256 bits
1394         lw      $rk7,28($inp)
1395 #else
1396         lwl     $rk6,24+$MSB($inp)      # load 256 bits
1397         lwl     $rk7,28+$MSB($inp)
1398         lwr     $rk6,24+$LSB($inp)
1399         lwr     $rk7,28+$LSB($inp)
1400 #endif
1401         li      $at,256
1402         .set    noreorder
1403         beq     $bits,$at,.L256bits
1404         li      $cnt,7                  # (delay slot) loop count for 256-bit keys
1405
1406         b       .Lekey_done             # bits is none of 128, 192, 256
1407         li      $t0,-2                  # (delay slot) status -2
1408
1409 .align  4
1410 .L128bits:
1411         .set    reorder
1412         srl     $i0,$rk3,16             # split rk3 into four byte indices
1413         srl     $i1,$rk3,8
1414         and     $i0,0xff
1415         and     $i1,0xff
1416         and     $i2,$rk3,0xff
1417         srl     $i3,$rk3,24
1418         $PTR_ADD $i0,$Tbl
1419         $PTR_ADD $i1,$Tbl
1420         $PTR_ADD $i2,$Tbl
1421         $PTR_ADD $i3,$Tbl
1422         lbu     $i0,0($i0)              # byte lookups in the table at Tbl
1423         lbu     $i1,0($i1)
1424         lbu     $i2,0($i2)
1425         lbu     $i3,0($i3)
1426
1427         sw      $rk0,0($key)            # emit the current four words
1428         sw      $rk1,4($key)
1429         sw      $rk2,8($key)
1430         sw      $rk3,12($key)
1431         subu    $cnt,1
1432         $PTR_ADD $key,16
1433
1434         _bias   $i0,24
1435         _bias   $i1,16
1436         _bias   $i2,8
1437         _bias   $i3,0
1438
1439         xor     $rk0,$i0
1440         lw      $i0,0($rcon)            # word at rcon, folded into rk0 below
1441         xor     $rk0,$i1
1442         xor     $rk0,$i2
1443         xor     $rk0,$i3
1444         xor     $rk0,$i0
1445
1446         xor     $rk1,$rk0               # chain the new rk0 through rk1-rk3
1447         xor     $rk2,$rk1
1448         xor     $rk3,$rk2
1449
1450         .set    noreorder
1451         bnez    $cnt,.L128bits
1452         $PTR_ADD $rcon,4                # (delay slot) advance rcon by one word
1453
1454         sw      $rk0,0($key)            # final four words
1455         sw      $rk1,4($key)
1456         sw      $rk2,8($key)
1457         li      $cnt,10                 # round count
1458         sw      $rk3,12($key)
1459         li      $t0,0                   # status: success
1460         sw      $cnt,80($key)           # key advanced 10*16, so this is byte 240
1461         b       .Lekey_done
1462         $PTR_SUB $key,10*16             # (delay slot) rewind key to the schedule start
1463
1464 .align  4
1465 .L192bits:
1466         .set    reorder
1467         srl     $i0,$rk5,16             # split rk5 into four byte indices
1468         srl     $i1,$rk5,8
1469         and     $i0,0xff
1470         and     $i1,0xff
1471         and     $i2,$rk5,0xff
1472         srl     $i3,$rk5,24
1473         $PTR_ADD $i0,$Tbl
1474         $PTR_ADD $i1,$Tbl
1475         $PTR_ADD $i2,$Tbl
1476         $PTR_ADD $i3,$Tbl
1477         lbu     $i0,0($i0)
1478         lbu     $i1,0($i1)
1479         lbu     $i2,0($i2)
1480         lbu     $i3,0($i3)
1481
1482         sw      $rk0,0($key)            # emit the current six words
1483         sw      $rk1,4($key)
1484         sw      $rk2,8($key)
1485         sw      $rk3,12($key)
1486         sw      $rk4,16($key)
1487         sw      $rk5,20($key)
1488         subu    $cnt,1
1489         $PTR_ADD $key,24
1490
1491         _bias   $i0,24
1492         _bias   $i1,16
1493         _bias   $i2,8
1494         _bias   $i3,0
1495
1496         xor     $rk0,$i0
1497         lw      $i0,0($rcon)            # word at rcon, folded into rk0 below
1498         xor     $rk0,$i1
1499         xor     $rk0,$i2
1500         xor     $rk0,$i3
1501         xor     $rk0,$i0
1502
1503         xor     $rk1,$rk0               # chain the new rk0 through rk1-rk5
1504         xor     $rk2,$rk1
1505         xor     $rk3,$rk2
1506         xor     $rk4,$rk3
1507         xor     $rk5,$rk4
1508
1509         .set    noreorder
1510         bnez    $cnt,.L192bits
1511         $PTR_ADD $rcon,4                # (delay slot) advance rcon by one word
1512
1513         sw      $rk0,0($key)            # only four more words are stored here
1514         sw      $rk1,4($key)
1515         sw      $rk2,8($key)
1516         li      $cnt,12                 # round count
1517         sw      $rk3,12($key)
1518         li      $t0,0                   # status: success
1519         sw      $cnt,48($key)           # key advanced 8*24, so this is byte 240
1520         b       .Lekey_done
1521         $PTR_SUB $key,12*16             # (delay slot) 12*16 = 8*24, back to the start
1522
1523 .align  4
1524 .L256bits:
1525         .set    reorder
1526         srl     $i0,$rk7,16             # split rk7 into four byte indices
1527         srl     $i1,$rk7,8
1528         and     $i0,0xff
1529         and     $i1,0xff
1530         and     $i2,$rk7,0xff
1531         srl     $i3,$rk7,24
1532         $PTR_ADD $i0,$Tbl
1533         $PTR_ADD $i1,$Tbl
1534         $PTR_ADD $i2,$Tbl
1535         $PTR_ADD $i3,$Tbl
1536         lbu     $i0,0($i0)
1537         lbu     $i1,0($i1)
1538         lbu     $i2,0($i2)
1539         lbu     $i3,0($i3)
1540
1541         sw      $rk0,0($key)            # emit the current eight words
1542         sw      $rk1,4($key)
1543         sw      $rk2,8($key)
1544         sw      $rk3,12($key)
1545         sw      $rk4,16($key)
1546         sw      $rk5,20($key)
1547         sw      $rk6,24($key)
1548         sw      $rk7,28($key)
1549         subu    $cnt,1
1550
1551         _bias   $i0,24
1552         _bias   $i1,16
1553         _bias   $i2,8
1554         _bias   $i3,0
1555
1556         xor     $rk0,$i0
1557         lw      $i0,0($rcon)            # word at rcon, folded into rk0 below
1558         xor     $rk0,$i1
1559         xor     $rk0,$i2
1560         xor     $rk0,$i3
1561         xor     $rk0,$i0
1562
1563         xor     $rk1,$rk0               # chain the new rk0 through rk1-rk3
1564         xor     $rk2,$rk1
1565         xor     $rk3,$rk2
1566         beqz    $cnt,.L256bits_done     # last pass updates rk0-rk3 only
1567
1568         srl     $i0,$rk3,24             # second half: lookups on rk3, no rcon
1569         srl     $i1,$rk3,16
1570         srl     $i2,$rk3,8
1571         and     $i3,$rk3,0xff
1572         and     $i1,0xff
1573         and     $i2,0xff
1574         $PTR_ADD $i0,$Tbl
1575         $PTR_ADD $i1,$Tbl
1576         $PTR_ADD $i2,$Tbl
1577         $PTR_ADD $i3,$Tbl
1578         lbu     $i0,0($i0)
1579         lbu     $i1,0($i1)
1580         lbu     $i2,0($i2)
1581         lbu     $i3,0($i3)
1582         sll     $i0,24                  # reassemble the four bytes in place
1583         sll     $i1,16
1584         sll     $i2,8
1585
1586         xor     $rk4,$i0
1587         xor     $rk4,$i1
1588         xor     $rk4,$i2
1589         xor     $rk4,$i3
1590
1591         xor     $rk5,$rk4               # chain the new rk4 through rk5-rk7
1592         xor     $rk6,$rk5
1593         xor     $rk7,$rk6
1594
1595         $PTR_ADD $key,32
1596         .set    noreorder
1597         b       .L256bits
1598         $PTR_ADD $rcon,4                # (delay slot) advance rcon by one word
1599
1600 .L256bits_done:
1601         sw      $rk0,32($key)           # key was not advanced on the last pass
1602         sw      $rk1,36($key)
1603         sw      $rk2,40($key)
1604         li      $cnt,14                 # round count
1605         sw      $rk3,44($key)
1606         li      $t0,0                   # status: success
1607         sw      $cnt,48($key)           # key advanced 6*32, so this is byte 240
1608         $PTR_SUB $key,12*16             # 12*16 = 6*32, back to the start
1609
1610 .Lekey_done:
1611         jr      $ra
1612         nop
1613 .end    _mips_AES_set_encrypt_key
1614
# AES_set_encrypt_key: public wrapper around _mips_AES_set_encrypt_key.
# Sets up a frame, points Tbl at AES_Te4, runs the expansion and copies the
# status left in t0 into a0 before returning.
1615 .globl  AES_set_encrypt_key
1616 .ent    AES_set_encrypt_key
1617 AES_set_encrypt_key:
1618         .frame  $sp,$FRAMESIZE,$ra
1619         .mask   $SAVED_REGS_MASK,-$SZREG
1620         .set    noreorder
1621 ___
1622 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1623         .cpload $pf
1624 ___
1625 $code.=<<___;
1626         $PTR_SUB $sp,$FRAMESIZE                 # allocate the stack frame
1627         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1628         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1629 ___
1630 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1631         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1632         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1633         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1634         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1635         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1636 ___
1637 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1638         .cplocal        $Tbl
1639         .cpsetup        $pf,$zero,AES_set_encrypt_key
1640 ___
1641 $code.=<<___;
1642         .set    reorder
1643         $PTR_LA $Tbl,AES_Te4            # PIC-ified 'load address'
1644
1645         bal     _mips_AES_set_encrypt_key       # leaves the status in t0
1646
1647         .set    noreorder
1648         move    $a0,$t0                 # copy the status from t0 into a0
1649         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1650         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1651 ___
1652 $code.=<<___ if ($flavour =~ /nubi/i);  # reload from the same slots the prologue wrote
1653         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1654         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1655         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1656         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1657         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1658 ___
1659 $code.=<<___;
1660         jr      $ra
1661         $PTR_ADD $sp,$FRAMESIZE         # (delay slot) release the frame
1662 .end    AES_set_encrypt_key
1663 ___
1664 \f
# AES_set_decrypt_key: builds a decryption key schedule in three steps.
#   1. Run _mips_AES_set_encrypt_key; a negative status is returned as is.
#   2. .Lswap reverses the order of the 16-byte round keys in place.
#   3. .Lmix rewrites every word of round keys 1 .. rounds-1.  Each byte is
#      doubled three times (packed, with 0x1b reduction) to get the 2x, 4x
#      and 8x multiples, which are combined with byte rotations; this looks
#      like InvMixColumns applied per word - NOTE(review): confirm.
# The aliases below reuse registers of the key-expansion step, which is
# finished by the time they are written.
1665 my ($head,$tail)=($inp,$bits);
1666 my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
1667 my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
1668 $code.=<<___;
1669 .align  5
1670 .globl  AES_set_decrypt_key
1671 .ent    AES_set_decrypt_key
1672 AES_set_decrypt_key:
1673         .frame  $sp,$FRAMESIZE,$ra
1674         .mask   $SAVED_REGS_MASK,-$SZREG
1675         .set    noreorder
1676 ___
1677 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1678         .cpload $pf
1679 ___
1680 $code.=<<___;
1681         $PTR_SUB $sp,$FRAMESIZE                 # allocate the stack frame
1682         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1683         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1684 ___
1685 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1686         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1687         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1688         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1689         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1690         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1691 ___
1692 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1693         .cplocal        $Tbl
1694         .cpsetup        $pf,$zero,AES_set_decrypt_key
1695 ___
1696 $code.=<<___;
1697         .set    reorder
1698         $PTR_LA $Tbl,AES_Te4            # PIC-ified 'load address'
1699
1700         bal     _mips_AES_set_encrypt_key       # step 1: encryption schedule
1701
1702         bltz    $t0,.Ldkey_done         # negative status: return it unchanged
1703
1704         sll     $at,$cnt,4              # cnt = round count, times 16 bytes
1705         $PTR_ADD $head,$key,0           # head = first round key
1706         $PTR_ADD $tail,$key,$at         # tail = last round key
1707 .align  4
1708 .Lswap:                                 # step 2: swap blocks until head meets tail
1709         lw      $rk0,0($head)
1710         lw      $rk1,4($head)
1711         lw      $rk2,8($head)
1712         lw      $rk3,12($head)
1713         lw      $rk4,0($tail)
1714         lw      $rk5,4($tail)
1715         lw      $rk6,8($tail)
1716         lw      $rk7,12($tail)
1717         sw      $rk0,0($tail)
1718         sw      $rk1,4($tail)
1719         sw      $rk2,8($tail)
1720         sw      $rk3,12($tail)
1721         $PTR_ADD $head,16
1722         $PTR_SUB $tail,16
1723         sw      $rk4,-16($head)         # head already advanced, hence -16
1724         sw      $rk5,-12($head)
1725         sw      $rk6,-8($head)
1726         sw      $rk7,-4($head)
1727         bne     $head,$tail,.Lswap
1728
1729         lw      $tp1,16($key)           # modulo-scheduled
1730         lui     $x80808080,0x8080       # build the 0x80808080 mask
1731         subu    $cnt,1
1732         or      $x80808080,0x8080
1733         sll     $cnt,2                  # (rounds-1)*4 words to rewrite
1734         $PTR_ADD $key,16                # start at round key 1
1735         lui     $x1b1b1b1b,0x1b1b       # build the 0x1b1b1b1b mask
1736         nor     $x7f7f7f7f,$zero,$x80808080     # complement: 0x7f7f7f7f
1737         or      $x1b1b1b1b,0x1b1b
1738 .align  4
1739 .Lmix:                                  # step 3: one word per iteration
1740         and     $m,$tp1,$x80808080      # m = top bit of every byte of tp1
1741         and     $tp2,$tp1,$x7f7f7f7f
1742         srl     $tp4,$m,7
1743         addu    $tp2,$tp2               # tp2<<1
1744         subu    $m,$tp4                 # 0x80 - 0x01 = 0x7f in each flagged byte
1745         and     $m,$x1b1b1b1b           # 0x1b in each flagged byte
1746         xor     $tp2,$m                 # tp2 = 2x
1747
1748         and     $m,$tp2,$x80808080      # same doubling step on tp2
1749         and     $tp4,$tp2,$x7f7f7f7f
1750         srl     $tp8,$m,7
1751         addu    $tp4,$tp4               # tp4<<1
1752         subu    $m,$tp8
1753         and     $m,$x1b1b1b1b
1754         xor     $tp4,$m                 # tp4 = 4x
1755
1756         and     $m,$tp4,$x80808080      # same doubling step on tp4
1757         and     $tp8,$tp4,$x7f7f7f7f
1758         srl     $tp9,$m,7
1759         addu    $tp8,$tp8               # tp8<<1
1760         subu    $m,$tp9
1761         and     $m,$x1b1b1b1b
1762         xor     $tp8,$m                 # tp8 = 8x
1763
1764         xor     $tp9,$tp8,$tp1          # tp9 = 8x ^ 1x
1765         xor     $tpe,$tp8,$tp4          # tpe = 8x ^ 4x, 2x is folded in below
1766         xor     $tpb,$tp9,$tp2          # tpb = 9x ^ 2x
1767         xor     $tpd,$tp9,$tp4          # tpd = 9x ^ 4x
1768
1769 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1770         rotr    $tp1,$tpd,16
1771          xor    $tpe,$tp2
1772         rotr    $tp2,$tp9,8
1773         xor     $tpe,$tp1
1774         rotr    $tp4,$tpb,24
1775         xor     $tpe,$tp2
1776         lw      $tp1,4($key)            # modulo-scheduled
1777         xor     $tpe,$tp4
1778 #else
1779         _ror    $tp1,$tpd,16
1780          xor    $tpe,$tp2
1781         _ror    $tp2,$tpd,-16
1782         xor     $tpe,$tp1
1783         _ror    $tp1,$tp9,8
1784         xor     $tpe,$tp2
1785         _ror    $tp2,$tp9,-24
1786         xor     $tpe,$tp1
1787         _ror    $tp1,$tpb,24
1788         xor     $tpe,$tp2
1789         _ror    $tp2,$tpb,-8
1790         xor     $tpe,$tp1
1791         lw      $tp1,4($key)            # modulo-scheduled
1792         xor     $tpe,$tp2
1793 #endif
1794         subu    $cnt,1
1795         sw      $tpe,0($key)            # overwrite the word in place
1796         $PTR_ADD $key,4
1797         bnez    $cnt,.Lmix
1798
1799         li      $t0,0                   # status: success (t0 held a mask until now)
1800 .Ldkey_done:
1801         .set    noreorder
1802         move    $a0,$t0                 # copy the status from t0 into a0
1803         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1804         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1805 ___
1806 $code.=<<___ if ($flavour =~ /nubi/i);  # reload from the same slots the prologue wrote
1807         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1808         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1809         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1810         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1811         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1812 ___
1813 $code.=<<___;
1814         jr      $ra
1815         $PTR_ADD $sp,$FRAMESIZE         # (delay slot) release the frame
1816 .end    AES_set_decrypt_key
1817 ___
1818 }}}
1819
1820 ######################################################################
1821 # Tables are kept in endian-neutral manner
1822 $code.=<<___;
1823 .rdata
1824 .align  10
1825 AES_Te:
1826 .byte   0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84     # Te0
1827 .byte   0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
1828 .byte   0xff,0xf2,0xf2,0x0d,    0xd6,0x6b,0x6b,0xbd
1829 .byte   0xde,0x6f,0x6f,0xb1,    0x91,0xc5,0xc5,0x54
1830 .byte   0x60,0x30,0x30,0x50,    0x02,0x01,0x01,0x03
1831 .byte   0xce,0x67,0x67,0xa9,    0x56,0x2b,0x2b,0x7d
1832 .byte   0xe7,0xfe,0xfe,0x19,    0xb5,0xd7,0xd7,0x62
1833 .byte   0x4d,0xab,0xab,0xe6,    0xec,0x76,0x76,0x9a
1834 .byte   0x8f,0xca,0xca,0x45,    0x1f,0x82,0x82,0x9d
1835 .byte   0x89,0xc9,0xc9,0x40,    0xfa,0x7d,0x7d,0x87
1836 .byte   0xef,0xfa,0xfa,0x15,    0xb2,0x59,0x59,0xeb
1837 .byte   0x8e,0x47,0x47,0xc9,    0xfb,0xf0,0xf0,0x0b
1838 .byte   0x41,0xad,0xad,0xec,    0xb3,0xd4,0xd4,0x67
1839 .byte   0x5f,0xa2,0xa2,0xfd,    0x45,0xaf,0xaf,0xea
1840 .byte   0x23,0x9c,0x9c,0xbf,    0x53,0xa4,0xa4,0xf7
1841 .byte   0xe4,0x72,0x72,0x96,    0x9b,0xc0,0xc0,0x5b
1842 .byte   0x75,0xb7,0xb7,0xc2,    0xe1,0xfd,0xfd,0x1c
1843 .byte   0x3d,0x93,0x93,0xae,    0x4c,0x26,0x26,0x6a
1844 .byte   0x6c,0x36,0x36,0x5a,    0x7e,0x3f,0x3f,0x41
1845 .byte   0xf5,0xf7,0xf7,0x02,    0x83,0xcc,0xcc,0x4f
1846 .byte   0x68,0x34,0x34,0x5c,    0x51,0xa5,0xa5,0xf4
1847 .byte   0xd1,0xe5,0xe5,0x34,    0xf9,0xf1,0xf1,0x08
1848 .byte   0xe2,0x71,0x71,0x93,    0xab,0xd8,0xd8,0x73
1849 .byte   0x62,0x31,0x31,0x53,    0x2a,0x15,0x15,0x3f
1850 .byte   0x08,0x04,0x04,0x0c,    0x95,0xc7,0xc7,0x52
1851 .byte   0x46,0x23,0x23,0x65,    0x9d,0xc3,0xc3,0x5e
1852 .byte   0x30,0x18,0x18,0x28,    0x37,0x96,0x96,0xa1
1853 .byte   0x0a,0x05,0x05,0x0f,    0x2f,0x9a,0x9a,0xb5
1854 .byte   0x0e,0x07,0x07,0x09,    0x24,0x12,0x12,0x36
1855 .byte   0x1b,0x80,0x80,0x9b,    0xdf,0xe2,0xe2,0x3d
1856 .byte   0xcd,0xeb,0xeb,0x26,    0x4e,0x27,0x27,0x69
1857 .byte   0x7f,0xb2,0xb2,0xcd,    0xea,0x75,0x75,0x9f
1858 .byte   0x12,0x09,0x09,0x1b,    0x1d,0x83,0x83,0x9e
1859 .byte   0x58,0x2c,0x2c,0x74,    0x34,0x1a,0x1a,0x2e
1860 .byte   0x36,0x1b,0x1b,0x2d,    0xdc,0x6e,0x6e,0xb2
1861 .byte   0xb4,0x5a,0x5a,0xee,    0x5b,0xa0,0xa0,0xfb
1862 .byte   0xa4,0x52,0x52,0xf6,    0x76,0x3b,0x3b,0x4d
1863 .byte   0xb7,0xd6,0xd6,0x61,    0x7d,0xb3,0xb3,0xce
1864 .byte   0x52,0x29,0x29,0x7b,    0xdd,0xe3,0xe3,0x3e
1865 .byte   0x5e,0x2f,0x2f,0x71,    0x13,0x84,0x84,0x97
1866 .byte   0xa6,0x53,0x53,0xf5,    0xb9,0xd1,0xd1,0x68
1867 .byte   0x00,0x00,0x00,0x00,    0xc1,0xed,0xed,0x2c
1868 .byte   0x40,0x20,0x20,0x60,    0xe3,0xfc,0xfc,0x1f
1869 .byte   0x79,0xb1,0xb1,0xc8,    0xb6,0x5b,0x5b,0xed
1870 .byte   0xd4,0x6a,0x6a,0xbe,    0x8d,0xcb,0xcb,0x46
1871 .byte   0x67,0xbe,0xbe,0xd9,    0x72,0x39,0x39,0x4b
1872 .byte   0x94,0x4a,0x4a,0xde,    0x98,0x4c,0x4c,0xd4
1873 .byte   0xb0,0x58,0x58,0xe8,    0x85,0xcf,0xcf,0x4a
1874 .byte   0xbb,0xd0,0xd0,0x6b,    0xc5,0xef,0xef,0x2a
1875 .byte   0x4f,0xaa,0xaa,0xe5,    0xed,0xfb,0xfb,0x16
1876 .byte   0x86,0x43,0x43,0xc5,    0x9a,0x4d,0x4d,0xd7
1877 .byte   0x66,0x33,0x33,0x55,    0x11,0x85,0x85,0x94
1878 .byte   0x8a,0x45,0x45,0xcf,    0xe9,0xf9,0xf9,0x10
1879 .byte   0x04,0x02,0x02,0x06,    0xfe,0x7f,0x7f,0x81
1880 .byte   0xa0,0x50,0x50,0xf0,    0x78,0x3c,0x3c,0x44
1881 .byte   0x25,0x9f,0x9f,0xba,    0x4b,0xa8,0xa8,0xe3
1882 .byte   0xa2,0x51,0x51,0xf3,    0x5d,0xa3,0xa3,0xfe
1883 .byte   0x80,0x40,0x40,0xc0,    0x05,0x8f,0x8f,0x8a
1884 .byte   0x3f,0x92,0x92,0xad,    0x21,0x9d,0x9d,0xbc
1885 .byte   0x70,0x38,0x38,0x48,    0xf1,0xf5,0xf5,0x04
1886 .byte   0x63,0xbc,0xbc,0xdf,    0x77,0xb6,0xb6,0xc1
1887 .byte   0xaf,0xda,0xda,0x75,    0x42,0x21,0x21,0x63
1888 .byte   0x20,0x10,0x10,0x30,    0xe5,0xff,0xff,0x1a
1889 .byte   0xfd,0xf3,0xf3,0x0e,    0xbf,0xd2,0xd2,0x6d
1890 .byte   0x81,0xcd,0xcd,0x4c,    0x18,0x0c,0x0c,0x14
1891 .byte   0x26,0x13,0x13,0x35,    0xc3,0xec,0xec,0x2f
1892 .byte   0xbe,0x5f,0x5f,0xe1,    0x35,0x97,0x97,0xa2
1893 .byte   0x88,0x44,0x44,0xcc,    0x2e,0x17,0x17,0x39
1894 .byte   0x93,0xc4,0xc4,0x57,    0x55,0xa7,0xa7,0xf2
1895 .byte   0xfc,0x7e,0x7e,0x82,    0x7a,0x3d,0x3d,0x47
1896 .byte   0xc8,0x64,0x64,0xac,    0xba,0x5d,0x5d,0xe7
1897 .byte   0x32,0x19,0x19,0x2b,    0xe6,0x73,0x73,0x95
1898 .byte   0xc0,0x60,0x60,0xa0,    0x19,0x81,0x81,0x98
1899 .byte   0x9e,0x4f,0x4f,0xd1,    0xa3,0xdc,0xdc,0x7f
1900 .byte   0x44,0x22,0x22,0x66,    0x54,0x2a,0x2a,0x7e
1901 .byte   0x3b,0x90,0x90,0xab,    0x0b,0x88,0x88,0x83
1902 .byte   0x8c,0x46,0x46,0xca,    0xc7,0xee,0xee,0x29
1903 .byte   0x6b,0xb8,0xb8,0xd3,    0x28,0x14,0x14,0x3c
1904 .byte   0xa7,0xde,0xde,0x79,    0xbc,0x5e,0x5e,0xe2
1905 .byte   0x16,0x0b,0x0b,0x1d,    0xad,0xdb,0xdb,0x76
1906 .byte   0xdb,0xe0,0xe0,0x3b,    0x64,0x32,0x32,0x56
1907 .byte   0x74,0x3a,0x3a,0x4e,    0x14,0x0a,0x0a,0x1e
1908 .byte   0x92,0x49,0x49,0xdb,    0x0c,0x06,0x06,0x0a
1909 .byte   0x48,0x24,0x24,0x6c,    0xb8,0x5c,0x5c,0xe4
1910 .byte   0x9f,0xc2,0xc2,0x5d,    0xbd,0xd3,0xd3,0x6e
1911 .byte   0x43,0xac,0xac,0xef,    0xc4,0x62,0x62,0xa6
1912 .byte   0x39,0x91,0x91,0xa8,    0x31,0x95,0x95,0xa4
1913 .byte   0xd3,0xe4,0xe4,0x37,    0xf2,0x79,0x79,0x8b
1914 .byte   0xd5,0xe7,0xe7,0x32,    0x8b,0xc8,0xc8,0x43
1915 .byte   0x6e,0x37,0x37,0x59,    0xda,0x6d,0x6d,0xb7
1916 .byte   0x01,0x8d,0x8d,0x8c,    0xb1,0xd5,0xd5,0x64
1917 .byte   0x9c,0x4e,0x4e,0xd2,    0x49,0xa9,0xa9,0xe0
1918 .byte   0xd8,0x6c,0x6c,0xb4,    0xac,0x56,0x56,0xfa
1919 .byte   0xf3,0xf4,0xf4,0x07,    0xcf,0xea,0xea,0x25
1920 .byte   0xca,0x65,0x65,0xaf,    0xf4,0x7a,0x7a,0x8e
1921 .byte   0x47,0xae,0xae,0xe9,    0x10,0x08,0x08,0x18
1922 .byte   0x6f,0xba,0xba,0xd5,    0xf0,0x78,0x78,0x88
1923 .byte   0x4a,0x25,0x25,0x6f,    0x5c,0x2e,0x2e,0x72
1924 .byte   0x38,0x1c,0x1c,0x24,    0x57,0xa6,0xa6,0xf1
1925 .byte   0x73,0xb4,0xb4,0xc7,    0x97,0xc6,0xc6,0x51
1926 .byte   0xcb,0xe8,0xe8,0x23,    0xa1,0xdd,0xdd,0x7c
1927 .byte   0xe8,0x74,0x74,0x9c,    0x3e,0x1f,0x1f,0x21
1928 .byte   0x96,0x4b,0x4b,0xdd,    0x61,0xbd,0xbd,0xdc
1929 .byte   0x0d,0x8b,0x8b,0x86,    0x0f,0x8a,0x8a,0x85
1930 .byte   0xe0,0x70,0x70,0x90,    0x7c,0x3e,0x3e,0x42
1931 .byte   0x71,0xb5,0xb5,0xc4,    0xcc,0x66,0x66,0xaa
1932 .byte   0x90,0x48,0x48,0xd8,    0x06,0x03,0x03,0x05
1933 .byte   0xf7,0xf6,0xf6,0x01,    0x1c,0x0e,0x0e,0x12
1934 .byte   0xc2,0x61,0x61,0xa3,    0x6a,0x35,0x35,0x5f
1935 .byte   0xae,0x57,0x57,0xf9,    0x69,0xb9,0xb9,0xd0
1936 .byte   0x17,0x86,0x86,0x91,    0x99,0xc1,0xc1,0x58
1937 .byte   0x3a,0x1d,0x1d,0x27,    0x27,0x9e,0x9e,0xb9
1938 .byte   0xd9,0xe1,0xe1,0x38,    0xeb,0xf8,0xf8,0x13
1939 .byte   0x2b,0x98,0x98,0xb3,    0x22,0x11,0x11,0x33
1940 .byte   0xd2,0x69,0x69,0xbb,    0xa9,0xd9,0xd9,0x70
1941 .byte   0x07,0x8e,0x8e,0x89,    0x33,0x94,0x94,0xa7
1942 .byte   0x2d,0x9b,0x9b,0xb6,    0x3c,0x1e,0x1e,0x22
1943 .byte   0x15,0x87,0x87,0x92,    0xc9,0xe9,0xe9,0x20
1944 .byte   0x87,0xce,0xce,0x49,    0xaa,0x55,0x55,0xff
1945 .byte   0x50,0x28,0x28,0x78,    0xa5,0xdf,0xdf,0x7a
1946 .byte   0x03,0x8c,0x8c,0x8f,    0x59,0xa1,0xa1,0xf8
1947 .byte   0x09,0x89,0x89,0x80,    0x1a,0x0d,0x0d,0x17
1948 .byte   0x65,0xbf,0xbf,0xda,    0xd7,0xe6,0xe6,0x31
1949 .byte   0x84,0x42,0x42,0xc6,    0xd0,0x68,0x68,0xb8
1950 .byte   0x82,0x41,0x41,0xc3,    0x29,0x99,0x99,0xb0
1951 .byte   0x5a,0x2d,0x2d,0x77,    0x1e,0x0f,0x0f,0x11
1952 .byte   0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
1953 .byte   0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
1954
1955 AES_Td:
1956 .byte   0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53     # Td0
1957 .byte   0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
1958 .byte   0x3b,0xab,0x6b,0xcb,    0x1f,0x9d,0x45,0xf1
1959 .byte   0xac,0xfa,0x58,0xab,    0x4b,0xe3,0x03,0x93
1960 .byte   0x20,0x30,0xfa,0x55,    0xad,0x76,0x6d,0xf6
1961 .byte   0x88,0xcc,0x76,0x91,    0xf5,0x02,0x4c,0x25
1962 .byte   0x4f,0xe5,0xd7,0xfc,    0xc5,0x2a,0xcb,0xd7
1963 .byte   0x26,0x35,0x44,0x80,    0xb5,0x62,0xa3,0x8f
1964 .byte   0xde,0xb1,0x5a,0x49,    0x25,0xba,0x1b,0x67
1965 .byte   0x45,0xea,0x0e,0x98,    0x5d,0xfe,0xc0,0xe1
1966 .byte   0xc3,0x2f,0x75,0x02,    0x81,0x4c,0xf0,0x12
1967 .byte   0x8d,0x46,0x97,0xa3,    0x6b,0xd3,0xf9,0xc6
1968 .byte   0x03,0x8f,0x5f,0xe7,    0x15,0x92,0x9c,0x95
1969 .byte   0xbf,0x6d,0x7a,0xeb,    0x95,0x52,0x59,0xda
1970 .byte   0xd4,0xbe,0x83,0x2d,    0x58,0x74,0x21,0xd3
1971 .byte   0x49,0xe0,0x69,0x29,    0x8e,0xc9,0xc8,0x44
1972 .byte   0x75,0xc2,0x89,0x6a,    0xf4,0x8e,0x79,0x78
1973 .byte   0x99,0x58,0x3e,0x6b,    0x27,0xb9,0x71,0xdd
1974 .byte   0xbe,0xe1,0x4f,0xb6,    0xf0,0x88,0xad,0x17
1975 .byte   0xc9,0x20,0xac,0x66,    0x7d,0xce,0x3a,0xb4
1976 .byte   0x63,0xdf,0x4a,0x18,    0xe5,0x1a,0x31,0x82
1977 .byte   0x97,0x51,0x33,0x60,    0x62,0x53,0x7f,0x45
1978 .byte   0xb1,0x64,0x77,0xe0,    0xbb,0x6b,0xae,0x84
1979 .byte   0xfe,0x81,0xa0,0x1c,    0xf9,0x08,0x2b,0x94
1980 .byte   0x70,0x48,0x68,0x58,    0x8f,0x45,0xfd,0x19
1981 .byte   0x94,0xde,0x6c,0x87,    0x52,0x7b,0xf8,0xb7
1982 .byte   0xab,0x73,0xd3,0x23,    0x72,0x4b,0x02,0xe2
1983 .byte   0xe3,0x1f,0x8f,0x57,    0x66,0x55,0xab,0x2a
1984 .byte   0xb2,0xeb,0x28,0x07,    0x2f,0xb5,0xc2,0x03
1985 .byte   0x86,0xc5,0x7b,0x9a,    0xd3,0x37,0x08,0xa5
1986 .byte   0x30,0x28,0x87,0xf2,    0x23,0xbf,0xa5,0xb2
1987 .byte   0x02,0x03,0x6a,0xba,    0xed,0x16,0x82,0x5c
1988 .byte   0x8a,0xcf,0x1c,0x2b,    0xa7,0x79,0xb4,0x92
1989 .byte   0xf3,0x07,0xf2,0xf0,    0x4e,0x69,0xe2,0xa1
1990 .byte   0x65,0xda,0xf4,0xcd,    0x06,0x05,0xbe,0xd5
1991 .byte   0xd1,0x34,0x62,0x1f,    0xc4,0xa6,0xfe,0x8a
1992 .byte   0x34,0x2e,0x53,0x9d,    0xa2,0xf3,0x55,0xa0
1993 .byte   0x05,0x8a,0xe1,0x32,    0xa4,0xf6,0xeb,0x75
1994 .byte   0x0b,0x83,0xec,0x39,    0x40,0x60,0xef,0xaa
1995 .byte   0x5e,0x71,0x9f,0x06,    0xbd,0x6e,0x10,0x51
1996 .byte   0x3e,0x21,0x8a,0xf9,    0x96,0xdd,0x06,0x3d
1997 .byte   0xdd,0x3e,0x05,0xae,    0x4d,0xe6,0xbd,0x46
1998 .byte   0x91,0x54,0x8d,0xb5,    0x71,0xc4,0x5d,0x05
1999 .byte   0x04,0x06,0xd4,0x6f,    0x60,0x50,0x15,0xff
2000 .byte   0x19,0x98,0xfb,0x24,    0xd6,0xbd,0xe9,0x97
2001 .byte   0x89,0x40,0x43,0xcc,    0x67,0xd9,0x9e,0x77
2002 .byte   0xb0,0xe8,0x42,0xbd,    0x07,0x89,0x8b,0x88
2003 .byte   0xe7,0x19,0x5b,0x38,    0x79,0xc8,0xee,0xdb
2004 .byte   0xa1,0x7c,0x0a,0x47,    0x7c,0x42,0x0f,0xe9
2005 .byte   0xf8,0x84,0x1e,0xc9,    0x00,0x00,0x00,0x00
2006 .byte   0x09,0x80,0x86,0x83,    0x32,0x2b,0xed,0x48
2007 .byte   0x1e,0x11,0x70,0xac,    0x6c,0x5a,0x72,0x4e
2008 .byte   0xfd,0x0e,0xff,0xfb,    0x0f,0x85,0x38,0x56
2009 .byte   0x3d,0xae,0xd5,0x1e,    0x36,0x2d,0x39,0x27
2010 .byte   0x0a,0x0f,0xd9,0x64,    0x68,0x5c,0xa6,0x21
2011 .byte   0x9b,0x5b,0x54,0xd1,    0x24,0x36,0x2e,0x3a
2012 .byte   0x0c,0x0a,0x67,0xb1,    0x93,0x57,0xe7,0x0f
2013 .byte   0xb4,0xee,0x96,0xd2,    0x1b,0x9b,0x91,0x9e
2014 .byte   0x80,0xc0,0xc5,0x4f,    0x61,0xdc,0x20,0xa2
2015 .byte   0x5a,0x77,0x4b,0x69,    0x1c,0x12,0x1a,0x16
2016 .byte   0xe2,0x93,0xba,0x0a,    0xc0,0xa0,0x2a,0xe5
2017 .byte   0x3c,0x22,0xe0,0x43,    0x12,0x1b,0x17,0x1d
2018 .byte   0x0e,0x09,0x0d,0x0b,    0xf2,0x8b,0xc7,0xad
2019 .byte   0x2d,0xb6,0xa8,0xb9,    0x14,0x1e,0xa9,0xc8
2020 .byte   0x57,0xf1,0x19,0x85,    0xaf,0x75,0x07,0x4c
2021 .byte   0xee,0x99,0xdd,0xbb,    0xa3,0x7f,0x60,0xfd
2022 .byte   0xf7,0x01,0x26,0x9f,    0x5c,0x72,0xf5,0xbc
2023 .byte   0x44,0x66,0x3b,0xc5,    0x5b,0xfb,0x7e,0x34
2024 .byte   0x8b,0x43,0x29,0x76,    0xcb,0x23,0xc6,0xdc
2025 .byte   0xb6,0xed,0xfc,0x68,    0xb8,0xe4,0xf1,0x63
2026 .byte   0xd7,0x31,0xdc,0xca,    0x42,0x63,0x85,0x10
2027 .byte   0x13,0x97,0x22,0x40,    0x84,0xc6,0x11,0x20
2028 .byte   0x85,0x4a,0x24,0x7d,    0xd2,0xbb,0x3d,0xf8
2029 .byte   0xae,0xf9,0x32,0x11,    0xc7,0x29,0xa1,0x6d
2030 .byte   0x1d,0x9e,0x2f,0x4b,    0xdc,0xb2,0x30,0xf3
2031 .byte   0x0d,0x86,0x52,0xec,    0x77,0xc1,0xe3,0xd0
2032 .byte   0x2b,0xb3,0x16,0x6c,    0xa9,0x70,0xb9,0x99
2033 .byte   0x11,0x94,0x48,0xfa,    0x47,0xe9,0x64,0x22
2034 .byte   0xa8,0xfc,0x8c,0xc4,    0xa0,0xf0,0x3f,0x1a
2035 .byte   0x56,0x7d,0x2c,0xd8,    0x22,0x33,0x90,0xef
2036 .byte   0x87,0x49,0x4e,0xc7,    0xd9,0x38,0xd1,0xc1
2037 .byte   0x8c,0xca,0xa2,0xfe,    0x98,0xd4,0x0b,0x36
2038 .byte   0xa6,0xf5,0x81,0xcf,    0xa5,0x7a,0xde,0x28
2039 .byte   0xda,0xb7,0x8e,0x26,    0x3f,0xad,0xbf,0xa4
2040 .byte   0x2c,0x3a,0x9d,0xe4,    0x50,0x78,0x92,0x0d
2041 .byte   0x6a,0x5f,0xcc,0x9b,    0x54,0x7e,0x46,0x62
2042 .byte   0xf6,0x8d,0x13,0xc2,    0x90,0xd8,0xb8,0xe8
2043 .byte   0x2e,0x39,0xf7,0x5e,    0x82,0xc3,0xaf,0xf5
2044 .byte   0x9f,0x5d,0x80,0xbe,    0x69,0xd0,0x93,0x7c
2045 .byte   0x6f,0xd5,0x2d,0xa9,    0xcf,0x25,0x12,0xb3
2046 .byte   0xc8,0xac,0x99,0x3b,    0x10,0x18,0x7d,0xa7
2047 .byte   0xe8,0x9c,0x63,0x6e,    0xdb,0x3b,0xbb,0x7b
2048 .byte   0xcd,0x26,0x78,0x09,    0x6e,0x59,0x18,0xf4
2049 .byte   0xec,0x9a,0xb7,0x01,    0x83,0x4f,0x9a,0xa8
2050 .byte   0xe6,0x95,0x6e,0x65,    0xaa,0xff,0xe6,0x7e
2051 .byte   0x21,0xbc,0xcf,0x08,    0xef,0x15,0xe8,0xe6
2052 .byte   0xba,0xe7,0x9b,0xd9,    0x4a,0x6f,0x36,0xce
2053 .byte   0xea,0x9f,0x09,0xd4,    0x29,0xb0,0x7c,0xd6
2054 .byte   0x31,0xa4,0xb2,0xaf,    0x2a,0x3f,0x23,0x31
2055 .byte   0xc6,0xa5,0x94,0x30,    0x35,0xa2,0x66,0xc0
2056 .byte   0x74,0x4e,0xbc,0x37,    0xfc,0x82,0xca,0xa6
2057 .byte   0xe0,0x90,0xd0,0xb0,    0x33,0xa7,0xd8,0x15
2058 .byte   0xf1,0x04,0x98,0x4a,    0x41,0xec,0xda,0xf7
2059 .byte   0x7f,0xcd,0x50,0x0e,    0x17,0x91,0xf6,0x2f
2060 .byte   0x76,0x4d,0xd6,0x8d,    0x43,0xef,0xb0,0x4d
2061 .byte   0xcc,0xaa,0x4d,0x54,    0xe4,0x96,0x04,0xdf
2062 .byte   0x9e,0xd1,0xb5,0xe3,    0x4c,0x6a,0x88,0x1b
2063 .byte   0xc1,0x2c,0x1f,0xb8,    0x46,0x65,0x51,0x7f
2064 .byte   0x9d,0x5e,0xea,0x04,    0x01,0x8c,0x35,0x5d
2065 .byte   0xfa,0x87,0x74,0x73,    0xfb,0x0b,0x41,0x2e
2066 .byte   0xb3,0x67,0x1d,0x5a,    0x92,0xdb,0xd2,0x52
2067 .byte   0xe9,0x10,0x56,0x33,    0x6d,0xd6,0x47,0x13
2068 .byte   0x9a,0xd7,0x61,0x8c,    0x37,0xa1,0x0c,0x7a
2069 .byte   0x59,0xf8,0x14,0x8e,    0xeb,0x13,0x3c,0x89
2070 .byte   0xce,0xa9,0x27,0xee,    0xb7,0x61,0xc9,0x35
2071 .byte   0xe1,0x1c,0xe5,0xed,    0x7a,0x47,0xb1,0x3c
2072 .byte   0x9c,0xd2,0xdf,0x59,    0x55,0xf2,0x73,0x3f
2073 .byte   0x18,0x14,0xce,0x79,    0x73,0xc7,0x37,0xbf
2074 .byte   0x53,0xf7,0xcd,0xea,    0x5f,0xfd,0xaa,0x5b
2075 .byte   0xdf,0x3d,0x6f,0x14,    0x78,0x44,0xdb,0x86
2076 .byte   0xca,0xaf,0xf3,0x81,    0xb9,0x68,0xc4,0x3e
2077 .byte   0x38,0x24,0x34,0x2c,    0xc2,0xa3,0x40,0x5f
2078 .byte   0x16,0x1d,0xc3,0x72,    0xbc,0xe2,0x25,0x0c
2079 .byte   0x28,0x3c,0x49,0x8b,    0xff,0x0d,0x95,0x41
2080 .byte   0x39,0xa8,0x01,0x71,    0x08,0x0c,0xb3,0xde
2081 .byte   0xd8,0xb4,0xe4,0x9c,    0x64,0x56,0xc1,0x90
2082 .byte   0x7b,0xcb,0x84,0x61,    0xd5,0x32,0xb6,0x70
2083 .byte   0x48,0x6c,0x5c,0x74,    0xd0,0xb8,0x57,0x42
2084
2085 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38  # Td4
2086 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
2087 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
2088 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
2089 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
2090 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
2091 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
2092 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
2093 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
2094 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
2095 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
2096 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
2097 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
2098 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
2099 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
2100 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
2101 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
2102 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
2103 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
2104 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
2105 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
2106 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
2107 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
2108 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
2109 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
2110 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
2111 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
2112 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
2113 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
2114 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
2115 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
2116 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
2117
2118 AES_Te4:
2119 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5  # Te4
2120 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
2121 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
2122 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
2123 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
2124 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
2125 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
2126 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
2127 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
2128 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
2129 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
2130 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
2131 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
2132 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
2133 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
2134 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
2135 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
2136 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
2137 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
2138 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
2139 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
2140 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
2141 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
2142 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
2143 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
2144 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
2145 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
2146 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
2147 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
2148 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
2149 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
2150 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
2151
2152 .byte   0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00     # rcon
2153 .byte   0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
2154 .byte   0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
2155 .byte   0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
2156 .byte   0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
2157 ___
2158 \f
2159 foreach (split("\n",$code)) {
             # Post-process the generated assembly one line at a time: expand
             # inline `...` expressions, lower the made-up byte-order-neutral
             # instructions to real ones, adjust offsets and shift amounts
             # when not big-endian, then print the resulting line.

             # Replace every backtick-quoted span with the result of
             # eval-ing its contents as Perl.
2160         s/\`([^\`]*)\`/eval $1/ge;
2161
2162         # made-up _instructions, _xtr, _ins, _ins2, _ror and _bias, cope
2163         # with byte order dependencies...
2164         if (/^\s+_/) {
                 # Two-operand shorthand "_op $rd,arg" becomes the
                 # three-operand "_op $rd,$rd,arg".  The name has to consist
                 # of lower-case letters only, so _ins2 is never expanded
                 # here; lines already in three-operand form do not match.
2165             s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
2166
                 # Lower the pseudo-op.  The substitutions are chained with
                 # "or", so the chain stops at the first one that matches.
                 # With $big_endian set the written amount is used as is,
                 # otherwise it is transformed:
                 #   _xtr  -> srl  by 24-amount (amount may carry "-2" suffixes)
                 #   _ins  -> sll  by 24-amount
                 #   _ins2 -> ins  ...,pos,8 with pos = 24-amount
                 #   _ror  -> srl  by -amount
                 #   _bias -> sll  by (amount-16)&31
2167             s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
2168                 sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
2169                                         :               eval("24-$3"))/e or
2170             s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2171                 sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
2172                                         :               eval("24-$3"))/e or
2173             s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2174                 sprintf("ins\t$1,$2,%d,8",$big_endian ? eval($3)
2175                                         :               eval("24-$3"))/e or
2176             s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
2177                 sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
2178                                         :               eval("$3*-1"))/e or
2179             s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2180                 sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
2181                                         :               eval("($3-16)&31"))/e;
2182
                 # Tidy up what the lowering produced (again first match
                 # wins): srl with a negative amount is rewritten as sll with
                 # the positive amount, srl by 0 is replaced with
                 # "and ...,0xff", and sll by 0 is commented out by prefixing
                 # it with '#'.
2183             s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
2184                 sprintf("sll\t$1,$2,$3")/e                              or
2185             s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
2186                 sprintf("and\t$1,$2,0xff")/e                            or
2187             s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
2188         }
2189
2190         # convert lwl/lwr and swr/swl to little-endian order
             # NOTE: the greedy ".*" leaves only the last digit before "(" in
             # $2, and "&" binds looser than "+"/"-", so the whole sum is
             # masked: that digit d becomes (d-1)&3 for lwl/swl and (d+1)&3
             # for lwr/swr.  Anything preceding the digit is kept in $1.
2191         if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
2192             s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
2193                 sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e        or
2194             s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
2195                 sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
2196         }
2197
             # When not big-endian, a rotr amount becomes 32-amount and the
             # bit position of an 8-bit-wide ext becomes 24-position.
2198         if (!$big_endian) {
2199             s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
2200             s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
2201         }
2202
             # Emit the processed line; split() removed the newline, so put
             # it back.
2203         print $_,"\n";
2204 }
2205
# STDOUT is buffered, so a failed write (full disk, broken pipe) may only be
# reported when the handle is flushed by close.  Check the result and abort
# with a non-zero status rather than silently emitting truncated assembly.
2206 close STDOUT or die "error closing STDOUT: $!";