MIPS assembly pack: fix MIPS64 assembler warnings.
[openssl.git] / crypto / aes / asm / aes-mips.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # AES for MIPS
11
12 # October 2010
13 #
14 # Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
15 # spends ~68 cycles per byte processed with 128-bit key. This is ~16%
16 # faster than gcc-generated code, which is not very impressive. But
17 # recall that compressed S-box requires extra processing, namely
18 # additional rotations. Rotations are implemented with lwl/lwr pairs,
19 # which are normally used for loading unaligned data. Another cool
20 # thing about this module is its endian neutrality, which means that
21 # it processes data without ever changing byte order...
22
23 # September 2012
24 #
25 # Add MIPS32R2 (~10% fewer instructions) and SmartMIPS ASE (further
26 # ~25% fewer instructions) code. Note that there is no run-time switch;
27 # instead, the code path is chosen at preprocessing time: pass -mips32r2
28 # and/or -msmartmips.
29
30 ######################################################################
31 # There are several MIPS ABIs in use; O32 and N32/64 are the most
32 # widely used. Then there is a new contender: NUBI. It appears that if
33 # one picks the latter, it's possible to arrange code in ABI neutral
34 # manner. Therefore let's stick to NUBI register layout:
35 #
36 ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));        # $0,$1,$2,$24,$25
37 ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));   # $4..$11
38 ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));    # $12..$23
39 ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));            # $3,$28,$29,$30,$31
40 #
41 # The return value is placed in $a0. Following coding rules facilitate
42 # interoperability:
43 #
44 # - never ever touch $tp, "thread pointer", former $gp;
45 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
46 #   old code];
47 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
48 #
49 # For reference here is register layout for N32/64 MIPS ABIs:
50 #
51 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
52 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
53 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
54 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
55 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
56 #
57 $flavour = shift || "o32"; # first argument; supported flavours are o32,n32,64,nubi32,nubi64
58
59 if ($flavour =~ /64|n32/i) {    # 64, nubi64 and n32: doubleword mnemonics
60         $PTR_LA="dla";          # load-address macro
61         $PTR_ADD="dadd";        # incidentally works even on n32
62         $PTR_SUB="dsub";        # incidentally works even on n32
63         $PTR_INS="dins";        # bit-field insert
64         $REG_S="sd";            # store used to save registers in the prologue
65         $REG_L="ld";            # load used to restore them in the epilogue
66         $PTR_SLL="dsll";        # incidentally works even on n32
67         $SZREG=8;               # bytes per saved-register stack slot
68 } else {                        # everything else (o32, nubi32): word mnemonics
69         $PTR_LA="la";
70         $PTR_ADD="add";
71         $PTR_SUB="sub";
72         $PTR_INS="ins";
73         $REG_S="sw";
74         $REG_L="lw";
75         $PTR_SLL="sll";
76         $SZREG=4;
77 }
78 $pf = ($flavour =~ /nubi/i) ? $t0 : $t2;        # register handed to .cpload/.cpsetup below
79 #
80 # <appro@openssl.org>
81 #
82 ######################################################################
83
84 # "MIPSEL" survives preprocessing only if the compiler does not predefine it
85 $big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});
86 # no compiler configured: fall back to the byte order of the build host
87 if (!defined($big_endian))
88 {    $big_endian=(unpack('L',pack('N',1))==1);   }
89 # Output goes to the first remaining argument that looks like a file name.
90 # Without one, keep the inherited STDOUT rather than closing it.
91 while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
92 if (defined($output) && length($output))
93 {    open(STDOUT,">",$output) || die "can't open $output: $!";    }
94
95 my ($MSB,$LSB)=(0,3);   # byte offsets used with lwl/lwr and swl/swr below; automatically converted to little-endian
96
97 $code.=<<___;   # common preamble: text section, cpp feature macros, PIC option, noat
98 .text
99 #ifdef OPENSSL_FIPSCANISTER
100 # include <openssl/fipssyms.h>
101 #endif
102
103 #if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
104 #define _MIPS_ARCH_MIPS32R2
105 #endif
106
107 #if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
108 .option pic2
109 #endif
110 .set    noat
111 ___
112 \f
113 {{{
114 my $FRAMESIZE=16*$SZREG;        # stack frame of 16 register-sized slots
115 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";      # .mask bitmap: $31,$30,$23-$16; NUBI adds $15-$12 and $3
116
117 my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);    # $s0-$s3 here shadow the file-level names and alias $a4-$a7
118 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);  # table-index scratch; still the file-level $t0-$t2 ($2,$24,$25)
119 my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));  # from here on $t0-$t11 name $12-$23 in this scope
120 my ($key0,$cnt)=($gp,$fp);      # round-key pointer and round counter
121
122 # instruction ordering is "stolen" from the output of the MIPSpro assembler
123 # invoked with -mips3 -O3 arguments...
124 $code.=<<___;
125 .align  5
126 .ent    _mips_AES_encrypt
127 _mips_AES_encrypt:
128         .frame  $sp,0,$ra
129         .set    reorder
130         lw      $t0,0($key)
131         lw      $t1,4($key)
132         lw      $t2,8($key)
133         lw      $t3,12($key)
134         lw      $cnt,240($key)
135         $PTR_ADD $key0,$key,16
136
137         xor     $s0,$t0
138         xor     $s1,$t1
139         xor     $s2,$t2
140         xor     $s3,$t3
141
142         sub     $cnt,1
143 #if defined(__mips_smartmips)
144         ext     $i0,$s1,16,8
145 .Loop_enc:
146         ext     $i1,$s2,16,8
147         ext     $i2,$s3,16,8
148         ext     $i3,$s0,16,8
149         lwxs    $t0,$i0($Tbl)           # Te1[s1>>16]
150         ext     $i0,$s2,8,8
151         lwxs    $t1,$i1($Tbl)           # Te1[s2>>16]
152         ext     $i1,$s3,8,8
153         lwxs    $t2,$i2($Tbl)           # Te1[s3>>16]
154         ext     $i2,$s0,8,8
155         lwxs    $t3,$i3($Tbl)           # Te1[s0>>16]
156         ext     $i3,$s1,8,8
157
158         lwxs    $t4,$i0($Tbl)           # Te2[s2>>8]
159         ext     $i0,$s3,0,8
160         lwxs    $t5,$i1($Tbl)           # Te2[s3>>8]
161         ext     $i1,$s0,0,8
162         lwxs    $t6,$i2($Tbl)           # Te2[s0>>8]
163         ext     $i2,$s1,0,8
164         lwxs    $t7,$i3($Tbl)           # Te2[s1>>8]
165         ext     $i3,$s2,0,8
166
167         lwxs    $t8,$i0($Tbl)           # Te3[s3]
168         ext     $i0,$s0,24,8
169         lwxs    $t9,$i1($Tbl)           # Te3[s0]
170         ext     $i1,$s1,24,8
171         lwxs    $t10,$i2($Tbl)          # Te3[s1]
172         ext     $i2,$s2,24,8
173         lwxs    $t11,$i3($Tbl)          # Te3[s2]
174         ext     $i3,$s3,24,8
175
176         rotr    $t0,$t0,8
177         rotr    $t1,$t1,8
178         rotr    $t2,$t2,8
179         rotr    $t3,$t3,8
180
181         rotr    $t4,$t4,16
182         rotr    $t5,$t5,16
183         rotr    $t6,$t6,16
184         rotr    $t7,$t7,16
185
186         xor     $t0,$t4
187         lwxs    $t4,$i0($Tbl)           # Te0[s0>>24]
188         xor     $t1,$t5
189         lwxs    $t5,$i1($Tbl)           # Te0[s1>>24]
190         xor     $t2,$t6
191         lwxs    $t6,$i2($Tbl)           # Te0[s2>>24]
192         xor     $t3,$t7
193         lwxs    $t7,$i3($Tbl)           # Te0[s3>>24]
194
195         rotr    $t8,$t8,24
196         lw      $s0,0($key0)
197         rotr    $t9,$t9,24
198         lw      $s1,4($key0)
199         rotr    $t10,$t10,24
200         lw      $s2,8($key0)
201         rotr    $t11,$t11,24
202         lw      $s3,12($key0)
203
204         xor     $t0,$t8
205         xor     $t1,$t9
206         xor     $t2,$t10
207         xor     $t3,$t11
208
209         xor     $t0,$t4
210         xor     $t1,$t5
211         xor     $t2,$t6
212         xor     $t3,$t7
213
214         sub     $cnt,1
215         $PTR_ADD $key0,16
216         xor     $s0,$t0
217         xor     $s1,$t1
218         xor     $s2,$t2
219         xor     $s3,$t3
220         .set    noreorder
221         bnez    $cnt,.Loop_enc
222         ext     $i0,$s1,16,8
223
224         _xtr    $i0,$s1,16-2
225 #else
226         _xtr    $i0,$s1,16-2
227 .Loop_enc:
228         _xtr    $i1,$s2,16-2
229         _xtr    $i2,$s3,16-2
230         _xtr    $i3,$s0,16-2
231         and     $i0,0x3fc
232         and     $i1,0x3fc
233         and     $i2,0x3fc
234         and     $i3,0x3fc
235         $PTR_ADD $i0,$Tbl
236         $PTR_ADD $i1,$Tbl
237         $PTR_ADD $i2,$Tbl
238         $PTR_ADD $i3,$Tbl
239 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
240         lw      $t0,0($i0)              # Te1[s1>>16]
241         _xtr    $i0,$s2,8-2
242         lw      $t1,0($i1)              # Te1[s2>>16]
243         _xtr    $i1,$s3,8-2
244         lw      $t2,0($i2)              # Te1[s3>>16]
245         _xtr    $i2,$s0,8-2
246         lw      $t3,0($i3)              # Te1[s0>>16]
247         _xtr    $i3,$s1,8-2
248 #else
249         lwl     $t0,3($i0)              # Te1[s1>>16]
250         lwl     $t1,3($i1)              # Te1[s2>>16]
251         lwl     $t2,3($i2)              # Te1[s3>>16]
252         lwl     $t3,3($i3)              # Te1[s0>>16]
253         lwr     $t0,2($i0)              # Te1[s1>>16]
254         _xtr    $i0,$s2,8-2
255         lwr     $t1,2($i1)              # Te1[s2>>16]
256         _xtr    $i1,$s3,8-2
257         lwr     $t2,2($i2)              # Te1[s3>>16]
258         _xtr    $i2,$s0,8-2
259         lwr     $t3,2($i3)              # Te1[s0>>16]
260         _xtr    $i3,$s1,8-2
261 #endif
262         and     $i0,0x3fc
263         and     $i1,0x3fc
264         and     $i2,0x3fc
265         and     $i3,0x3fc
266         $PTR_ADD $i0,$Tbl
267         $PTR_ADD $i1,$Tbl
268         $PTR_ADD $i2,$Tbl
269         $PTR_ADD $i3,$Tbl
270 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
271         rotr    $t0,$t0,8
272         rotr    $t1,$t1,8
273         rotr    $t2,$t2,8
274         rotr    $t3,$t3,8
275 # if defined(_MIPSEL)
276         lw      $t4,0($i0)              # Te2[s2>>8]
277         _xtr    $i0,$s3,0-2
278         lw      $t5,0($i1)              # Te2[s3>>8]
279         _xtr    $i1,$s0,0-2
280         lw      $t6,0($i2)              # Te2[s0>>8]
281         _xtr    $i2,$s1,0-2
282         lw      $t7,0($i3)              # Te2[s1>>8]
283         _xtr    $i3,$s2,0-2
284
285         and     $i0,0x3fc
286         and     $i1,0x3fc
287         and     $i2,0x3fc
288         and     $i3,0x3fc
289         $PTR_ADD $i0,$Tbl
290         $PTR_ADD $i1,$Tbl
291         $PTR_ADD $i2,$Tbl
292         $PTR_ADD $i3,$Tbl
293         lw      $t8,0($i0)              # Te3[s3]
294         $PTR_INS $i0,$s0,2,8
295         lw      $t9,0($i1)              # Te3[s0]
296         $PTR_INS $i1,$s1,2,8
297         lw      $t10,0($i2)             # Te3[s1]
298         $PTR_INS $i2,$s2,2,8
299         lw      $t11,0($i3)             # Te3[s2]
300         $PTR_INS $i3,$s3,2,8
301 # else
302         lw      $t4,0($i0)              # Te2[s2>>8]
303         $PTR_INS $i0,$s3,2,8
304         lw      $t5,0($i1)              # Te2[s3>>8]
305         $PTR_INS $i1,$s0,2,8
306         lw      $t6,0($i2)              # Te2[s0>>8]
307         $PTR_INS $i2,$s1,2,8
308         lw      $t7,0($i3)              # Te2[s1>>8]
309         $PTR_INS $i3,$s2,2,8
310
311         lw      $t8,0($i0)              # Te3[s3]
312         _xtr    $i0,$s0,24-2
313         lw      $t9,0($i1)              # Te3[s0]
314         _xtr    $i1,$s1,24-2
315         lw      $t10,0($i2)             # Te3[s1]
316         _xtr    $i2,$s2,24-2
317         lw      $t11,0($i3)             # Te3[s2]
318         _xtr    $i3,$s3,24-2
319
320         and     $i0,0x3fc
321         and     $i1,0x3fc
322         and     $i2,0x3fc
323         and     $i3,0x3fc
324         $PTR_ADD $i0,$Tbl
325         $PTR_ADD $i1,$Tbl
326         $PTR_ADD $i2,$Tbl
327         $PTR_ADD $i3,$Tbl
328 # endif
329         rotr    $t4,$t4,16
330         rotr    $t5,$t5,16
331         rotr    $t6,$t6,16
332         rotr    $t7,$t7,16
333
334         rotr    $t8,$t8,24
335         rotr    $t9,$t9,24
336         rotr    $t10,$t10,24
337         rotr    $t11,$t11,24
338 #else
339         lwl     $t4,2($i0)              # Te2[s2>>8]
340         lwl     $t5,2($i1)              # Te2[s3>>8]
341         lwl     $t6,2($i2)              # Te2[s0>>8]
342         lwl     $t7,2($i3)              # Te2[s1>>8]
343         lwr     $t4,1($i0)              # Te2[s2>>8]
344         _xtr    $i0,$s3,0-2
345         lwr     $t5,1($i1)              # Te2[s3>>8]
346         _xtr    $i1,$s0,0-2
347         lwr     $t6,1($i2)              # Te2[s0>>8]
348         _xtr    $i2,$s1,0-2
349         lwr     $t7,1($i3)              # Te2[s1>>8]
350         _xtr    $i3,$s2,0-2
351
352         and     $i0,0x3fc
353         and     $i1,0x3fc
354         and     $i2,0x3fc
355         and     $i3,0x3fc
356         $PTR_ADD $i0,$Tbl
357         $PTR_ADD $i1,$Tbl
358         $PTR_ADD $i2,$Tbl
359         $PTR_ADD $i3,$Tbl
360         lwl     $t8,1($i0)              # Te3[s3]
361         lwl     $t9,1($i1)              # Te3[s0]
362         lwl     $t10,1($i2)             # Te3[s1]
363         lwl     $t11,1($i3)             # Te3[s2]
364         lwr     $t8,0($i0)              # Te3[s3]
365         _xtr    $i0,$s0,24-2
366         lwr     $t9,0($i1)              # Te3[s0]
367         _xtr    $i1,$s1,24-2
368         lwr     $t10,0($i2)             # Te3[s1]
369         _xtr    $i2,$s2,24-2
370         lwr     $t11,0($i3)             # Te3[s2]
371         _xtr    $i3,$s3,24-2
372
373         and     $i0,0x3fc
374         and     $i1,0x3fc
375         and     $i2,0x3fc
376         and     $i3,0x3fc
377         $PTR_ADD $i0,$Tbl
378         $PTR_ADD $i1,$Tbl
379         $PTR_ADD $i2,$Tbl
380         $PTR_ADD $i3,$Tbl
381 #endif
382         xor     $t0,$t4
383         lw      $t4,0($i0)              # Te0[s0>>24]
384         xor     $t1,$t5
385         lw      $t5,0($i1)              # Te0[s1>>24]
386         xor     $t2,$t6
387         lw      $t6,0($i2)              # Te0[s2>>24]
388         xor     $t3,$t7
389         lw      $t7,0($i3)              # Te0[s3>>24]
390
391         xor     $t0,$t8
392         lw      $s0,0($key0)
393         xor     $t1,$t9
394         lw      $s1,4($key0)
395         xor     $t2,$t10
396         lw      $s2,8($key0)
397         xor     $t3,$t11
398         lw      $s3,12($key0)
399
400         xor     $t0,$t4
401         xor     $t1,$t5
402         xor     $t2,$t6
403         xor     $t3,$t7
404
405         sub     $cnt,1
406         $PTR_ADD $key0,16
407         xor     $s0,$t0
408         xor     $s1,$t1
409         xor     $s2,$t2
410         xor     $s3,$t3
411         .set    noreorder
412         bnez    $cnt,.Loop_enc
413         _xtr    $i0,$s1,16-2
414 #endif
415
416         .set    reorder
417         _xtr    $i1,$s2,16-2
418         _xtr    $i2,$s3,16-2
419         _xtr    $i3,$s0,16-2
420         and     $i0,0x3fc
421         and     $i1,0x3fc
422         and     $i2,0x3fc
423         and     $i3,0x3fc
424         $PTR_ADD $i0,$Tbl
425         $PTR_ADD $i1,$Tbl
426         $PTR_ADD $i2,$Tbl
427         $PTR_ADD $i3,$Tbl
428         lbu     $t0,2($i0)              # Te4[s1>>16]
429         _xtr    $i0,$s2,8-2
430         lbu     $t1,2($i1)              # Te4[s2>>16]
431         _xtr    $i1,$s3,8-2
432         lbu     $t2,2($i2)              # Te4[s3>>16]
433         _xtr    $i2,$s0,8-2
434         lbu     $t3,2($i3)              # Te4[s0>>16]
435         _xtr    $i3,$s1,8-2
436
437         and     $i0,0x3fc
438         and     $i1,0x3fc
439         and     $i2,0x3fc
440         and     $i3,0x3fc
441         $PTR_ADD $i0,$Tbl
442         $PTR_ADD $i1,$Tbl
443         $PTR_ADD $i2,$Tbl
444         $PTR_ADD $i3,$Tbl
445 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
446 # if defined(_MIPSEL)
447         lbu     $t4,2($i0)              # Te4[s2>>8]
448         $PTR_INS $i0,$s0,2,8
449         lbu     $t5,2($i1)              # Te4[s3>>8]
450         $PTR_INS $i1,$s1,2,8
451         lbu     $t6,2($i2)              # Te4[s0>>8]
452         $PTR_INS $i2,$s2,2,8
453         lbu     $t7,2($i3)              # Te4[s1>>8]
454         $PTR_INS $i3,$s3,2,8
455
456         lbu     $t8,2($i0)              # Te4[s0>>24]
457         _xtr    $i0,$s3,0-2
458         lbu     $t9,2($i1)              # Te4[s1>>24]
459         _xtr    $i1,$s0,0-2
460         lbu     $t10,2($i2)             # Te4[s2>>24]
461         _xtr    $i2,$s1,0-2
462         lbu     $t11,2($i3)             # Te4[s3>>24]
463         _xtr    $i3,$s2,0-2
464
465         and     $i0,0x3fc
466         and     $i1,0x3fc
467         and     $i2,0x3fc
468         and     $i3,0x3fc
469         $PTR_ADD $i0,$Tbl
470         $PTR_ADD $i1,$Tbl
471         $PTR_ADD $i2,$Tbl
472         $PTR_ADD $i3,$Tbl
473 # else
474         lbu     $t4,2($i0)              # Te4[s2>>8]
475         _xtr    $i0,$s0,24-2
476         lbu     $t5,2($i1)              # Te4[s3>>8]
477         _xtr    $i1,$s1,24-2
478         lbu     $t6,2($i2)              # Te4[s0>>8]
479         _xtr    $i2,$s2,24-2
480         lbu     $t7,2($i3)              # Te4[s1>>8]
481         _xtr    $i3,$s3,24-2
482
483         and     $i0,0x3fc
484         and     $i1,0x3fc
485         and     $i2,0x3fc
486         and     $i3,0x3fc
487         $PTR_ADD $i0,$Tbl
488         $PTR_ADD $i1,$Tbl
489         $PTR_ADD $i2,$Tbl
490         $PTR_ADD $i3,$Tbl
491         lbu     $t8,2($i0)              # Te4[s0>>24]
492         $PTR_INS $i0,$s3,2,8
493         lbu     $t9,2($i1)              # Te4[s1>>24]
494         $PTR_INS $i1,$s0,2,8
495         lbu     $t10,2($i2)             # Te4[s2>>24]
496         $PTR_INS $i2,$s1,2,8
497         lbu     $t11,2($i3)             # Te4[s3>>24]
498         $PTR_INS $i3,$s2,2,8
499 # endif
500         _ins    $t0,16
501         _ins    $t1,16
502         _ins    $t2,16
503         _ins    $t3,16
504
505         _ins2   $t0,$t4,8
506         lbu     $t4,2($i0)              # Te4[s3]
507         _ins2   $t1,$t5,8
508         lbu     $t5,2($i1)              # Te4[s0]
509         _ins2   $t2,$t6,8
510         lbu     $t6,2($i2)              # Te4[s1]
511         _ins2   $t3,$t7,8
512         lbu     $t7,2($i3)              # Te4[s2]
513
514         _ins2   $t0,$t8,24
515         lw      $s0,0($key0)
516         _ins2   $t1,$t9,24
517         lw      $s1,4($key0)
518         _ins2   $t2,$t10,24
519         lw      $s2,8($key0)
520         _ins2   $t3,$t11,24
521         lw      $s3,12($key0)
522
523         _ins2   $t0,$t4,0
524         _ins2   $t1,$t5,0
525         _ins2   $t2,$t6,0
526         _ins2   $t3,$t7,0
527 #else
528         lbu     $t4,2($i0)              # Te4[s2>>8]
529         _xtr    $i0,$s0,24-2
530         lbu     $t5,2($i1)              # Te4[s3>>8]
531         _xtr    $i1,$s1,24-2
532         lbu     $t6,2($i2)              # Te4[s0>>8]
533         _xtr    $i2,$s2,24-2
534         lbu     $t7,2($i3)              # Te4[s1>>8]
535         _xtr    $i3,$s3,24-2
536
537         and     $i0,0x3fc
538         and     $i1,0x3fc
539         and     $i2,0x3fc
540         and     $i3,0x3fc
541         $PTR_ADD $i0,$Tbl
542         $PTR_ADD $i1,$Tbl
543         $PTR_ADD $i2,$Tbl
544         $PTR_ADD $i3,$Tbl
545         lbu     $t8,2($i0)              # Te4[s0>>24]
546         _xtr    $i0,$s3,0-2
547         lbu     $t9,2($i1)              # Te4[s1>>24]
548         _xtr    $i1,$s0,0-2
549         lbu     $t10,2($i2)             # Te4[s2>>24]
550         _xtr    $i2,$s1,0-2
551         lbu     $t11,2($i3)             # Te4[s3>>24]
552         _xtr    $i3,$s2,0-2
553
554         and     $i0,0x3fc
555         and     $i1,0x3fc
556         and     $i2,0x3fc
557         and     $i3,0x3fc
558         $PTR_ADD $i0,$Tbl
559         $PTR_ADD $i1,$Tbl
560         $PTR_ADD $i2,$Tbl
561         $PTR_ADD $i3,$Tbl
562
563         _ins    $t0,16
564         _ins    $t1,16
565         _ins    $t2,16
566         _ins    $t3,16
567
568         _ins    $t4,8
569         _ins    $t5,8
570         _ins    $t6,8
571         _ins    $t7,8
572
573         xor     $t0,$t4
574         lbu     $t4,2($i0)              # Te4[s3]
575         xor     $t1,$t5
576         lbu     $t5,2($i1)              # Te4[s0]
577         xor     $t2,$t6
578         lbu     $t6,2($i2)              # Te4[s1]
579         xor     $t3,$t7
580         lbu     $t7,2($i3)              # Te4[s2]
581
582         _ins    $t8,24
583         lw      $s0,0($key0)
584         _ins    $t9,24
585         lw      $s1,4($key0)
586         _ins    $t10,24
587         lw      $s2,8($key0)
588         _ins    $t11,24
589         lw      $s3,12($key0)
590
591         xor     $t0,$t8
592         xor     $t1,$t9
593         xor     $t2,$t10
594         xor     $t3,$t11
595
596         _ins    $t4,0
597         _ins    $t5,0
598         _ins    $t6,0
599         _ins    $t7,0
600
601         xor     $t0,$t4
602         xor     $t1,$t5
603         xor     $t2,$t6
604         xor     $t3,$t7
605 #endif
606         xor     $s0,$t0
607         xor     $s1,$t1
608         xor     $s2,$t2
609         xor     $s3,$t3
610
611         jr      $ra
612 .end    _mips_AES_encrypt
613
614 .align  5
615 .globl  AES_encrypt
616 .ent    AES_encrypt
617 AES_encrypt:
618         .frame  $sp,$FRAMESIZE,$ra
619         .mask   $SAVED_REGS_MASK,-$SZREG
620         .set    noreorder
621 ___
622 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification: .cpload on $pf
623         .cpload $pf
624 ___
625 $code.=<<___;   # prologue: allocate the frame, save $ra, $fp and file-level $s4-$s11
626         $PTR_SUB $sp,$FRAMESIZE
627         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
628         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
629         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
630         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
631         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
632         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
633         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
634         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
635         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
636         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
637 ___
638 $code.=<<___ if ($flavour =~ /nubi/i);  # NUBI only: also save $12-$15 and $gp; other flavours skip these stores
639         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
640         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
641         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
642         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
643         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
644 ___
645 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification: .cplocal/.cpsetup with $Tbl and $pf
646         .cplocal        $Tbl
647         .cpsetup        $pf,$zero,AES_encrypt
648 ___
649 $code.=<<___;   # load AES_Te address and the input block, call the round function, store the result, reload saved registers
650         .set    reorder
651         $PTR_LA $Tbl,AES_Te             # PIC-ified 'load address'
652
653         lwl     $s0,0+$MSB($inp)
654         lwl     $s1,4+$MSB($inp)
655         lwl     $s2,8+$MSB($inp)
656         lwl     $s3,12+$MSB($inp)
657         lwr     $s0,0+$LSB($inp)
658         lwr     $s1,4+$LSB($inp)
659         lwr     $s2,8+$LSB($inp)
660         lwr     $s3,12+$LSB($inp)
661
662         bal     _mips_AES_encrypt
663
664         swr     $s0,0+$LSB($out)
665         swr     $s1,4+$LSB($out)
666         swr     $s2,8+$LSB($out)
667         swr     $s3,12+$LSB($out)
668         swl     $s0,0+$MSB($out)
669         swl     $s1,4+$MSB($out)
670         swl     $s2,8+$MSB($out)
671         swl     $s3,12+$MSB($out)
672
673         .set    noreorder
674         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
675         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
676         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
677         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
678         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
679         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
680         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
681         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
682         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
683         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
684 ___
685 $code.=<<___ if ($flavour =~ /nubi/i);  # NUBI only: reload $12-$15 and $gp from their frame slots
686         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
687         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
688         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
689         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
690         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
691 ___
692 $code.=<<___;   # return to the caller; the instruction after jr releases the stack frame
693         jr      $ra
694         $PTR_ADD $sp,$FRAMESIZE
695 .end    AES_encrypt
696 ___
697 \f
698 $code.=<<___;
699 .align  5
700 .ent    _mips_AES_decrypt
701 _mips_AES_decrypt:
702         .frame  $sp,0,$ra
703         .set    reorder
704         lw      $t0,0($key)
705         lw      $t1,4($key)
706         lw      $t2,8($key)
707         lw      $t3,12($key)
708         lw      $cnt,240($key)
709         $PTR_ADD $key0,$key,16
710
711         xor     $s0,$t0
712         xor     $s1,$t1
713         xor     $s2,$t2
714         xor     $s3,$t3
715
716         sub     $cnt,1
717 #if defined(__mips_smartmips)
718         ext     $i0,$s3,16,8
719 .Loop_dec:
720         ext     $i1,$s0,16,8
721         ext     $i2,$s1,16,8
722         ext     $i3,$s2,16,8
723         lwxs    $t0,$i0($Tbl)           # Td1[s3>>16]
724         ext     $i0,$s2,8,8
725         lwxs    $t1,$i1($Tbl)           # Td1[s0>>16]
726         ext     $i1,$s3,8,8
727         lwxs    $t2,$i2($Tbl)           # Td1[s1>>16]
728         ext     $i2,$s0,8,8
729         lwxs    $t3,$i3($Tbl)           # Td1[s2>>16]
730         ext     $i3,$s1,8,8
731
732         lwxs    $t4,$i0($Tbl)           # Td2[s2>>8]
733         ext     $i0,$s1,0,8
734         lwxs    $t5,$i1($Tbl)           # Td2[s3>>8]
735         ext     $i1,$s2,0,8
736         lwxs    $t6,$i2($Tbl)           # Td2[s0>>8]
737         ext     $i2,$s3,0,8
738         lwxs    $t7,$i3($Tbl)           # Td2[s1>>8]
739         ext     $i3,$s0,0,8
740
741         lwxs    $t8,$i0($Tbl)           # Td3[s1]
742         ext     $i0,$s0,24,8
743         lwxs    $t9,$i1($Tbl)           # Td3[s2]
744         ext     $i1,$s1,24,8
745         lwxs    $t10,$i2($Tbl)          # Td3[s3]
746         ext     $i2,$s2,24,8
747         lwxs    $t11,$i3($Tbl)          # Td3[s0]
748         ext     $i3,$s3,24,8
749
750         rotr    $t0,$t0,8
751         rotr    $t1,$t1,8
752         rotr    $t2,$t2,8
753         rotr    $t3,$t3,8
754
755         rotr    $t4,$t4,16
756         rotr    $t5,$t5,16
757         rotr    $t6,$t6,16
758         rotr    $t7,$t7,16
759
760         xor     $t0,$t4
761         lwxs    $t4,$i0($Tbl)           # Td0[s0>>24]
762         xor     $t1,$t5
763         lwxs    $t5,$i1($Tbl)           # Td0[s1>>24]
764         xor     $t2,$t6
765         lwxs    $t6,$i2($Tbl)           # Td0[s2>>24]
766         xor     $t3,$t7
767         lwxs    $t7,$i3($Tbl)           # Td0[s3>>24]
768
769         rotr    $t8,$t8,24
770         lw      $s0,0($key0)
771         rotr    $t9,$t9,24
772         lw      $s1,4($key0)
773         rotr    $t10,$t10,24
774         lw      $s2,8($key0)
775         rotr    $t11,$t11,24
776         lw      $s3,12($key0)
777
778         xor     $t0,$t8
779         xor     $t1,$t9
780         xor     $t2,$t10
781         xor     $t3,$t11
782
783         xor     $t0,$t4
784         xor     $t1,$t5
785         xor     $t2,$t6
786         xor     $t3,$t7
787
788         sub     $cnt,1
789         $PTR_ADD $key0,16
790         xor     $s0,$t0
791         xor     $s1,$t1
792         xor     $s2,$t2
793         xor     $s3,$t3
794         .set    noreorder
795         bnez    $cnt,.Loop_dec
796         ext     $i0,$s3,16,8
797
798         _xtr    $i0,$s3,16-2
799 #else
800         _xtr    $i0,$s3,16-2
801 .Loop_dec:
802         _xtr    $i1,$s0,16-2
803         _xtr    $i2,$s1,16-2
804         _xtr    $i3,$s2,16-2
805         and     $i0,0x3fc
806         and     $i1,0x3fc
807         and     $i2,0x3fc
808         and     $i3,0x3fc
809         $PTR_ADD $i0,$Tbl
810         $PTR_ADD $i1,$Tbl
811         $PTR_ADD $i2,$Tbl
812         $PTR_ADD $i3,$Tbl
813 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
814         lw      $t0,0($i0)              # Td1[s3>>16]
815         _xtr    $i0,$s2,8-2
816         lw      $t1,0($i1)              # Td1[s0>>16]
817         _xtr    $i1,$s3,8-2
818         lw      $t2,0($i2)              # Td1[s1>>16]
819         _xtr    $i2,$s0,8-2
820         lw      $t3,0($i3)              # Td1[s2>>16]
821         _xtr    $i3,$s1,8-2
822 #else
823         lwl     $t0,3($i0)              # Td1[s3>>16]
824         lwl     $t1,3($i1)              # Td1[s0>>16]
825         lwl     $t2,3($i2)              # Td1[s1>>16]
826         lwl     $t3,3($i3)              # Td1[s2>>16]
827         lwr     $t0,2($i0)              # Td1[s3>>16]
828         _xtr    $i0,$s2,8-2
829         lwr     $t1,2($i1)              # Td1[s0>>16]
830         _xtr    $i1,$s3,8-2
831         lwr     $t2,2($i2)              # Td1[s1>>16]
832         _xtr    $i2,$s0,8-2
833         lwr     $t3,2($i3)              # Td1[s2>>16]
834         _xtr    $i3,$s1,8-2
835 #endif
836
837         and     $i0,0x3fc
838         and     $i1,0x3fc
839         and     $i2,0x3fc
840         and     $i3,0x3fc
841         $PTR_ADD $i0,$Tbl
842         $PTR_ADD $i1,$Tbl
843         $PTR_ADD $i2,$Tbl
844         $PTR_ADD $i3,$Tbl
845 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
846         rotr    $t0,$t0,8
847         rotr    $t1,$t1,8
848         rotr    $t2,$t2,8
849         rotr    $t3,$t3,8
850 # if defined(_MIPSEL)
851         lw      $t4,0($i0)              # Td2[s2>>8]
852         _xtr    $i0,$s1,0-2
853         lw      $t5,0($i1)              # Td2[s3>>8]
854         _xtr    $i1,$s2,0-2
855         lw      $t6,0($i2)              # Td2[s0>>8]
856         _xtr    $i2,$s3,0-2
857         lw      $t7,0($i3)              # Td2[s1>>8]
858         _xtr    $i3,$s0,0-2
859
860         and     $i0,0x3fc
861         and     $i1,0x3fc
862         and     $i2,0x3fc
863         and     $i3,0x3fc
864         $PTR_ADD $i0,$Tbl
865         $PTR_ADD $i1,$Tbl
866         $PTR_ADD $i2,$Tbl
867         $PTR_ADD $i3,$Tbl
868         lw      $t8,0($i0)              # Td3[s1]
869         $PTR_INS $i0,$s0,2,8
870         lw      $t9,0($i1)              # Td3[s2]
871         $PTR_INS $i1,$s1,2,8
872         lw      $t10,0($i2)             # Td3[s3]
873         $PTR_INS $i2,$s2,2,8
874         lw      $t11,0($i3)             # Td3[s0]
875         $PTR_INS $i3,$s3,2,8
876 #else
877         lw      $t4,0($i0)              # Td2[s2>>8]
878         $PTR_INS $i0,$s1,2,8
879         lw      $t5,0($i1)              # Td2[s3>>8]
880         $PTR_INS $i1,$s2,2,8
881         lw      $t6,0($i2)              # Td2[s0>>8]
882         $PTR_INS $i2,$s3,2,8
883         lw      $t7,0($i3)              # Td2[s1>>8]
884         $PTR_INS $i3,$s0,2,8
885
886         lw      $t8,0($i0)              # Td3[s1]
887         _xtr    $i0,$s0,24-2
888         lw      $t9,0($i1)              # Td3[s2]
889         _xtr    $i1,$s1,24-2
890         lw      $t10,0($i2)             # Td3[s3]
891         _xtr    $i2,$s2,24-2
892         lw      $t11,0($i3)             # Td3[s0]
893         _xtr    $i3,$s3,24-2
894
895         and     $i0,0x3fc
896         and     $i1,0x3fc
897         and     $i2,0x3fc
898         and     $i3,0x3fc
899         $PTR_ADD $i0,$Tbl
900         $PTR_ADD $i1,$Tbl
901         $PTR_ADD $i2,$Tbl
902         $PTR_ADD $i3,$Tbl
903 #endif
904         rotr    $t4,$t4,16
905         rotr    $t5,$t5,16
906         rotr    $t6,$t6,16
907         rotr    $t7,$t7,16
908
909         rotr    $t8,$t8,24
910         rotr    $t9,$t9,24
911         rotr    $t10,$t10,24
912         rotr    $t11,$t11,24
913 #else
914         lwl     $t4,2($i0)              # Td2[s2>>8]
915         lwl     $t5,2($i1)              # Td2[s3>>8]
916         lwl     $t6,2($i2)              # Td2[s0>>8]
917         lwl     $t7,2($i3)              # Td2[s1>>8]
918         lwr     $t4,1($i0)              # Td2[s2>>8]
919         _xtr    $i0,$s1,0-2
920         lwr     $t5,1($i1)              # Td2[s3>>8]
921         _xtr    $i1,$s2,0-2
922         lwr     $t6,1($i2)              # Td2[s0>>8]
923         _xtr    $i2,$s3,0-2
924         lwr     $t7,1($i3)              # Td2[s1>>8]
925         _xtr    $i3,$s0,0-2
926
927         and     $i0,0x3fc
928         and     $i1,0x3fc
929         and     $i2,0x3fc
930         and     $i3,0x3fc
931         $PTR_ADD $i0,$Tbl
932         $PTR_ADD $i1,$Tbl
933         $PTR_ADD $i2,$Tbl
934         $PTR_ADD $i3,$Tbl
935         lwl     $t8,1($i0)              # Td3[s1]
936         lwl     $t9,1($i1)              # Td3[s2]
937         lwl     $t10,1($i2)             # Td3[s3]
938         lwl     $t11,1($i3)             # Td3[s0]
939         lwr     $t8,0($i0)              # Td3[s1]
940         _xtr    $i0,$s0,24-2
941         lwr     $t9,0($i1)              # Td3[s2]
942         _xtr    $i1,$s1,24-2
943         lwr     $t10,0($i2)             # Td3[s3]
944         _xtr    $i2,$s2,24-2
945         lwr     $t11,0($i3)             # Td3[s0]
946         _xtr    $i3,$s3,24-2
947
948         and     $i0,0x3fc
949         and     $i1,0x3fc
950         and     $i2,0x3fc
951         and     $i3,0x3fc
952         $PTR_ADD $i0,$Tbl
953         $PTR_ADD $i1,$Tbl
954         $PTR_ADD $i2,$Tbl
955         $PTR_ADD $i3,$Tbl
956 #endif
957
958         xor     $t0,$t4
959         lw      $t4,0($i0)              # Td0[s0>>24]
960         xor     $t1,$t5
961         lw      $t5,0($i1)              # Td0[s1>>24]
962         xor     $t2,$t6
963         lw      $t6,0($i2)              # Td0[s2>>24]
964         xor     $t3,$t7
965         lw      $t7,0($i3)              # Td0[s3>>24]
966
967         xor     $t0,$t8
968         lw      $s0,0($key0)
969         xor     $t1,$t9
970         lw      $s1,4($key0)
971         xor     $t2,$t10
972         lw      $s2,8($key0)
973         xor     $t3,$t11
974         lw      $s3,12($key0)
975
976         xor     $t0,$t4
977         xor     $t1,$t5
978         xor     $t2,$t6
979         xor     $t3,$t7
980
981         sub     $cnt,1
982         $PTR_ADD $key0,16
983         xor     $s0,$t0
984         xor     $s1,$t1
985         xor     $s2,$t2
986         xor     $s3,$t3
987         .set    noreorder
988         bnez    $cnt,.Loop_dec
989         _xtr    $i0,$s3,16-2
990 #endif
991
992         .set    reorder
993         lw      $t4,1024($Tbl)          # prefetch Td4
994         _xtr    $i0,$s3,16
995         lw      $t5,1024+32($Tbl)
996         _xtr    $i1,$s0,16
997         lw      $t6,1024+64($Tbl)
998         _xtr    $i2,$s1,16
999         lw      $t7,1024+96($Tbl)
1000         _xtr    $i3,$s2,16
1001         lw      $t8,1024+128($Tbl)
1002         and     $i0,0xff
1003         lw      $t9,1024+160($Tbl)
1004         and     $i1,0xff
1005         lw      $t10,1024+192($Tbl)
1006         and     $i2,0xff
1007         lw      $t11,1024+224($Tbl)
1008         and     $i3,0xff
1009
1010         $PTR_ADD $i0,$Tbl
1011         $PTR_ADD $i1,$Tbl
1012         $PTR_ADD $i2,$Tbl
1013         $PTR_ADD $i3,$Tbl
1014         lbu     $t0,1024($i0)           # Td4[s3>>16]
1015         _xtr    $i0,$s2,8
1016         lbu     $t1,1024($i1)           # Td4[s0>>16]
1017         _xtr    $i1,$s3,8
1018         lbu     $t2,1024($i2)           # Td4[s1>>16]
1019         _xtr    $i2,$s0,8
1020         lbu     $t3,1024($i3)           # Td4[s2>>16]
1021         _xtr    $i3,$s1,8
1022
1023         and     $i0,0xff
1024         and     $i1,0xff
1025         and     $i2,0xff
1026         and     $i3,0xff
1027         $PTR_ADD $i0,$Tbl
1028         $PTR_ADD $i1,$Tbl
1029         $PTR_ADD $i2,$Tbl
1030         $PTR_ADD $i3,$Tbl
1031 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1032 # if defined(_MIPSEL)
1033         lbu     $t4,1024($i0)           # Td4[s2>>8]
1034         $PTR_INS $i0,$s0,0,8
1035         lbu     $t5,1024($i1)           # Td4[s3>>8]
1036         $PTR_INS $i1,$s1,0,8
1037         lbu     $t6,1024($i2)           # Td4[s0>>8]
1038         $PTR_INS $i2,$s2,0,8
1039         lbu     $t7,1024($i3)           # Td4[s1>>8]
1040         $PTR_INS $i3,$s3,0,8
1041
1042         lbu     $t8,1024($i0)           # Td4[s0>>24]
1043         _xtr    $i0,$s1,0
1044         lbu     $t9,1024($i1)           # Td4[s1>>24]
1045         _xtr    $i1,$s2,0
1046         lbu     $t10,1024($i2)          # Td4[s2>>24]
1047         _xtr    $i2,$s3,0
1048         lbu     $t11,1024($i3)          # Td4[s3>>24]
1049         _xtr    $i3,$s0,0
1050
1051         $PTR_ADD $i0,$Tbl
1052         $PTR_ADD $i1,$Tbl
1053         $PTR_ADD $i2,$Tbl
1054         $PTR_ADD $i3,$Tbl
1055 # else
1056         lbu     $t4,1024($i0)           # Td4[s2>>8]
1057         _xtr    $i0,$s0,24
1058         lbu     $t5,1024($i1)           # Td4[s3>>8]
1059         _xtr    $i1,$s1,24
1060         lbu     $t6,1024($i2)           # Td4[s0>>8]
1061         _xtr    $i2,$s2,24
1062         lbu     $t7,1024($i3)           # Td4[s1>>8]
1063         _xtr    $i3,$s3,24
1064
1065         $PTR_ADD $i0,$Tbl
1066         $PTR_ADD $i1,$Tbl
1067         $PTR_ADD $i2,$Tbl
1068         $PTR_ADD $i3,$Tbl
1069         lbu     $t8,1024($i0)           # Td4[s0>>24]
1070         $PTR_INS $i0,$s1,0,8
1071         lbu     $t9,1024($i1)           # Td4[s1>>24]
1072         $PTR_INS $i1,$s2,0,8
1073         lbu     $t10,1024($i2)          # Td4[s2>>24]
1074         $PTR_INS $i2,$s3,0,8
1075         lbu     $t11,1024($i3)          # Td4[s3>>24]
1076         $PTR_INS $i3,$s0,0,8
1077 # endif
1078         _ins    $t0,16
1079         _ins    $t1,16
1080         _ins    $t2,16
1081         _ins    $t3,16
1082
1083         _ins2   $t0,$t4,8
1084         lbu     $t4,1024($i0)           # Td4[s1]
1085         _ins2   $t1,$t5,8
1086         lbu     $t5,1024($i1)           # Td4[s2]
1087         _ins2   $t2,$t6,8
1088         lbu     $t6,1024($i2)           # Td4[s3]
1089         _ins2   $t3,$t7,8
1090         lbu     $t7,1024($i3)           # Td4[s0]
1091
1092         _ins2   $t0,$t8,24
1093         lw      $s0,0($key0)
1094         _ins2   $t1,$t9,24
1095         lw      $s1,4($key0)
1096         _ins2   $t2,$t10,24
1097         lw      $s2,8($key0)
1098         _ins2   $t3,$t11,24
1099         lw      $s3,12($key0)
1100
1101         _ins2   $t0,$t4,0
1102         _ins2   $t1,$t5,0
1103         _ins2   $t2,$t6,0
1104         _ins2   $t3,$t7,0
1105 #else
1106         lbu     $t4,1024($i0)           # Td4[s2>>8]
1107         _xtr    $i0,$s0,24
1108         lbu     $t5,1024($i1)           # Td4[s3>>8]
1109         _xtr    $i1,$s1,24
1110         lbu     $t6,1024($i2)           # Td4[s0>>8]
1111         _xtr    $i2,$s2,24
1112         lbu     $t7,1024($i3)           # Td4[s1>>8]
1113         _xtr    $i3,$s3,24
1114
1115         $PTR_ADD $i0,$Tbl
1116         $PTR_ADD $i1,$Tbl
1117         $PTR_ADD $i2,$Tbl
1118         $PTR_ADD $i3,$Tbl
1119         lbu     $t8,1024($i0)           # Td4[s0>>24]
1120         _xtr    $i0,$s1,0
1121         lbu     $t9,1024($i1)           # Td4[s1>>24]
1122         _xtr    $i1,$s2,0
1123         lbu     $t10,1024($i2)          # Td4[s2>>24]
1124         _xtr    $i2,$s3,0
1125         lbu     $t11,1024($i3)          # Td4[s3>>24]
1126         _xtr    $i3,$s0,0
1127
1128         $PTR_ADD $i0,$Tbl
1129         $PTR_ADD $i1,$Tbl
1130         $PTR_ADD $i2,$Tbl
1131         $PTR_ADD $i3,$Tbl
1132
1133         _ins    $t0,16
1134         _ins    $t1,16
1135         _ins    $t2,16
1136         _ins    $t3,16
1137
1138         _ins    $t4,8
1139         _ins    $t5,8
1140         _ins    $t6,8
1141         _ins    $t7,8
1142
1143         xor     $t0,$t4
1144         lbu     $t4,1024($i0)           # Td4[s1]
1145         xor     $t1,$t5
1146         lbu     $t5,1024($i1)           # Td4[s2]
1147         xor     $t2,$t6
1148         lbu     $t6,1024($i2)           # Td4[s3]
1149         xor     $t3,$t7
1150         lbu     $t7,1024($i3)           # Td4[s0]
1151
1152         _ins    $t8,24
1153         lw      $s0,0($key0)
1154         _ins    $t9,24
1155         lw      $s1,4($key0)
1156         _ins    $t10,24
1157         lw      $s2,8($key0)
1158         _ins    $t11,24
1159         lw      $s3,12($key0)
1160
1161         xor     $t0,$t8
1162         xor     $t1,$t9
1163         xor     $t2,$t10
1164         xor     $t3,$t11
1165
1166         _ins    $t4,0
1167         _ins    $t5,0
1168         _ins    $t6,0
1169         _ins    $t7,0
1170
1171         xor     $t0,$t4
1172         xor     $t1,$t5
1173         xor     $t2,$t6
1174         xor     $t3,$t7
1175 #endif
1176
1177         xor     $s0,$t0
1178         xor     $s1,$t1
1179         xor     $s2,$t2
1180         xor     $s3,$t3
1181
1182         jr      $ra
1183 .end    _mips_AES_decrypt
1184
1185 .align  5
1186 .globl  AES_decrypt             # public entry point: handles one 16-byte block
1187 .ent    AES_decrypt
1188 AES_decrypt:
1189         .frame  $sp,$FRAMESIZE,$ra
1190         .mask   $SAVED_REGS_MASK,-$SZREG
1191         .set    noreorder
1192 ___
1193 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1194         .cpload $pf
1195 ___
1196 $code.=<<___;                           # prologue: allocate the frame, save $ra, $fp and $s4..$s11
1197         $PTR_SUB $sp,$FRAMESIZE
1198         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1199         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1200         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
1201         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
1202         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
1203         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
1204         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
1205         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
1206         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
1207         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
1208 ___
1209 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1210         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
1211         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
1212         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
1213         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
1214         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
1215 ___
1216 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1217         .cplocal        $Tbl
1218         .cpsetup        $pf,$zero,AES_decrypt
1219 ___
1220 $code.=<<___;                           # body: load block, run the rounds, store block, epilogue
1221         .set    reorder
1222         $PTR_LA $Tbl,AES_Td             # PIC-ified 'load address'
1223
1224         lwl     $s0,0+$MSB($inp)        # gather the 16-byte input block; lwl/lwr pairs
1225         lwl     $s1,4+$MSB($inp)        # make the loads work at any alignment
1226         lwl     $s2,8+$MSB($inp)
1227         lwl     $s3,12+$MSB($inp)
1228         lwr     $s0,0+$LSB($inp)
1229         lwr     $s1,4+$LSB($inp)
1230         lwr     $s2,8+$LSB($inp)
1231         lwr     $s3,12+$LSB($inp)
1232
1233         bal     _mips_AES_decrypt       # state goes in and comes back in s0..s3
1234
1235         swr     $s0,0+$LSB($out)        # scatter the result to the output block,
1236         swr     $s1,4+$LSB($out)        # again with alignment-agnostic swr/swl pairs
1237         swr     $s2,8+$LSB($out)
1238         swr     $s3,12+$LSB($out)
1239         swl     $s0,0+$MSB($out)
1240         swl     $s1,4+$MSB($out)
1241         swl     $s2,8+$MSB($out)
1242         swl     $s3,12+$MSB($out)
1243
1244         .set    noreorder               # epilogue: the jr delay slot is filled by hand
1245         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1246         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1247         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
1248         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
1249         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
1250         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
1251         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
1252         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
1253         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
1254         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
1255 ___
1256 $code.=<<___ if ($flavour =~ /nubi/i);  # restores mirror the nubi-only saves above, slot for slot
1257         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
1258         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
1259         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
1260         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
1261         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
1262 ___
1263 $code.=<<___;
1264         jr      $ra
1265         $PTR_ADD $sp,$FRAMESIZE         # delay slot: release the frame
1266 .end    AES_decrypt
1267 ___
1268 }}}
1269 \f
1270 {{{
# Stack frame shared by AES_set_encrypt_key and AES_set_decrypt_key: eight
# register-sized slots. The prologues use two of them unconditionally
# ($ra, $fp) and five more for the nubi flavour ($s3..$s0, $gp).
1271 my $FRAMESIZE=8*$SZREG;
# Bitmap handed to .mask: bits 31 and 30 are $ra and $fp; the nubi variant
# additionally sets bits 12-15 ($s0..$s3) and bit 3 ($gp), matching the
# register numbers assigned at the top of this file.
1272 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc000f008" : "0xc0000000";
1273 
# Incoming arguments live in $a0..$a2; $Tbl ($a3) is loaded by the public
# wrappers before they call the worker.
1274 my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
# Up to eight 32-bit words of key material being expanded.
1275 my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
# Scratch index registers. Note that $i1 is $t0, which also carries the
# status code, so the status is only meaningful before and after expansion.
1276 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
# $rcon walks the round-constant words; $cnt is the loop counter and, on
# return from the worker, the number of rounds.
1277 my ($rcon,$cnt)=($gp,$fp);
1278
# _mips_AES_set_encrypt_key: key-expansion worker reached with "bal" from both
# public key-setup entry points in this scope.
#   in:  $inp  = user key bytes (any alignment, read with lwl/lwr)
#        $bits = 128, 192 or 256
#        $key  = output key schedule
#        $Tbl  = byte substitution table (the callers load AES_Te4); the round
#                constants are read as 32-bit words starting at $Tbl+256
#   out: $t0   = 0 on success, -1 if $inp or $key is zero, -2 for any other
#                value of $bits
#        $cnt  = 10, 12 or 14 on success; the same value is stored at byte
#                offset 240 of the schedule
#        $key  = rewound to the start of the schedule on success
# Clobbers $at, $t0-$t2, $rk0-$rk7, $rcon ($gp) and $cnt ($fp) without saving
# them; the callers do whatever saving their ABI requires.
# NOTE(review): _bias is a made-up instruction rewritten by the post-processing
# pass at the end of this script (not visible from here) -- presumably an
# endian-aware left shift; confirm there.
1279 $code.=<<___;
1280 .align  5
1281 .ent    _mips_AES_set_encrypt_key
1282 _mips_AES_set_encrypt_key:
1283         .frame  $sp,0,$ra
1284         .set    noreorder               # branch delay slots below are filled by hand
1285         beqz    $inp,.Lekey_done
1286         li      $t0,-1                  # delay slot: preset the status to -1
1287         beqz    $key,.Lekey_done
1288         $PTR_ADD $rcon,$Tbl,256         # delay slot: round constants sit 256 bytes into the table
1289 
1290         .set    reorder
1291         lwl     $rk0,0+$MSB($inp)       # load 128 bits
1292         lwl     $rk1,4+$MSB($inp)
1293         lwl     $rk2,8+$MSB($inp)
1294         lwl     $rk3,12+$MSB($inp)
1295         li      $at,128
1296         lwr     $rk0,0+$LSB($inp)
1297         lwr     $rk1,4+$LSB($inp)
1298         lwr     $rk2,8+$LSB($inp)
1299         lwr     $rk3,12+$LSB($inp)
1300         .set    noreorder
1301         beq     $bits,$at,.L128bits
1302         li      $cnt,10                 # delay slot: 10 passes of 4 words each
1303 
1304         .set    reorder
1305         lwl     $rk4,16+$MSB($inp)      # load 192 bits
1306         lwl     $rk5,20+$MSB($inp)
1307         li      $at,192
1308         lwr     $rk4,16+$LSB($inp)
1309         lwr     $rk5,20+$LSB($inp)
1310         .set    noreorder
1311         beq     $bits,$at,.L192bits
1312         li      $cnt,8                  # delay slot: 8 passes of 6 words each
1313 
1314         .set    reorder
1315         lwl     $rk6,24+$MSB($inp)      # load 256 bits
1316         lwl     $rk7,28+$MSB($inp)
1317         li      $at,256
1318         lwr     $rk6,24+$LSB($inp)
1319         lwr     $rk7,28+$LSB($inp)
1320         .set    noreorder
1321         beq     $bits,$at,.L256bits
1322         li      $cnt,7                  # delay slot: 7 passes of 8 words each
1323 
1324         b       .Lekey_done
1325         li      $t0,-2                  # delay slot: unsupported key size
1326 
1327 .align  4
1328 .L128bits:
1329         .set    reorder
1330         srl     $i0,$rk3,16             # split the last word into four table indices
1331         srl     $i1,$rk3,8
1332         and     $i0,0xff
1333         and     $i1,0xff
1334         and     $i2,$rk3,0xff
1335         srl     $i3,$rk3,24
1336         $PTR_ADD $i0,$Tbl
1337         $PTR_ADD $i1,$Tbl
1338         $PTR_ADD $i2,$Tbl
1339         $PTR_ADD $i3,$Tbl
1340         lbu     $i0,0($i0)              # substitute each byte through the table
1341         lbu     $i1,0($i1)
1342         lbu     $i2,0($i2)
1343         lbu     $i3,0($i3)
1344 
1345         sw      $rk0,0($key)            # emit the current round key
1346         sw      $rk1,4($key)
1347         sw      $rk2,8($key)
1348         sw      $rk3,12($key)
1349         sub     $cnt,1
1350         $PTR_ADD $key,16
1351 
1352         _bias   $i0,24
1353         _bias   $i1,16
1354         _bias   $i2,8
1355         _bias   $i3,0
1356 
1357         xor     $rk0,$i0
1358         lw      $i0,0($rcon)            # round constant for this pass
1359         xor     $rk0,$i1
1360         xor     $rk0,$i2
1361         xor     $rk0,$i3
1362         xor     $rk0,$i0
1363 
1364         xor     $rk1,$rk0               # ripple the new word through the other three
1365         xor     $rk2,$rk1
1366         xor     $rk3,$rk2
1367 
1368         .set    noreorder
1369         bnez    $cnt,.L128bits
1370         $PTR_ADD $rcon,4                # delay slot: advance to the next round constant
1371 
1372         sw      $rk0,0($key)            # final round key
1373         sw      $rk1,4($key)
1374         sw      $rk2,8($key)
1375         li      $cnt,10                 # number of rounds, also stored just below
1376         sw      $rk3,12($key)
1377         li      $t0,0                   # status: success
1378         sw      $cnt,80($key)           # key has advanced 160 bytes, so this is byte 240
1379         b       .Lekey_done
1380         $PTR_SUB $key,10*16             # delay slot: rewind key to the schedule start
1381 
1382 .align  4
1383 .L192bits:
1384         .set    reorder
1385         srl     $i0,$rk5,16             # same byte split as above, on the sixth word
1386         srl     $i1,$rk5,8
1387         and     $i0,0xff
1388         and     $i1,0xff
1389         and     $i2,$rk5,0xff
1390         srl     $i3,$rk5,24
1391         $PTR_ADD $i0,$Tbl
1392         $PTR_ADD $i1,$Tbl
1393         $PTR_ADD $i2,$Tbl
1394         $PTR_ADD $i3,$Tbl
1395         lbu     $i0,0($i0)
1396         lbu     $i1,0($i1)
1397         lbu     $i2,0($i2)
1398         lbu     $i3,0($i3)
1399 
1400         sw      $rk0,0($key)            # emit six words per pass
1401         sw      $rk1,4($key)
1402         sw      $rk2,8($key)
1403         sw      $rk3,12($key)
1404         sw      $rk4,16($key)
1405         sw      $rk5,20($key)
1406         sub     $cnt,1
1407         $PTR_ADD $key,24
1408 
1409         _bias   $i0,24
1410         _bias   $i1,16
1411         _bias   $i2,8
1412         _bias   $i3,0
1413 
1414         xor     $rk0,$i0
1415         lw      $i0,0($rcon)            # round constant for this pass
1416         xor     $rk0,$i1
1417         xor     $rk0,$i2
1418         xor     $rk0,$i3
1419         xor     $rk0,$i0
1420 
1421         xor     $rk1,$rk0
1422         xor     $rk2,$rk1
1423         xor     $rk3,$rk2
1424         xor     $rk4,$rk3
1425         xor     $rk5,$rk4
1426 
1427         .set    noreorder
1428         bnez    $cnt,.L192bits
1429         $PTR_ADD $rcon,4                # delay slot: advance to the next round constant
1430 
1431         sw      $rk0,0($key)            # only four more words are needed at the end
1432         sw      $rk1,4($key)
1433         sw      $rk2,8($key)
1434         li      $cnt,12                 # number of rounds, also stored just below
1435         sw      $rk3,12($key)
1436         li      $t0,0                   # status: success
1437         sw      $cnt,48($key)           # key has advanced 192 bytes, so this is byte 240
1438         b       .Lekey_done
1439         $PTR_SUB $key,12*16             # delay slot: rewind key to the schedule start
1440 
1441 .align  4
1442 .L256bits:
1443         .set    reorder
1444         srl     $i0,$rk7,16             # same byte split as above, on the eighth word
1445         srl     $i1,$rk7,8
1446         and     $i0,0xff
1447         and     $i1,0xff
1448         and     $i2,$rk7,0xff
1449         srl     $i3,$rk7,24
1450         $PTR_ADD $i0,$Tbl
1451         $PTR_ADD $i1,$Tbl
1452         $PTR_ADD $i2,$Tbl
1453         $PTR_ADD $i3,$Tbl
1454         lbu     $i0,0($i0)
1455         lbu     $i1,0($i1)
1456         lbu     $i2,0($i2)
1457         lbu     $i3,0($i3)
1458 
1459         sw      $rk0,0($key)            # emit eight words per pass
1460         sw      $rk1,4($key)
1461         sw      $rk2,8($key)
1462         sw      $rk3,12($key)
1463         sw      $rk4,16($key)
1464         sw      $rk5,20($key)
1465         sw      $rk6,24($key)
1466         sw      $rk7,28($key)
1467         sub     $cnt,1
1468 
1469         _bias   $i0,24
1470         _bias   $i1,16
1471         _bias   $i2,8
1472         _bias   $i3,0
1473 
1474         xor     $rk0,$i0
1475         lw      $i0,0($rcon)            # round constant for this pass
1476         xor     $rk0,$i1
1477         xor     $rk0,$i2
1478         xor     $rk0,$i3
1479         xor     $rk0,$i0
1480 
1481         xor     $rk1,$rk0
1482         xor     $rk2,$rk1
1483         xor     $rk3,$rk2
1484         beqz    $cnt,.L256bits_done     # the last pass needs only the first four words
1485 
1486         srl     $i0,$rk3,24             # second half: substitute the bytes of the fourth
1487         srl     $i1,$rk3,16             # word in place (no rotation and no round constant)
1488         srl     $i2,$rk3,8
1489         and     $i3,$rk3,0xff
1490         and     $i1,0xff
1491         and     $i2,0xff
1492         $PTR_ADD $i0,$Tbl
1493         $PTR_ADD $i1,$Tbl
1494         $PTR_ADD $i2,$Tbl
1495         $PTR_ADD $i3,$Tbl
1496         lbu     $i0,0($i0)
1497         lbu     $i1,0($i1)
1498         lbu     $i2,0($i2)
1499         lbu     $i3,0($i3)
1500         sll     $i0,24                  # put each substituted byte back where it came from
1501         sll     $i1,16
1502         sll     $i2,8
1503 
1504         xor     $rk4,$i0
1505         xor     $rk4,$i1
1506         xor     $rk4,$i2
1507         xor     $rk4,$i3
1508 
1509         xor     $rk5,$rk4
1510         xor     $rk6,$rk5
1511         xor     $rk7,$rk6
1512 
1513         $PTR_ADD $key,32
1514         .set    noreorder
1515         b       .L256bits
1516         $PTR_ADD $rcon,4                # delay slot: advance to the next round constant
1517 
1518 .L256bits_done:
1519         sw      $rk0,32($key)           # key was not advanced on the final pass, hence +32
1520         sw      $rk1,36($key)
1521         sw      $rk2,40($key)
1522         li      $cnt,14                 # number of rounds, also stored just below
1523         sw      $rk3,44($key)
1524         li      $t0,0                   # status: success
1525         sw      $cnt,48($key)           # key has advanced 192 bytes, so this is byte 240
1526         $PTR_SUB $key,12*16             # rewind key to the schedule start
1527 
1528 .Lekey_done:
1529         jr      $ra
1530         nop                             # delay slot
1531 .end    _mips_AES_set_encrypt_key
1532
1533 .globl  AES_set_encrypt_key     # public entry point: frame set-up around the worker above
1534 .ent    AES_set_encrypt_key
1535 AES_set_encrypt_key:
1536         .frame  $sp,$FRAMESIZE,$ra
1537         .mask   $SAVED_REGS_MASK,-$SZREG
1538         .set    noreorder
1539 ___
1540 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1541         .cpload $pf
1542 ___
1543 $code.=<<___;                           # prologue: allocate the frame, save $ra and $fp ($fp doubles as $cnt)
1544         $PTR_SUB $sp,$FRAMESIZE
1545         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1546         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1547 ___
1548 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1549         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1550         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1551         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1552         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1553         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1554 ___
1555 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1556         .cplocal        $Tbl
1557         .cpsetup        $pf,$zero,AES_set_encrypt_key
1558 ___
1559 $code.=<<___;                           # body: point $Tbl at AES_Te4, run the worker, start the epilogue
1560         .set    reorder
1561         $PTR_LA $Tbl,AES_Te4            # PIC-ified 'load address'
1562 
1563         bal     _mips_AES_set_encrypt_key
1564 
1565         .set    noreorder
1566         move    $a0,$t0                 # copy the worker status (0, -1 or -2) into a0 as well
1567         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1568         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1569 ___
# The nubi restores must use the very slots the nubi prologue stored to
# (-3..-7). The frame is only 8 slots deep, so the former -11..-15 offsets
# pointed below $sp and reloaded garbage into $s0-$s3 and $gp.
1570 $code.=<<___ if ($flavour =~ /nubi/i);
1571         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1572         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1573         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1574         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1575         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1576 ___
1577 $code.=<<___;
1578         jr      $ra
1579         $PTR_ADD $sp,$FRAMESIZE         # delay slot: release the frame
1580 .end    AES_set_encrypt_key
1581 ___
1582 \f
# AES_set_decrypt_key: builds the encryption schedule with
# _mips_AES_set_encrypt_key, then converts it in place:
#   1. .Lswap reverses the order of the 16-byte round keys, walking $head up
#      from the first one and $tail down from the last one until they meet;
#   2. .Lmix rewrites every 32-bit word of round keys 1..rounds-1 as the xor of
#      its GF(2^8) multiples by 0xe, 0xb, 0xd and 9, rotated against each other
#      (the InvMixColumns multipliers, as the $tp* names suggest).
# The worker status is returned unchanged if it is negative; otherwise 0.
# $head/$tail reuse the $inp/$bits argument registers, which are dead by then,
# and the $tp* temporaries reuse the $rk* registers.
1583 my ($head,$tail)=($inp,$bits);
1584 my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
1585 my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
1586 $code.=<<___;
1587 .align  5
1588 .globl  AES_set_decrypt_key
1589 .ent    AES_set_decrypt_key
1590 AES_set_decrypt_key:
1591         .frame  $sp,$FRAMESIZE,$ra
1592         .mask   $SAVED_REGS_MASK,-$SZREG
1593         .set    noreorder
1594 ___
1595 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1596         .cpload $pf
1597 ___
1598 $code.=<<___;                           # prologue: allocate the frame, save $ra and $fp ($fp doubles as $cnt)
1599         $PTR_SUB $sp,$FRAMESIZE
1600         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1601         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1602 ___
1603 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1604         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1605         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1606         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1607         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1608         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1609 ___
1610 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1611         .cplocal        $Tbl
1612         .cpsetup        $pf,$zero,AES_set_decrypt_key
1613 ___
1614 $code.=<<___;
1615         .set    reorder
1616         $PTR_LA $Tbl,AES_Te4            # PIC-ified 'load address'
1617 
1618         bal     _mips_AES_set_encrypt_key
1619 
1620         bltz    $t0,.Ldkey_done         # negative status: hand the error straight back
1621 
1622         sll     $at,$cnt,4              # cnt holds the round count, so this is 16*rounds
1623         $PTR_ADD $head,$key,0           # first round key
1624         $PTR_ADD $tail,$key,$at         # last round key
1625 .align  4
1626 .Lswap:
1627         lw      $rk0,0($head)           # exchange the 16 bytes at head and tail
1628         lw      $rk1,4($head)
1629         lw      $rk2,8($head)
1630         lw      $rk3,12($head)
1631         lw      $rk4,0($tail)
1632         lw      $rk5,4($tail)
1633         lw      $rk6,8($tail)
1634         lw      $rk7,12($tail)
1635         sw      $rk0,0($tail)
1636         sw      $rk1,4($tail)
1637         sw      $rk2,8($tail)
1638         sw      $rk3,12($tail)
1639         $PTR_ADD $head,16
1640         $PTR_SUB $tail,16
1641         sw      $rk4,-16($head)         # head has already moved on, hence the negative offsets
1642         sw      $rk5,-12($head)
1643         sw      $rk6,-8($head)
1644         sw      $rk7,-4($head)
1645         bne     $head,$tail,.Lswap      # the round count is even, so the pointers meet in the middle
1646 
1647         lw      $tp1,16($key)           # modulo-scheduled
1648         lui     $x80808080,0x8080
1649         sub     $cnt,1
1650         or      $x80808080,0x8080
1651         sll     $cnt,2                  # 4*(rounds-1) words to transform
1652         $PTR_ADD $key,16                # the first round key is left as it is
1653         lui     $x1b1b1b1b,0x1b1b
1654         nor     $x7f7f7f7f,$zero,$x80808080
1655         or      $x1b1b1b1b,0x1b1b
1656 .align  4
1657 .Lmix:
1658         and     $m,$tp1,$x80808080      # top bit of each of the four bytes
1659         and     $tp2,$tp1,$x7f7f7f7f
1660         srl     $tp4,$m,7
1661         addu    $tp2,$tp2               # tp2<<1
1662         subu    $m,$tp4                 # 0x80-0x01 = 0x7f in every byte whose top bit was set
1663         and     $m,$x1b1b1b1b           # ... which selects the 0x1b reduction for that byte
1664         xor     $tp2,$m                 # tp2 = 2*tp1 in GF(2^8), four bytes at once
1665 
1666         and     $m,$tp2,$x80808080      # same doubling step: tp4 = 2*tp2
1667         and     $tp4,$tp2,$x7f7f7f7f
1668         srl     $tp8,$m,7
1669         addu    $tp4,$tp4               # tp4<<1
1670         subu    $m,$tp8
1671         and     $m,$x1b1b1b1b
1672         xor     $tp4,$m
1673 
1674         and     $m,$tp4,$x80808080      # and once more: tp8 = 2*tp4
1675         and     $tp8,$tp4,$x7f7f7f7f
1676         srl     $tp9,$m,7
1677         addu    $tp8,$tp8               # tp8<<1
1678         subu    $m,$tp9
1679         and     $m,$x1b1b1b1b
1680         xor     $tp8,$m
1681 
1682         xor     $tp9,$tp8,$tp1          # 9 = 8^1
1683         xor     $tpe,$tp8,$tp4          # e = 8^4^2, the 2 is folded in below
1684         xor     $tpb,$tp9,$tp2          # b = 9^2
1685         xor     $tpd,$tp9,$tp4          # d = 9^4
1686 
1687 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1688         rotr    $tp1,$tpd,16
1689          xor    $tpe,$tp2
1690         rotr    $tp2,$tp9,8
1691         xor     $tpe,$tp1
1692         rotr    $tp4,$tpb,24
1693         xor     $tpe,$tp2
1694         lw      $tp1,4($key)            # modulo-scheduled
1695         xor     $tpe,$tp4
1696 #else
1697         _ror    $tp1,$tpd,16
1698          xor    $tpe,$tp2
1699         _ror    $tp2,$tpd,-16
1700         xor     $tpe,$tp1
1701         _ror    $tp1,$tp9,8
1702         xor     $tpe,$tp2
1703         _ror    $tp2,$tp9,-24
1704         xor     $tpe,$tp1
1705         _ror    $tp1,$tpb,24
1706         xor     $tpe,$tp2
1707         _ror    $tp2,$tpb,-8
1708         xor     $tpe,$tp1
1709         lw      $tp1,4($key)            # modulo-scheduled
1710         xor     $tpe,$tp2
1711 #endif
1712         sub     $cnt,1
1713         sw      $tpe,0($key)            # overwrite the word in place
1714         $PTR_ADD $key,4
1715         bnez    $cnt,.Lmix
1716 
1717         li      $t0,0                   # t0 was reused as a mask register, so reset the status
1718 .Ldkey_done:
1719         .set    noreorder
1720         move    $a0,$t0                 # copy the status into a0 as well
1721         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1722         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1723 ___
# The nubi restores must use the very slots the nubi prologue stored to
# (-3..-7). The frame is only 8 slots deep, so the former -11..-15 offsets
# pointed below $sp and reloaded garbage into $s0-$s3 and $gp.
1724 $code.=<<___ if ($flavour =~ /nubi/i);
1725         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1726         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1727         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1728         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1729         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1730 ___
1731 $code.=<<___;
1732         jr      $ra
1733         $PTR_ADD $sp,$FRAMESIZE         # delay slot: release the frame
1734 .end    AES_set_decrypt_key
1735 ___
1736 }}}
1737
1738 ######################################################################
1739 # Tables are kept in an endian-neutral manner
1740 $code.=<<___;
1741 .rdata
1742 .align  10
1743 AES_Te:
1744 .byte   0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84     # Te0
1745 .byte   0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
1746 .byte   0xff,0xf2,0xf2,0x0d,    0xd6,0x6b,0x6b,0xbd
1747 .byte   0xde,0x6f,0x6f,0xb1,    0x91,0xc5,0xc5,0x54
1748 .byte   0x60,0x30,0x30,0x50,    0x02,0x01,0x01,0x03
1749 .byte   0xce,0x67,0x67,0xa9,    0x56,0x2b,0x2b,0x7d
1750 .byte   0xe7,0xfe,0xfe,0x19,    0xb5,0xd7,0xd7,0x62
1751 .byte   0x4d,0xab,0xab,0xe6,    0xec,0x76,0x76,0x9a
1752 .byte   0x8f,0xca,0xca,0x45,    0x1f,0x82,0x82,0x9d
1753 .byte   0x89,0xc9,0xc9,0x40,    0xfa,0x7d,0x7d,0x87
1754 .byte   0xef,0xfa,0xfa,0x15,    0xb2,0x59,0x59,0xeb
1755 .byte   0x8e,0x47,0x47,0xc9,    0xfb,0xf0,0xf0,0x0b
1756 .byte   0x41,0xad,0xad,0xec,    0xb3,0xd4,0xd4,0x67
1757 .byte   0x5f,0xa2,0xa2,0xfd,    0x45,0xaf,0xaf,0xea
1758 .byte   0x23,0x9c,0x9c,0xbf,    0x53,0xa4,0xa4,0xf7
1759 .byte   0xe4,0x72,0x72,0x96,    0x9b,0xc0,0xc0,0x5b
1760 .byte   0x75,0xb7,0xb7,0xc2,    0xe1,0xfd,0xfd,0x1c
1761 .byte   0x3d,0x93,0x93,0xae,    0x4c,0x26,0x26,0x6a
1762 .byte   0x6c,0x36,0x36,0x5a,    0x7e,0x3f,0x3f,0x41
1763 .byte   0xf5,0xf7,0xf7,0x02,    0x83,0xcc,0xcc,0x4f
1764 .byte   0x68,0x34,0x34,0x5c,    0x51,0xa5,0xa5,0xf4
1765 .byte   0xd1,0xe5,0xe5,0x34,    0xf9,0xf1,0xf1,0x08
1766 .byte   0xe2,0x71,0x71,0x93,    0xab,0xd8,0xd8,0x73
1767 .byte   0x62,0x31,0x31,0x53,    0x2a,0x15,0x15,0x3f
1768 .byte   0x08,0x04,0x04,0x0c,    0x95,0xc7,0xc7,0x52
1769 .byte   0x46,0x23,0x23,0x65,    0x9d,0xc3,0xc3,0x5e
1770 .byte   0x30,0x18,0x18,0x28,    0x37,0x96,0x96,0xa1
1771 .byte   0x0a,0x05,0x05,0x0f,    0x2f,0x9a,0x9a,0xb5
1772 .byte   0x0e,0x07,0x07,0x09,    0x24,0x12,0x12,0x36
1773 .byte   0x1b,0x80,0x80,0x9b,    0xdf,0xe2,0xe2,0x3d
1774 .byte   0xcd,0xeb,0xeb,0x26,    0x4e,0x27,0x27,0x69
1775 .byte   0x7f,0xb2,0xb2,0xcd,    0xea,0x75,0x75,0x9f
1776 .byte   0x12,0x09,0x09,0x1b,    0x1d,0x83,0x83,0x9e
1777 .byte   0x58,0x2c,0x2c,0x74,    0x34,0x1a,0x1a,0x2e
1778 .byte   0x36,0x1b,0x1b,0x2d,    0xdc,0x6e,0x6e,0xb2
1779 .byte   0xb4,0x5a,0x5a,0xee,    0x5b,0xa0,0xa0,0xfb
1780 .byte   0xa4,0x52,0x52,0xf6,    0x76,0x3b,0x3b,0x4d
1781 .byte   0xb7,0xd6,0xd6,0x61,    0x7d,0xb3,0xb3,0xce
1782 .byte   0x52,0x29,0x29,0x7b,    0xdd,0xe3,0xe3,0x3e
1783 .byte   0x5e,0x2f,0x2f,0x71,    0x13,0x84,0x84,0x97
1784 .byte   0xa6,0x53,0x53,0xf5,    0xb9,0xd1,0xd1,0x68
1785 .byte   0x00,0x00,0x00,0x00,    0xc1,0xed,0xed,0x2c
1786 .byte   0x40,0x20,0x20,0x60,    0xe3,0xfc,0xfc,0x1f
1787 .byte   0x79,0xb1,0xb1,0xc8,    0xb6,0x5b,0x5b,0xed
1788 .byte   0xd4,0x6a,0x6a,0xbe,    0x8d,0xcb,0xcb,0x46
1789 .byte   0x67,0xbe,0xbe,0xd9,    0x72,0x39,0x39,0x4b
1790 .byte   0x94,0x4a,0x4a,0xde,    0x98,0x4c,0x4c,0xd4
1791 .byte   0xb0,0x58,0x58,0xe8,    0x85,0xcf,0xcf,0x4a
1792 .byte   0xbb,0xd0,0xd0,0x6b,    0xc5,0xef,0xef,0x2a
1793 .byte   0x4f,0xaa,0xaa,0xe5,    0xed,0xfb,0xfb,0x16
1794 .byte   0x86,0x43,0x43,0xc5,    0x9a,0x4d,0x4d,0xd7
1795 .byte   0x66,0x33,0x33,0x55,    0x11,0x85,0x85,0x94
1796 .byte   0x8a,0x45,0x45,0xcf,    0xe9,0xf9,0xf9,0x10
1797 .byte   0x04,0x02,0x02,0x06,    0xfe,0x7f,0x7f,0x81
1798 .byte   0xa0,0x50,0x50,0xf0,    0x78,0x3c,0x3c,0x44
1799 .byte   0x25,0x9f,0x9f,0xba,    0x4b,0xa8,0xa8,0xe3
1800 .byte   0xa2,0x51,0x51,0xf3,    0x5d,0xa3,0xa3,0xfe
1801 .byte   0x80,0x40,0x40,0xc0,    0x05,0x8f,0x8f,0x8a
1802 .byte   0x3f,0x92,0x92,0xad,    0x21,0x9d,0x9d,0xbc
1803 .byte   0x70,0x38,0x38,0x48,    0xf1,0xf5,0xf5,0x04
1804 .byte   0x63,0xbc,0xbc,0xdf,    0x77,0xb6,0xb6,0xc1
1805 .byte   0xaf,0xda,0xda,0x75,    0x42,0x21,0x21,0x63
1806 .byte   0x20,0x10,0x10,0x30,    0xe5,0xff,0xff,0x1a
1807 .byte   0xfd,0xf3,0xf3,0x0e,    0xbf,0xd2,0xd2,0x6d
1808 .byte   0x81,0xcd,0xcd,0x4c,    0x18,0x0c,0x0c,0x14
1809 .byte   0x26,0x13,0x13,0x35,    0xc3,0xec,0xec,0x2f
1810 .byte   0xbe,0x5f,0x5f,0xe1,    0x35,0x97,0x97,0xa2
1811 .byte   0x88,0x44,0x44,0xcc,    0x2e,0x17,0x17,0x39
1812 .byte   0x93,0xc4,0xc4,0x57,    0x55,0xa7,0xa7,0xf2
1813 .byte   0xfc,0x7e,0x7e,0x82,    0x7a,0x3d,0x3d,0x47
1814 .byte   0xc8,0x64,0x64,0xac,    0xba,0x5d,0x5d,0xe7
1815 .byte   0x32,0x19,0x19,0x2b,    0xe6,0x73,0x73,0x95
1816 .byte   0xc0,0x60,0x60,0xa0,    0x19,0x81,0x81,0x98
1817 .byte   0x9e,0x4f,0x4f,0xd1,    0xa3,0xdc,0xdc,0x7f
1818 .byte   0x44,0x22,0x22,0x66,    0x54,0x2a,0x2a,0x7e
1819 .byte   0x3b,0x90,0x90,0xab,    0x0b,0x88,0x88,0x83
1820 .byte   0x8c,0x46,0x46,0xca,    0xc7,0xee,0xee,0x29
1821 .byte   0x6b,0xb8,0xb8,0xd3,    0x28,0x14,0x14,0x3c
1822 .byte   0xa7,0xde,0xde,0x79,    0xbc,0x5e,0x5e,0xe2
1823 .byte   0x16,0x0b,0x0b,0x1d,    0xad,0xdb,0xdb,0x76
1824 .byte   0xdb,0xe0,0xe0,0x3b,    0x64,0x32,0x32,0x56
1825 .byte   0x74,0x3a,0x3a,0x4e,    0x14,0x0a,0x0a,0x1e
1826 .byte   0x92,0x49,0x49,0xdb,    0x0c,0x06,0x06,0x0a
1827 .byte   0x48,0x24,0x24,0x6c,    0xb8,0x5c,0x5c,0xe4
1828 .byte   0x9f,0xc2,0xc2,0x5d,    0xbd,0xd3,0xd3,0x6e
1829 .byte   0x43,0xac,0xac,0xef,    0xc4,0x62,0x62,0xa6
1830 .byte   0x39,0x91,0x91,0xa8,    0x31,0x95,0x95,0xa4
1831 .byte   0xd3,0xe4,0xe4,0x37,    0xf2,0x79,0x79,0x8b
1832 .byte   0xd5,0xe7,0xe7,0x32,    0x8b,0xc8,0xc8,0x43
1833 .byte   0x6e,0x37,0x37,0x59,    0xda,0x6d,0x6d,0xb7
1834 .byte   0x01,0x8d,0x8d,0x8c,    0xb1,0xd5,0xd5,0x64
1835 .byte   0x9c,0x4e,0x4e,0xd2,    0x49,0xa9,0xa9,0xe0
1836 .byte   0xd8,0x6c,0x6c,0xb4,    0xac,0x56,0x56,0xfa
1837 .byte   0xf3,0xf4,0xf4,0x07,    0xcf,0xea,0xea,0x25
1838 .byte   0xca,0x65,0x65,0xaf,    0xf4,0x7a,0x7a,0x8e
1839 .byte   0x47,0xae,0xae,0xe9,    0x10,0x08,0x08,0x18
1840 .byte   0x6f,0xba,0xba,0xd5,    0xf0,0x78,0x78,0x88
1841 .byte   0x4a,0x25,0x25,0x6f,    0x5c,0x2e,0x2e,0x72
1842 .byte   0x38,0x1c,0x1c,0x24,    0x57,0xa6,0xa6,0xf1
1843 .byte   0x73,0xb4,0xb4,0xc7,    0x97,0xc6,0xc6,0x51
1844 .byte   0xcb,0xe8,0xe8,0x23,    0xa1,0xdd,0xdd,0x7c
1845 .byte   0xe8,0x74,0x74,0x9c,    0x3e,0x1f,0x1f,0x21
1846 .byte   0x96,0x4b,0x4b,0xdd,    0x61,0xbd,0xbd,0xdc
1847 .byte   0x0d,0x8b,0x8b,0x86,    0x0f,0x8a,0x8a,0x85
1848 .byte   0xe0,0x70,0x70,0x90,    0x7c,0x3e,0x3e,0x42
1849 .byte   0x71,0xb5,0xb5,0xc4,    0xcc,0x66,0x66,0xaa
1850 .byte   0x90,0x48,0x48,0xd8,    0x06,0x03,0x03,0x05
1851 .byte   0xf7,0xf6,0xf6,0x01,    0x1c,0x0e,0x0e,0x12
1852 .byte   0xc2,0x61,0x61,0xa3,    0x6a,0x35,0x35,0x5f
1853 .byte   0xae,0x57,0x57,0xf9,    0x69,0xb9,0xb9,0xd0
1854 .byte   0x17,0x86,0x86,0x91,    0x99,0xc1,0xc1,0x58
1855 .byte   0x3a,0x1d,0x1d,0x27,    0x27,0x9e,0x9e,0xb9
1856 .byte   0xd9,0xe1,0xe1,0x38,    0xeb,0xf8,0xf8,0x13
1857 .byte   0x2b,0x98,0x98,0xb3,    0x22,0x11,0x11,0x33
1858 .byte   0xd2,0x69,0x69,0xbb,    0xa9,0xd9,0xd9,0x70
1859 .byte   0x07,0x8e,0x8e,0x89,    0x33,0x94,0x94,0xa7
1860 .byte   0x2d,0x9b,0x9b,0xb6,    0x3c,0x1e,0x1e,0x22
1861 .byte   0x15,0x87,0x87,0x92,    0xc9,0xe9,0xe9,0x20
1862 .byte   0x87,0xce,0xce,0x49,    0xaa,0x55,0x55,0xff
1863 .byte   0x50,0x28,0x28,0x78,    0xa5,0xdf,0xdf,0x7a
1864 .byte   0x03,0x8c,0x8c,0x8f,    0x59,0xa1,0xa1,0xf8
1865 .byte   0x09,0x89,0x89,0x80,    0x1a,0x0d,0x0d,0x17
1866 .byte   0x65,0xbf,0xbf,0xda,    0xd7,0xe6,0xe6,0x31
1867 .byte   0x84,0x42,0x42,0xc6,    0xd0,0x68,0x68,0xb8
1868 .byte   0x82,0x41,0x41,0xc3,    0x29,0x99,0x99,0xb0
1869 .byte   0x5a,0x2d,0x2d,0x77,    0x1e,0x0f,0x0f,0x11
1870 .byte   0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
1871 .byte   0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
1872
1873 AES_Td:
1874 .byte   0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53     # Td0
1875 .byte   0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
1876 .byte   0x3b,0xab,0x6b,0xcb,    0x1f,0x9d,0x45,0xf1
1877 .byte   0xac,0xfa,0x58,0xab,    0x4b,0xe3,0x03,0x93
1878 .byte   0x20,0x30,0xfa,0x55,    0xad,0x76,0x6d,0xf6
1879 .byte   0x88,0xcc,0x76,0x91,    0xf5,0x02,0x4c,0x25
1880 .byte   0x4f,0xe5,0xd7,0xfc,    0xc5,0x2a,0xcb,0xd7
1881 .byte   0x26,0x35,0x44,0x80,    0xb5,0x62,0xa3,0x8f
1882 .byte   0xde,0xb1,0x5a,0x49,    0x25,0xba,0x1b,0x67
1883 .byte   0x45,0xea,0x0e,0x98,    0x5d,0xfe,0xc0,0xe1
1884 .byte   0xc3,0x2f,0x75,0x02,    0x81,0x4c,0xf0,0x12
1885 .byte   0x8d,0x46,0x97,0xa3,    0x6b,0xd3,0xf9,0xc6
1886 .byte   0x03,0x8f,0x5f,0xe7,    0x15,0x92,0x9c,0x95
1887 .byte   0xbf,0x6d,0x7a,0xeb,    0x95,0x52,0x59,0xda
1888 .byte   0xd4,0xbe,0x83,0x2d,    0x58,0x74,0x21,0xd3
1889 .byte   0x49,0xe0,0x69,0x29,    0x8e,0xc9,0xc8,0x44
1890 .byte   0x75,0xc2,0x89,0x6a,    0xf4,0x8e,0x79,0x78
1891 .byte   0x99,0x58,0x3e,0x6b,    0x27,0xb9,0x71,0xdd
1892 .byte   0xbe,0xe1,0x4f,0xb6,    0xf0,0x88,0xad,0x17
1893 .byte   0xc9,0x20,0xac,0x66,    0x7d,0xce,0x3a,0xb4
1894 .byte   0x63,0xdf,0x4a,0x18,    0xe5,0x1a,0x31,0x82
1895 .byte   0x97,0x51,0x33,0x60,    0x62,0x53,0x7f,0x45
1896 .byte   0xb1,0x64,0x77,0xe0,    0xbb,0x6b,0xae,0x84
1897 .byte   0xfe,0x81,0xa0,0x1c,    0xf9,0x08,0x2b,0x94
1898 .byte   0x70,0x48,0x68,0x58,    0x8f,0x45,0xfd,0x19
1899 .byte   0x94,0xde,0x6c,0x87,    0x52,0x7b,0xf8,0xb7
1900 .byte   0xab,0x73,0xd3,0x23,    0x72,0x4b,0x02,0xe2
1901 .byte   0xe3,0x1f,0x8f,0x57,    0x66,0x55,0xab,0x2a
1902 .byte   0xb2,0xeb,0x28,0x07,    0x2f,0xb5,0xc2,0x03
1903 .byte   0x86,0xc5,0x7b,0x9a,    0xd3,0x37,0x08,0xa5
1904 .byte   0x30,0x28,0x87,0xf2,    0x23,0xbf,0xa5,0xb2
1905 .byte   0x02,0x03,0x6a,0xba,    0xed,0x16,0x82,0x5c
1906 .byte   0x8a,0xcf,0x1c,0x2b,    0xa7,0x79,0xb4,0x92
1907 .byte   0xf3,0x07,0xf2,0xf0,    0x4e,0x69,0xe2,0xa1
1908 .byte   0x65,0xda,0xf4,0xcd,    0x06,0x05,0xbe,0xd5
1909 .byte   0xd1,0x34,0x62,0x1f,    0xc4,0xa6,0xfe,0x8a
1910 .byte   0x34,0x2e,0x53,0x9d,    0xa2,0xf3,0x55,0xa0
1911 .byte   0x05,0x8a,0xe1,0x32,    0xa4,0xf6,0xeb,0x75
1912 .byte   0x0b,0x83,0xec,0x39,    0x40,0x60,0xef,0xaa
1913 .byte   0x5e,0x71,0x9f,0x06,    0xbd,0x6e,0x10,0x51
1914 .byte   0x3e,0x21,0x8a,0xf9,    0x96,0xdd,0x06,0x3d
1915 .byte   0xdd,0x3e,0x05,0xae,    0x4d,0xe6,0xbd,0x46
1916 .byte   0x91,0x54,0x8d,0xb5,    0x71,0xc4,0x5d,0x05
1917 .byte   0x04,0x06,0xd4,0x6f,    0x60,0x50,0x15,0xff
1918 .byte   0x19,0x98,0xfb,0x24,    0xd6,0xbd,0xe9,0x97
1919 .byte   0x89,0x40,0x43,0xcc,    0x67,0xd9,0x9e,0x77
1920 .byte   0xb0,0xe8,0x42,0xbd,    0x07,0x89,0x8b,0x88
1921 .byte   0xe7,0x19,0x5b,0x38,    0x79,0xc8,0xee,0xdb
1922 .byte   0xa1,0x7c,0x0a,0x47,    0x7c,0x42,0x0f,0xe9
1923 .byte   0xf8,0x84,0x1e,0xc9,    0x00,0x00,0x00,0x00
1924 .byte   0x09,0x80,0x86,0x83,    0x32,0x2b,0xed,0x48
1925 .byte   0x1e,0x11,0x70,0xac,    0x6c,0x5a,0x72,0x4e
1926 .byte   0xfd,0x0e,0xff,0xfb,    0x0f,0x85,0x38,0x56
1927 .byte   0x3d,0xae,0xd5,0x1e,    0x36,0x2d,0x39,0x27
1928 .byte   0x0a,0x0f,0xd9,0x64,    0x68,0x5c,0xa6,0x21
1929 .byte   0x9b,0x5b,0x54,0xd1,    0x24,0x36,0x2e,0x3a
1930 .byte   0x0c,0x0a,0x67,0xb1,    0x93,0x57,0xe7,0x0f
1931 .byte   0xb4,0xee,0x96,0xd2,    0x1b,0x9b,0x91,0x9e
1932 .byte   0x80,0xc0,0xc5,0x4f,    0x61,0xdc,0x20,0xa2
1933 .byte   0x5a,0x77,0x4b,0x69,    0x1c,0x12,0x1a,0x16
1934 .byte   0xe2,0x93,0xba,0x0a,    0xc0,0xa0,0x2a,0xe5
1935 .byte   0x3c,0x22,0xe0,0x43,    0x12,0x1b,0x17,0x1d
1936 .byte   0x0e,0x09,0x0d,0x0b,    0xf2,0x8b,0xc7,0xad
1937 .byte   0x2d,0xb6,0xa8,0xb9,    0x14,0x1e,0xa9,0xc8
1938 .byte   0x57,0xf1,0x19,0x85,    0xaf,0x75,0x07,0x4c
1939 .byte   0xee,0x99,0xdd,0xbb,    0xa3,0x7f,0x60,0xfd
1940 .byte   0xf7,0x01,0x26,0x9f,    0x5c,0x72,0xf5,0xbc
1941 .byte   0x44,0x66,0x3b,0xc5,    0x5b,0xfb,0x7e,0x34
1942 .byte   0x8b,0x43,0x29,0x76,    0xcb,0x23,0xc6,0xdc
1943 .byte   0xb6,0xed,0xfc,0x68,    0xb8,0xe4,0xf1,0x63
1944 .byte   0xd7,0x31,0xdc,0xca,    0x42,0x63,0x85,0x10
1945 .byte   0x13,0x97,0x22,0x40,    0x84,0xc6,0x11,0x20
1946 .byte   0x85,0x4a,0x24,0x7d,    0xd2,0xbb,0x3d,0xf8
1947 .byte   0xae,0xf9,0x32,0x11,    0xc7,0x29,0xa1,0x6d
1948 .byte   0x1d,0x9e,0x2f,0x4b,    0xdc,0xb2,0x30,0xf3
1949 .byte   0x0d,0x86,0x52,0xec,    0x77,0xc1,0xe3,0xd0
1950 .byte   0x2b,0xb3,0x16,0x6c,    0xa9,0x70,0xb9,0x99
1951 .byte   0x11,0x94,0x48,0xfa,    0x47,0xe9,0x64,0x22
1952 .byte   0xa8,0xfc,0x8c,0xc4,    0xa0,0xf0,0x3f,0x1a
1953 .byte   0x56,0x7d,0x2c,0xd8,    0x22,0x33,0x90,0xef
1954 .byte   0x87,0x49,0x4e,0xc7,    0xd9,0x38,0xd1,0xc1
1955 .byte   0x8c,0xca,0xa2,0xfe,    0x98,0xd4,0x0b,0x36
1956 .byte   0xa6,0xf5,0x81,0xcf,    0xa5,0x7a,0xde,0x28
1957 .byte   0xda,0xb7,0x8e,0x26,    0x3f,0xad,0xbf,0xa4
1958 .byte   0x2c,0x3a,0x9d,0xe4,    0x50,0x78,0x92,0x0d
1959 .byte   0x6a,0x5f,0xcc,0x9b,    0x54,0x7e,0x46,0x62
1960 .byte   0xf6,0x8d,0x13,0xc2,    0x90,0xd8,0xb8,0xe8
1961 .byte   0x2e,0x39,0xf7,0x5e,    0x82,0xc3,0xaf,0xf5
1962 .byte   0x9f,0x5d,0x80,0xbe,    0x69,0xd0,0x93,0x7c
1963 .byte   0x6f,0xd5,0x2d,0xa9,    0xcf,0x25,0x12,0xb3
1964 .byte   0xc8,0xac,0x99,0x3b,    0x10,0x18,0x7d,0xa7
1965 .byte   0xe8,0x9c,0x63,0x6e,    0xdb,0x3b,0xbb,0x7b
1966 .byte   0xcd,0x26,0x78,0x09,    0x6e,0x59,0x18,0xf4
1967 .byte   0xec,0x9a,0xb7,0x01,    0x83,0x4f,0x9a,0xa8
1968 .byte   0xe6,0x95,0x6e,0x65,    0xaa,0xff,0xe6,0x7e
1969 .byte   0x21,0xbc,0xcf,0x08,    0xef,0x15,0xe8,0xe6
1970 .byte   0xba,0xe7,0x9b,0xd9,    0x4a,0x6f,0x36,0xce
1971 .byte   0xea,0x9f,0x09,0xd4,    0x29,0xb0,0x7c,0xd6
1972 .byte   0x31,0xa4,0xb2,0xaf,    0x2a,0x3f,0x23,0x31
1973 .byte   0xc6,0xa5,0x94,0x30,    0x35,0xa2,0x66,0xc0
1974 .byte   0x74,0x4e,0xbc,0x37,    0xfc,0x82,0xca,0xa6
1975 .byte   0xe0,0x90,0xd0,0xb0,    0x33,0xa7,0xd8,0x15
1976 .byte   0xf1,0x04,0x98,0x4a,    0x41,0xec,0xda,0xf7
1977 .byte   0x7f,0xcd,0x50,0x0e,    0x17,0x91,0xf6,0x2f
1978 .byte   0x76,0x4d,0xd6,0x8d,    0x43,0xef,0xb0,0x4d
1979 .byte   0xcc,0xaa,0x4d,0x54,    0xe4,0x96,0x04,0xdf
1980 .byte   0x9e,0xd1,0xb5,0xe3,    0x4c,0x6a,0x88,0x1b
1981 .byte   0xc1,0x2c,0x1f,0xb8,    0x46,0x65,0x51,0x7f
1982 .byte   0x9d,0x5e,0xea,0x04,    0x01,0x8c,0x35,0x5d
1983 .byte   0xfa,0x87,0x74,0x73,    0xfb,0x0b,0x41,0x2e
1984 .byte   0xb3,0x67,0x1d,0x5a,    0x92,0xdb,0xd2,0x52
1985 .byte   0xe9,0x10,0x56,0x33,    0x6d,0xd6,0x47,0x13
1986 .byte   0x9a,0xd7,0x61,0x8c,    0x37,0xa1,0x0c,0x7a
1987 .byte   0x59,0xf8,0x14,0x8e,    0xeb,0x13,0x3c,0x89
1988 .byte   0xce,0xa9,0x27,0xee,    0xb7,0x61,0xc9,0x35
1989 .byte   0xe1,0x1c,0xe5,0xed,    0x7a,0x47,0xb1,0x3c
1990 .byte   0x9c,0xd2,0xdf,0x59,    0x55,0xf2,0x73,0x3f
1991 .byte   0x18,0x14,0xce,0x79,    0x73,0xc7,0x37,0xbf
1992 .byte   0x53,0xf7,0xcd,0xea,    0x5f,0xfd,0xaa,0x5b
1993 .byte   0xdf,0x3d,0x6f,0x14,    0x78,0x44,0xdb,0x86
1994 .byte   0xca,0xaf,0xf3,0x81,    0xb9,0x68,0xc4,0x3e
1995 .byte   0x38,0x24,0x34,0x2c,    0xc2,0xa3,0x40,0x5f
1996 .byte   0x16,0x1d,0xc3,0x72,    0xbc,0xe2,0x25,0x0c
1997 .byte   0x28,0x3c,0x49,0x8b,    0xff,0x0d,0x95,0x41
1998 .byte   0x39,0xa8,0x01,0x71,    0x08,0x0c,0xb3,0xde
1999 .byte   0xd8,0xb4,0xe4,0x9c,    0x64,0x56,0xc1,0x90
2000 .byte   0x7b,0xcb,0x84,0x61,    0xd5,0x32,0xb6,0x70
2001 .byte   0x48,0x6c,0x5c,0x74,    0xd0,0xb8,0x57,0x42
2002
2003 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38  # Td4
2004 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
2005 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
2006 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
2007 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
2008 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
2009 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
2010 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
2011 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
2012 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
2013 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
2014 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
2015 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
2016 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
2017 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
2018 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
2019 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
2020 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
2021 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
2022 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
2023 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
2024 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
2025 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
2026 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
2027 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
2028 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
2029 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
2030 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
2031 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
2032 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
2033 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
2034 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
2035
2036 AES_Te4:
2037 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5  # Te4
2038 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
2039 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
2040 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
2041 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
2042 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
2043 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
2044 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
2045 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
2046 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
2047 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
2048 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
2049 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
2050 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
2051 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
2052 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
2053 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
2054 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
2055 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
2056 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
2057 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
2058 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
2059 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
2060 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
2061 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
2062 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
2063 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
2064 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
2065 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
2066 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
2067 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
2068 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
2069
2070 .byte   0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00     # rcon
2071 .byte   0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
2072 .byte   0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
2073 .byte   0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
2074 .byte   0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
2075 ___
2076 \f
# Post-process the generated assembly text held in $code one line at a
# time and print the result to STDOUT.  Each line is handled in $_:
#   1. backtick-quoted Perl expressions are evaluated and substituted;
#   2. made-up "_" pseudo-instructions are expanded to real instructions
#      with shift amounts chosen according to $big_endian;
#   3. when $big_endian is false, lwl/lwr/swl/swr offsets and rotr/ext
#      immediates are rewritten.
# $code and $big_endian are set outside this block.
# The substitutions are order-dependent: within each "or" chain the first
# substitution that succeeds stops the rest of that chain.
2077 foreach (split("\n",$code)) {
             # replace every `...` span with the value of evaluating its
             # contents as Perl code
2078         s/\`([^\`]*)\`/eval $1/ge;
2079
2080         # made-up _instructions (_xtr, _ins, _ins2, _ror and _bias) cope
2081         # with byte order dependencies...
2082         if (/^\s+_/) {
                 # two-operand form "_op $r,x" becomes three-operand
                 # "_op $r,$r,x" by repeating the register
2083             s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
2084
                 # Expand exactly one pseudo-instruction.  N is the numeric
                 # operand (for _xtr it may be written as "N-2" and is
                 # evaluated):
                 #   _xtr  -> srl by N            (big-endian) or 24-N
                 #   _ins  -> sll by N            (big-endian) or 24-N
                 #   _ins2 -> ins at bit N, size 8 (big-endian) or bit 24-N
                 #   _ror  -> srl by N            (big-endian) or -N
                 #   _bias -> sll by N            (big-endian) or (N-16)&31
2085             s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
2086                 sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
2087                                         :               eval("24-$3"))/e or
2088             s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2089                 sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
2090                                         :               eval("24-$3"))/e or
2091             s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2092                 sprintf("ins\t$1,$2,%d,8",$big_endian ? eval($3)
2093                                         :               eval("24-$3"))/e or
2094             s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
2095                 sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
2096                                         :               eval("$3*-1"))/e or
2097             s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2098                 sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
2099                                         :               eval("($3-16)&31"))/e;
2100
                 # Fix up degenerate shifts the expansions above can produce:
                 # srl by a negative amount becomes sll by its magnitude,
                 # srl by 0 becomes "and rd,rs,0xff", and sll by 0 is
                 # commented out in the output by prefixing it with '#'.
2101             s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
2102                 sprintf("sll\t$1,$2,$3")/e                              or
2103             s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
2104                 sprintf("and\t$1,$2,0xff")/e                            or
2105             s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
2106         }
2107
2108         # convert lwl/lwr and swr/swl to little-endian order
             # Because ".*" is greedy, $2 captures only the last decimal digit
             # of the offset.  '&' binds looser than +/-, so the evaluated
             # expression reduces to ($2-1)&3 for lwl/swl and ($2+1)&3 for
             # lwr/swr, and that value replaces the digit.
             # NOTE(review): presumably offsets in $code are written for
             # big-endian and end in a digit 0..3 -- confirm against the
             # instruction text earlier in the file.
2109         if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
2110             s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
2111                 sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e        or
2112             s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
2113                 sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
2114         }
2115
             # little-endian only: a rotr amount N becomes 32-N, and the bit
             # position P of an 8-bit-wide ext becomes 24-P
2116         if (!$big_endian) {
2117             s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
2118             s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
2119         }
2120
             # emit the (possibly rewritten) line
2121         print $_,"\n";
2122 }
2123
# STDOUT is buffered, so a failed write (full disk, closed pipe) may only be
# reported when the handle is flushed on close.  Die with the system error
# rather than silently leaving truncated assembler output behind.
2124 close STDOUT or die "error closing STDOUT: $!";