1d8afe9682b38d42f589279e99bd23bb515e3262
[openssl.git] / crypto / aes / asm / aes-mips.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # AES for MIPS
11
12 # October 2010
13 #
14 # Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
15 # spends ~68 cycles per byte processed with 128-bit key. This is ~16%
16 # faster than gcc-generated code, which is not very impressive. But
17 # recall that compressed S-box requires extra processing, namely
18 # additional rotations. Rotations are implemented with lwl/lwr pairs,
19 # which are normally used for loading unaligned data. Another cool
20 # thing about this module is its endian neutrality, which means that
21 # it processes data without ever changing byte order...
22
23 # September 2012
24 #
25 # Add MIPS32R2 (~10% fewer instructions) and SmartMIPS ASE (a further
26 # ~25% fewer instructions) code. Note that there is no run-time switch;
27 # instead, the code path is chosen at pre-processing time: pass -mips32r2
28 # and/or -msmartmips.
29
30 ######################################################################
31 # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
32 # widely used. Then there is a new contender: NUBI. It appears that if
33 # one picks the latter, it's possible to arrange code in an ABI-neutral
34 # manner. Therefore let's stick to the NUBI register layout:
35 #
36 ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));    # NUBI names -> numbered registers: $0,$1,$2,$24,$25
37 ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));    # $4..$11
38 ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));    # $12..$23
39 ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));    # $3,$28,$29,$30,$31
40 #
41 # The return value is placed in $a0. The following coding rules facilitate
42 # interoperability:
43 #
44 # - never ever touch $tp, "thread pointer", former $gp;
45 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
46 #   old code];
47 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
48 #
49 # For reference here is register layout for N32/64 MIPS ABIs:
50 #
51 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
52 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
53 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
54 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
55 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
56 #
57 $flavour = shift || "o32"; # first command-line argument; supported flavours are o32,n32,64,nubi32,nubi64
58
59 if ($flavour =~ /64|n32/i) {    # matches n32, 64 and nubi64: doubleword mnemonics, 8-byte save slots
60         $PTR_ADD="dadd";        # incidentally works even on n32
61         $PTR_SUB="dsub";        # incidentally works even on n32
62         $PTR_INS="dins";
63         $REG_S="sd";
64         $REG_L="ld";
65         $PTR_SLL="dsll";        # incidentally works even on n32
66         $SZREG=8;               # bytes per register slot in the stack frame
67 } else {                        # every other flavour (o32, nubi32): word mnemonics, 4-byte save slots
68         $PTR_ADD="add";
69         $PTR_SUB="sub";
70         $PTR_INS="ins";
71         $REG_S="sw";
72         $REG_L="lw";
73         $PTR_SLL="sll";
74         $SZREG=4;               # bytes per register slot in the stack frame
75 }
76 $pf = ($flavour =~ /nubi/i) ? $t0 : $t2;        # register operand later given to .cpload/.cpsetup in the PIC prologue
77 #
78 # <appro@openssl.org>
79 #
80 ######################################################################
81 # Target endianness: when CC is set, preprocess the token MIPSEL; if it survives (not predefined as a macro) assume big-endian.
82 $big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0 if ($ENV{CC});
83 # Without CC the probe is skipped, so $big_endian stays undefined and the host-order fallback below can run.
84 for (@ARGV) {   $output=$_ if (/^\w[\w\-]*\.\w+$/);     }
85 open STDOUT,">$output";
86 # Fallback: big-endian iff a 32-bit 1 packed in network order ('N') reads back as 1 in native order ('L').
87 if (!defined($big_endian))
88 {    $big_endian=(unpack('L',pack('N',1))==1);   }
89 # Consume arguments up to and including the first one that looks like a file name, then redirect STDOUT to it (result unchecked).
90 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
91 open STDOUT,">$output";
92
93 my ($MSB,$LSB)=(0,3);   # automatically converted to little-endian
94 # Start of generated assembly: FIPS symbol renames if OPENSSL_FIPSCANISTER is defined, SmartMIPS implies _MIPS_ARCH_MIPS32R2, `.option pic2` unless EABI or non-PIC VxWorks, then `.set noat`.
95 $code.=<<___;
96 .text
97 #ifdef OPENSSL_FIPSCANISTER
98 # include <openssl/fipssyms.h>
99 #endif
100
101 #if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
102 #define _MIPS_ARCH_MIPS32R2
103 #endif
104
105 #if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
106 .option pic2
107 #endif
108 .set    noat
109 ___
110 \f
111 {{{
112 my $FRAMESIZE=16*$SZREG;
113 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
114
115 my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
116 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
117 my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
118 my ($key0,$cnt)=($gp,$fp);
119
120 # instruction ordering is "stolen" from the output of the MIPSpro assembler
121 # invoked with -mips3 -O3 arguments...
122 $code.=<<___;
123 .align  5
124 .ent    _mips_AES_encrypt
125 _mips_AES_encrypt:
126         .frame  $sp,0,$ra
127         .set    reorder
128         lw      $t0,0($key)
129         lw      $t1,4($key)
130         lw      $t2,8($key)
131         lw      $t3,12($key)
132         lw      $cnt,240($key)
133         $PTR_ADD $key0,$key,16
134
135         xor     $s0,$t0
136         xor     $s1,$t1
137         xor     $s2,$t2
138         xor     $s3,$t3
139
140         sub     $cnt,1
141 #if defined(__mips_smartmips)
142         ext     $i0,$s1,16,8
143 .Loop_enc:
144         ext     $i1,$s2,16,8
145         ext     $i2,$s3,16,8
146         ext     $i3,$s0,16,8
147         lwxs    $t0,$i0($Tbl)           # Te1[s1>>16]
148         ext     $i0,$s2,8,8
149         lwxs    $t1,$i1($Tbl)           # Te1[s2>>16]
150         ext     $i1,$s3,8,8
151         lwxs    $t2,$i2($Tbl)           # Te1[s3>>16]
152         ext     $i2,$s0,8,8
153         lwxs    $t3,$i3($Tbl)           # Te1[s0>>16]
154         ext     $i3,$s1,8,8
155
156         lwxs    $t4,$i0($Tbl)           # Te2[s2>>8]
157         ext     $i0,$s3,0,8
158         lwxs    $t5,$i1($Tbl)           # Te2[s3>>8]
159         ext     $i1,$s0,0,8
160         lwxs    $t6,$i2($Tbl)           # Te2[s0>>8]
161         ext     $i2,$s1,0,8
162         lwxs    $t7,$i3($Tbl)           # Te2[s1>>8]
163         ext     $i3,$s2,0,8
164
165         lwxs    $t8,$i0($Tbl)           # Te3[s3]
166         ext     $i0,$s0,24,8
167         lwxs    $t9,$i1($Tbl)           # Te3[s0]
168         ext     $i1,$s1,24,8
169         lwxs    $t10,$i2($Tbl)          # Te3[s1]
170         ext     $i2,$s2,24,8
171         lwxs    $t11,$i3($Tbl)          # Te3[s2]
172         ext     $i3,$s3,24,8
173
174         rotr    $t0,$t0,8
175         rotr    $t1,$t1,8
176         rotr    $t2,$t2,8
177         rotr    $t3,$t3,8
178
179         rotr    $t4,$t4,16
180         rotr    $t5,$t5,16
181         rotr    $t6,$t6,16
182         rotr    $t7,$t7,16
183
184         xor     $t0,$t4
185         lwxs    $t4,$i0($Tbl)           # Te0[s0>>24]
186         xor     $t1,$t5
187         lwxs    $t5,$i1($Tbl)           # Te0[s1>>24]
188         xor     $t2,$t6
189         lwxs    $t6,$i2($Tbl)           # Te0[s2>>24]
190         xor     $t3,$t7
191         lwxs    $t7,$i3($Tbl)           # Te0[s3>>24]
192
193         rotr    $t8,$t8,24
194         lw      $s0,0($key0)
195         rotr    $t9,$t9,24
196         lw      $s1,4($key0)
197         rotr    $t10,$t10,24
198         lw      $s2,8($key0)
199         rotr    $t11,$t11,24
200         lw      $s3,12($key0)
201
202         xor     $t0,$t8
203         xor     $t1,$t9
204         xor     $t2,$t10
205         xor     $t3,$t11
206
207         xor     $t0,$t4
208         xor     $t1,$t5
209         xor     $t2,$t6
210         xor     $t3,$t7
211
212         sub     $cnt,1
213         $PTR_ADD $key0,16
214         xor     $s0,$t0
215         xor     $s1,$t1
216         xor     $s2,$t2
217         xor     $s3,$t3
218         .set    noreorder
219         bnez    $cnt,.Loop_enc
220         ext     $i0,$s1,16,8
221
222         _xtr    $i0,$s1,16-2
223 #else
224         _xtr    $i0,$s1,16-2
225 .Loop_enc:
226         _xtr    $i1,$s2,16-2
227         _xtr    $i2,$s3,16-2
228         _xtr    $i3,$s0,16-2
229         and     $i0,0x3fc
230         and     $i1,0x3fc
231         and     $i2,0x3fc
232         and     $i3,0x3fc
233         $PTR_ADD $i0,$Tbl
234         $PTR_ADD $i1,$Tbl
235         $PTR_ADD $i2,$Tbl
236         $PTR_ADD $i3,$Tbl
237 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
238         lw      $t0,0($i0)              # Te1[s1>>16]
239         _xtr    $i0,$s2,8-2
240         lw      $t1,0($i1)              # Te1[s2>>16]
241         _xtr    $i1,$s3,8-2
242         lw      $t2,0($i2)              # Te1[s3>>16]
243         _xtr    $i2,$s0,8-2
244         lw      $t3,0($i3)              # Te1[s0>>16]
245         _xtr    $i3,$s1,8-2
246 #else
247         lwl     $t0,3($i0)              # Te1[s1>>16]
248         lwl     $t1,3($i1)              # Te1[s2>>16]
249         lwl     $t2,3($i2)              # Te1[s3>>16]
250         lwl     $t3,3($i3)              # Te1[s0>>16]
251         lwr     $t0,2($i0)              # Te1[s1>>16]
252         _xtr    $i0,$s2,8-2
253         lwr     $t1,2($i1)              # Te1[s2>>16]
254         _xtr    $i1,$s3,8-2
255         lwr     $t2,2($i2)              # Te1[s3>>16]
256         _xtr    $i2,$s0,8-2
257         lwr     $t3,2($i3)              # Te1[s0>>16]
258         _xtr    $i3,$s1,8-2
259 #endif
260         and     $i0,0x3fc
261         and     $i1,0x3fc
262         and     $i2,0x3fc
263         and     $i3,0x3fc
264         $PTR_ADD $i0,$Tbl
265         $PTR_ADD $i1,$Tbl
266         $PTR_ADD $i2,$Tbl
267         $PTR_ADD $i3,$Tbl
268 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
269         rotr    $t0,$t0,8
270         rotr    $t1,$t1,8
271         rotr    $t2,$t2,8
272         rotr    $t3,$t3,8
273 # if defined(_MIPSEL)
274         lw      $t4,0($i0)              # Te2[s2>>8]
275         _xtr    $i0,$s3,0-2
276         lw      $t5,0($i1)              # Te2[s3>>8]
277         _xtr    $i1,$s0,0-2
278         lw      $t6,0($i2)              # Te2[s0>>8]
279         _xtr    $i2,$s1,0-2
280         lw      $t7,0($i3)              # Te2[s1>>8]
281         _xtr    $i3,$s2,0-2
282
283         and     $i0,0x3fc
284         and     $i1,0x3fc
285         and     $i2,0x3fc
286         and     $i3,0x3fc
287         $PTR_ADD $i0,$Tbl
288         $PTR_ADD $i1,$Tbl
289         $PTR_ADD $i2,$Tbl
290         $PTR_ADD $i3,$Tbl
291         lw      $t8,0($i0)              # Te3[s3]
292         $PTR_INS $i0,$s0,2,8
293         lw      $t9,0($i1)              # Te3[s0]
294         $PTR_INS $i1,$s1,2,8
295         lw      $t10,0($i2)             # Te3[s1]
296         $PTR_INS $i2,$s2,2,8
297         lw      $t11,0($i3)             # Te3[s2]
298         $PTR_INS $i3,$s3,2,8
299 # else
300         lw      $t4,0($i0)              # Te2[s2>>8]
301         $PTR_INS $i0,$s3,2,8
302         lw      $t5,0($i1)              # Te2[s3>>8]
303         $PTR_INS $i1,$s0,2,8
304         lw      $t6,0($i2)              # Te2[s0>>8]
305         $PTR_INS $i2,$s1,2,8
306         lw      $t7,0($i3)              # Te2[s1>>8]
307         $PTR_INS $i3,$s2,2,8
308
309         lw      $t8,0($i0)              # Te3[s3]
310         _xtr    $i0,$s0,24-2
311         lw      $t9,0($i1)              # Te3[s0]
312         _xtr    $i1,$s1,24-2
313         lw      $t10,0($i2)             # Te3[s1]
314         _xtr    $i2,$s2,24-2
315         lw      $t11,0($i3)             # Te3[s2]
316         _xtr    $i3,$s3,24-2
317
318         and     $i0,0x3fc
319         and     $i1,0x3fc
320         and     $i2,0x3fc
321         and     $i3,0x3fc
322         $PTR_ADD $i0,$Tbl
323         $PTR_ADD $i1,$Tbl
324         $PTR_ADD $i2,$Tbl
325         $PTR_ADD $i3,$Tbl
326 # endif
327         rotr    $t4,$t4,16
328         rotr    $t5,$t5,16
329         rotr    $t6,$t6,16
330         rotr    $t7,$t7,16
331
332         rotr    $t8,$t8,24
333         rotr    $t9,$t9,24
334         rotr    $t10,$t10,24
335         rotr    $t11,$t11,24
336 #else
337         lwl     $t4,2($i0)              # Te2[s2>>8]
338         lwl     $t5,2($i1)              # Te2[s3>>8]
339         lwl     $t6,2($i2)              # Te2[s0>>8]
340         lwl     $t7,2($i3)              # Te2[s1>>8]
341         lwr     $t4,1($i0)              # Te2[s2>>8]
342         _xtr    $i0,$s3,0-2
343         lwr     $t5,1($i1)              # Te2[s3>>8]
344         _xtr    $i1,$s0,0-2
345         lwr     $t6,1($i2)              # Te2[s0>>8]
346         _xtr    $i2,$s1,0-2
347         lwr     $t7,1($i3)              # Te2[s1>>8]
348         _xtr    $i3,$s2,0-2
349
350         and     $i0,0x3fc
351         and     $i1,0x3fc
352         and     $i2,0x3fc
353         and     $i3,0x3fc
354         $PTR_ADD $i0,$Tbl
355         $PTR_ADD $i1,$Tbl
356         $PTR_ADD $i2,$Tbl
357         $PTR_ADD $i3,$Tbl
358         lwl     $t8,1($i0)              # Te3[s3]
359         lwl     $t9,1($i1)              # Te3[s0]
360         lwl     $t10,1($i2)             # Te3[s1]
361         lwl     $t11,1($i3)             # Te3[s2]
362         lwr     $t8,0($i0)              # Te3[s3]
363         _xtr    $i0,$s0,24-2
364         lwr     $t9,0($i1)              # Te3[s0]
365         _xtr    $i1,$s1,24-2
366         lwr     $t10,0($i2)             # Te3[s1]
367         _xtr    $i2,$s2,24-2
368         lwr     $t11,0($i3)             # Te3[s2]
369         _xtr    $i3,$s3,24-2
370
371         and     $i0,0x3fc
372         and     $i1,0x3fc
373         and     $i2,0x3fc
374         and     $i3,0x3fc
375         $PTR_ADD $i0,$Tbl
376         $PTR_ADD $i1,$Tbl
377         $PTR_ADD $i2,$Tbl
378         $PTR_ADD $i3,$Tbl
379 #endif
380         xor     $t0,$t4
381         lw      $t4,0($i0)              # Te0[s0>>24]
382         xor     $t1,$t5
383         lw      $t5,0($i1)              # Te0[s1>>24]
384         xor     $t2,$t6
385         lw      $t6,0($i2)              # Te0[s2>>24]
386         xor     $t3,$t7
387         lw      $t7,0($i3)              # Te0[s3>>24]
388
389         xor     $t0,$t8
390         lw      $s0,0($key0)
391         xor     $t1,$t9
392         lw      $s1,4($key0)
393         xor     $t2,$t10
394         lw      $s2,8($key0)
395         xor     $t3,$t11
396         lw      $s3,12($key0)
397
398         xor     $t0,$t4
399         xor     $t1,$t5
400         xor     $t2,$t6
401         xor     $t3,$t7
402
403         sub     $cnt,1
404         $PTR_ADD $key0,16
405         xor     $s0,$t0
406         xor     $s1,$t1
407         xor     $s2,$t2
408         xor     $s3,$t3
409         .set    noreorder
410         bnez    $cnt,.Loop_enc
411         _xtr    $i0,$s1,16-2
412 #endif
413
414         .set    reorder
415         _xtr    $i1,$s2,16-2
416         _xtr    $i2,$s3,16-2
417         _xtr    $i3,$s0,16-2
418         and     $i0,0x3fc
419         and     $i1,0x3fc
420         and     $i2,0x3fc
421         and     $i3,0x3fc
422         $PTR_ADD $i0,$Tbl
423         $PTR_ADD $i1,$Tbl
424         $PTR_ADD $i2,$Tbl
425         $PTR_ADD $i3,$Tbl
426         lbu     $t0,2($i0)              # Te4[s1>>16]
427         _xtr    $i0,$s2,8-2
428         lbu     $t1,2($i1)              # Te4[s2>>16]
429         _xtr    $i1,$s3,8-2
430         lbu     $t2,2($i2)              # Te4[s3>>16]
431         _xtr    $i2,$s0,8-2
432         lbu     $t3,2($i3)              # Te4[s0>>16]
433         _xtr    $i3,$s1,8-2
434
435         and     $i0,0x3fc
436         and     $i1,0x3fc
437         and     $i2,0x3fc
438         and     $i3,0x3fc
439         $PTR_ADD $i0,$Tbl
440         $PTR_ADD $i1,$Tbl
441         $PTR_ADD $i2,$Tbl
442         $PTR_ADD $i3,$Tbl
443 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
444 # if defined(_MIPSEL)
445         lbu     $t4,2($i0)              # Te4[s2>>8]
446         $PTR_INS $i0,$s0,2,8
447         lbu     $t5,2($i1)              # Te4[s3>>8]
448         $PTR_INS $i1,$s1,2,8
449         lbu     $t6,2($i2)              # Te4[s0>>8]
450         $PTR_INS $i2,$s2,2,8
451         lbu     $t7,2($i3)              # Te4[s1>>8]
452         $PTR_INS $i3,$s3,2,8
453
454         lbu     $t8,2($i0)              # Te4[s0>>24]
455         _xtr    $i0,$s3,0-2
456         lbu     $t9,2($i1)              # Te4[s1>>24]
457         _xtr    $i1,$s0,0-2
458         lbu     $t10,2($i2)             # Te4[s2>>24]
459         _xtr    $i2,$s1,0-2
460         lbu     $t11,2($i3)             # Te4[s3>>24]
461         _xtr    $i3,$s2,0-2
462
463         and     $i0,0x3fc
464         and     $i1,0x3fc
465         and     $i2,0x3fc
466         and     $i3,0x3fc
467         $PTR_ADD $i0,$Tbl
468         $PTR_ADD $i1,$Tbl
469         $PTR_ADD $i2,$Tbl
470         $PTR_ADD $i3,$Tbl
471 # else
472         lbu     $t4,2($i0)              # Te4[s2>>8]
473         _xtr    $i0,$s0,24-2
474         lbu     $t5,2($i1)              # Te4[s3>>8]
475         _xtr    $i1,$s1,24-2
476         lbu     $t6,2($i2)              # Te4[s0>>8]
477         _xtr    $i2,$s2,24-2
478         lbu     $t7,2($i3)              # Te4[s1>>8]
479         _xtr    $i3,$s3,24-2
480
481         and     $i0,0x3fc
482         and     $i1,0x3fc
483         and     $i2,0x3fc
484         and     $i3,0x3fc
485         $PTR_ADD $i0,$Tbl
486         $PTR_ADD $i1,$Tbl
487         $PTR_ADD $i2,$Tbl
488         $PTR_ADD $i3,$Tbl
489         lbu     $t8,2($i0)              # Te4[s0>>24]
490         $PTR_INS $i0,$s3,2,8
491         lbu     $t9,2($i1)              # Te4[s1>>24]
492         $PTR_INS $i1,$s0,2,8
493         lbu     $t10,2($i2)             # Te4[s2>>24]
494         $PTR_INS $i2,$s1,2,8
495         lbu     $t11,2($i3)             # Te4[s3>>24]
496         $PTR_INS $i3,$s2,2,8
497 # endif
498         _ins    $t0,16
499         _ins    $t1,16
500         _ins    $t2,16
501         _ins    $t3,16
502
503         _ins2   $t0,$t4,8
504         lbu     $t4,2($i0)              # Te4[s3]
505         _ins2   $t1,$t5,8
506         lbu     $t5,2($i1)              # Te4[s0]
507         _ins2   $t2,$t6,8
508         lbu     $t6,2($i2)              # Te4[s1]
509         _ins2   $t3,$t7,8
510         lbu     $t7,2($i3)              # Te4[s2]
511
512         _ins2   $t0,$t8,24
513         lw      $s0,0($key0)
514         _ins2   $t1,$t9,24
515         lw      $s1,4($key0)
516         _ins2   $t2,$t10,24
517         lw      $s2,8($key0)
518         _ins2   $t3,$t11,24
519         lw      $s3,12($key0)
520
521         _ins2   $t0,$t4,0
522         _ins2   $t1,$t5,0
523         _ins2   $t2,$t6,0
524         _ins2   $t3,$t7,0
525 #else
526         lbu     $t4,2($i0)              # Te4[s2>>8]
527         _xtr    $i0,$s0,24-2
528         lbu     $t5,2($i1)              # Te4[s3>>8]
529         _xtr    $i1,$s1,24-2
530         lbu     $t6,2($i2)              # Te4[s0>>8]
531         _xtr    $i2,$s2,24-2
532         lbu     $t7,2($i3)              # Te4[s1>>8]
533         _xtr    $i3,$s3,24-2
534
535         and     $i0,0x3fc
536         and     $i1,0x3fc
537         and     $i2,0x3fc
538         and     $i3,0x3fc
539         $PTR_ADD $i0,$Tbl
540         $PTR_ADD $i1,$Tbl
541         $PTR_ADD $i2,$Tbl
542         $PTR_ADD $i3,$Tbl
543         lbu     $t8,2($i0)              # Te4[s0>>24]
544         _xtr    $i0,$s3,0-2
545         lbu     $t9,2($i1)              # Te4[s1>>24]
546         _xtr    $i1,$s0,0-2
547         lbu     $t10,2($i2)             # Te4[s2>>24]
548         _xtr    $i2,$s1,0-2
549         lbu     $t11,2($i3)             # Te4[s3>>24]
550         _xtr    $i3,$s2,0-2
551
552         and     $i0,0x3fc
553         and     $i1,0x3fc
554         and     $i2,0x3fc
555         and     $i3,0x3fc
556         $PTR_ADD $i0,$Tbl
557         $PTR_ADD $i1,$Tbl
558         $PTR_ADD $i2,$Tbl
559         $PTR_ADD $i3,$Tbl
560
561         _ins    $t0,16
562         _ins    $t1,16
563         _ins    $t2,16
564         _ins    $t3,16
565
566         _ins    $t4,8
567         _ins    $t5,8
568         _ins    $t6,8
569         _ins    $t7,8
570
571         xor     $t0,$t4
572         lbu     $t4,2($i0)              # Te4[s3]
573         xor     $t1,$t5
574         lbu     $t5,2($i1)              # Te4[s0]
575         xor     $t2,$t6
576         lbu     $t6,2($i2)              # Te4[s1]
577         xor     $t3,$t7
578         lbu     $t7,2($i3)              # Te4[s2]
579
580         _ins    $t8,24
581         lw      $s0,0($key0)
582         _ins    $t9,24
583         lw      $s1,4($key0)
584         _ins    $t10,24
585         lw      $s2,8($key0)
586         _ins    $t11,24
587         lw      $s3,12($key0)
588
589         xor     $t0,$t8
590         xor     $t1,$t9
591         xor     $t2,$t10
592         xor     $t3,$t11
593
594         _ins    $t4,0
595         _ins    $t5,0
596         _ins    $t6,0
597         _ins    $t7,0
598
599         xor     $t0,$t4
600         xor     $t1,$t5
601         xor     $t2,$t6
602         xor     $t3,$t7
603 #endif
604         xor     $s0,$t0
605         xor     $s1,$t1
606         xor     $s2,$t2
607         xor     $s3,$t3
608
609         jr      $ra
610 .end    _mips_AES_encrypt
611
612 .align  5
613 .globl  AES_encrypt
614 .ent    AES_encrypt
615 AES_encrypt:
616         .frame  $sp,$FRAMESIZE,$ra
617         .mask   $SAVED_REGS_MASK,-$SZREG
618         .set    noreorder
619 ___
620 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
621         .cpload $pf
622 ___
623 $code.=<<___;
624         $PTR_SUB $sp,$FRAMESIZE
625         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
626         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
627         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
628         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
629         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
630         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
631         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
632         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
633         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
634         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
635 ___
636 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
637         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
638         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
639         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
640         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
641         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
642 ___
643 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
644         .cplocal        $Tbl
645         .cpsetup        $pf,$zero,AES_encrypt
646 ___
647 $code.=<<___;
648         .set    reorder
649         la      $Tbl,AES_Te             # PIC-ified 'load address'
650
651         lwl     $s0,0+$MSB($inp)
652         lwl     $s1,4+$MSB($inp)
653         lwl     $s2,8+$MSB($inp)
654         lwl     $s3,12+$MSB($inp)
655         lwr     $s0,0+$LSB($inp)
656         lwr     $s1,4+$LSB($inp)
657         lwr     $s2,8+$LSB($inp)
658         lwr     $s3,12+$LSB($inp)
659
660         bal     _mips_AES_encrypt
661
662         swr     $s0,0+$LSB($out)
663         swr     $s1,4+$LSB($out)
664         swr     $s2,8+$LSB($out)
665         swr     $s3,12+$LSB($out)
666         swl     $s0,0+$MSB($out)
667         swl     $s1,4+$MSB($out)
668         swl     $s2,8+$MSB($out)
669         swl     $s3,12+$MSB($out)
670
671         .set    noreorder
672         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
673         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
674         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
675         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
676         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
677         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
678         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
679         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
680         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
681         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
682 ___
683 $code.=<<___ if ($flavour =~ /nubi/i);
684         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
685         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
686         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
687         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
688         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
689 ___
690 $code.=<<___;
691         jr      $ra
692         $PTR_ADD $sp,$FRAMESIZE
693 .end    AES_encrypt
694 ___
695 \f
696 $code.=<<___;
697 .align  5
698 .ent    _mips_AES_decrypt
699 _mips_AES_decrypt:
700         .frame  $sp,0,$ra
701         .set    reorder
702         lw      $t0,0($key)
703         lw      $t1,4($key)
704         lw      $t2,8($key)
705         lw      $t3,12($key)
706         lw      $cnt,240($key)
707         $PTR_ADD $key0,$key,16
708
709         xor     $s0,$t0
710         xor     $s1,$t1
711         xor     $s2,$t2
712         xor     $s3,$t3
713
714         sub     $cnt,1
715 #if defined(__mips_smartmips)
716         ext     $i0,$s3,16,8
717 .Loop_dec:
718         ext     $i1,$s0,16,8
719         ext     $i2,$s1,16,8
720         ext     $i3,$s2,16,8
721         lwxs    $t0,$i0($Tbl)           # Td1[s3>>16]
722         ext     $i0,$s2,8,8
723         lwxs    $t1,$i1($Tbl)           # Td1[s0>>16]
724         ext     $i1,$s3,8,8
725         lwxs    $t2,$i2($Tbl)           # Td1[s1>>16]
726         ext     $i2,$s0,8,8
727         lwxs    $t3,$i3($Tbl)           # Td1[s2>>16]
728         ext     $i3,$s1,8,8
729
730         lwxs    $t4,$i0($Tbl)           # Td2[s2>>8]
731         ext     $i0,$s1,0,8
732         lwxs    $t5,$i1($Tbl)           # Td2[s3>>8]
733         ext     $i1,$s2,0,8
734         lwxs    $t6,$i2($Tbl)           # Td2[s0>>8]
735         ext     $i2,$s3,0,8
736         lwxs    $t7,$i3($Tbl)           # Td2[s1>>8]
737         ext     $i3,$s0,0,8
738
739         lwxs    $t8,$i0($Tbl)           # Td3[s1]
740         ext     $i0,$s0,24,8
741         lwxs    $t9,$i1($Tbl)           # Td3[s2]
742         ext     $i1,$s1,24,8
743         lwxs    $t10,$i2($Tbl)          # Td3[s3]
744         ext     $i2,$s2,24,8
745         lwxs    $t11,$i3($Tbl)          # Td3[s0]
746         ext     $i3,$s3,24,8
747
748         rotr    $t0,$t0,8
749         rotr    $t1,$t1,8
750         rotr    $t2,$t2,8
751         rotr    $t3,$t3,8
752
753         rotr    $t4,$t4,16
754         rotr    $t5,$t5,16
755         rotr    $t6,$t6,16
756         rotr    $t7,$t7,16
757
758         xor     $t0,$t4
759         lwxs    $t4,$i0($Tbl)           # Td0[s0>>24]
760         xor     $t1,$t5
761         lwxs    $t5,$i1($Tbl)           # Td0[s1>>24]
762         xor     $t2,$t6
763         lwxs    $t6,$i2($Tbl)           # Td0[s2>>24]
764         xor     $t3,$t7
765         lwxs    $t7,$i3($Tbl)           # Td0[s3>>24]
766
767         rotr    $t8,$t8,24
768         lw      $s0,0($key0)
769         rotr    $t9,$t9,24
770         lw      $s1,4($key0)
771         rotr    $t10,$t10,24
772         lw      $s2,8($key0)
773         rotr    $t11,$t11,24
774         lw      $s3,12($key0)
775
776         xor     $t0,$t8
777         xor     $t1,$t9
778         xor     $t2,$t10
779         xor     $t3,$t11
780
781         xor     $t0,$t4
782         xor     $t1,$t5
783         xor     $t2,$t6
784         xor     $t3,$t7
785
786         sub     $cnt,1
787         $PTR_ADD $key0,16
788         xor     $s0,$t0
789         xor     $s1,$t1
790         xor     $s2,$t2
791         xor     $s3,$t3
792         .set    noreorder
793         bnez    $cnt,.Loop_dec
794         ext     $i0,$s3,16,8
795
796         _xtr    $i0,$s3,16-2
797 #else
798         _xtr    $i0,$s3,16-2
799 .Loop_dec:
800         _xtr    $i1,$s0,16-2
801         _xtr    $i2,$s1,16-2
802         _xtr    $i3,$s2,16-2
803         and     $i0,0x3fc
804         and     $i1,0x3fc
805         and     $i2,0x3fc
806         and     $i3,0x3fc
807         $PTR_ADD $i0,$Tbl
808         $PTR_ADD $i1,$Tbl
809         $PTR_ADD $i2,$Tbl
810         $PTR_ADD $i3,$Tbl
811 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
812         lw      $t0,0($i0)              # Td1[s3>>16]
813         _xtr    $i0,$s2,8-2
814         lw      $t1,0($i1)              # Td1[s0>>16]
815         _xtr    $i1,$s3,8-2
816         lw      $t2,0($i2)              # Td1[s1>>16]
817         _xtr    $i2,$s0,8-2
818         lw      $t3,0($i3)              # Td1[s2>>16]
819         _xtr    $i3,$s1,8-2
820 #else
821         lwl     $t0,3($i0)              # Td1[s3>>16]
822         lwl     $t1,3($i1)              # Td1[s0>>16]
823         lwl     $t2,3($i2)              # Td1[s1>>16]
824         lwl     $t3,3($i3)              # Td1[s2>>16]
825         lwr     $t0,2($i0)              # Td1[s3>>16]
826         _xtr    $i0,$s2,8-2
827         lwr     $t1,2($i1)              # Td1[s0>>16]
828         _xtr    $i1,$s3,8-2
829         lwr     $t2,2($i2)              # Td1[s1>>16]
830         _xtr    $i2,$s0,8-2
831         lwr     $t3,2($i3)              # Td1[s2>>16]
832         _xtr    $i3,$s1,8-2
833 #endif
834
835         and     $i0,0x3fc
836         and     $i1,0x3fc
837         and     $i2,0x3fc
838         and     $i3,0x3fc
839         $PTR_ADD $i0,$Tbl
840         $PTR_ADD $i1,$Tbl
841         $PTR_ADD $i2,$Tbl
842         $PTR_ADD $i3,$Tbl
843 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
844         rotr    $t0,$t0,8
845         rotr    $t1,$t1,8
846         rotr    $t2,$t2,8
847         rotr    $t3,$t3,8
848 # if defined(_MIPSEL)
849         lw      $t4,0($i0)              # Td2[s2>>8]
850         _xtr    $i0,$s1,0-2
851         lw      $t5,0($i1)              # Td2[s3>>8]
852         _xtr    $i1,$s2,0-2
853         lw      $t6,0($i2)              # Td2[s0>>8]
854         _xtr    $i2,$s3,0-2
855         lw      $t7,0($i3)              # Td2[s1>>8]
856         _xtr    $i3,$s0,0-2
857
858         and     $i0,0x3fc
859         and     $i1,0x3fc
860         and     $i2,0x3fc
861         and     $i3,0x3fc
862         $PTR_ADD $i0,$Tbl
863         $PTR_ADD $i1,$Tbl
864         $PTR_ADD $i2,$Tbl
865         $PTR_ADD $i3,$Tbl
866         lw      $t8,0($i0)              # Td3[s1]
867         $PTR_INS $i0,$s0,2,8
868         lw      $t9,0($i1)              # Td3[s2]
869         $PTR_INS $i1,$s1,2,8
870         lw      $t10,0($i2)             # Td3[s3]
871         $PTR_INS $i2,$s2,2,8
872         lw      $t11,0($i3)             # Td3[s0]
873         $PTR_INS $i3,$s3,2,8
874 #else
875         lw      $t4,0($i0)              # Td2[s2>>8]
876         $PTR_INS $i0,$s1,2,8
877         lw      $t5,0($i1)              # Td2[s3>>8]
878         $PTR_INS $i1,$s2,2,8
879         lw      $t6,0($i2)              # Td2[s0>>8]
880         $PTR_INS $i2,$s3,2,8
881         lw      $t7,0($i3)              # Td2[s1>>8]
882         $PTR_INS $i3,$s0,2,8
883
884         lw      $t8,0($i0)              # Td3[s1]
885         _xtr    $i0,$s0,24-2
886         lw      $t9,0($i1)              # Td3[s2]
887         _xtr    $i1,$s1,24-2
888         lw      $t10,0($i2)             # Td3[s3]
889         _xtr    $i2,$s2,24-2
890         lw      $t11,0($i3)             # Td3[s0]
891         _xtr    $i3,$s3,24-2
892
893         and     $i0,0x3fc
894         and     $i1,0x3fc
895         and     $i2,0x3fc
896         and     $i3,0x3fc
897         $PTR_ADD $i0,$Tbl
898         $PTR_ADD $i1,$Tbl
899         $PTR_ADD $i2,$Tbl
900         $PTR_ADD $i3,$Tbl
901 #endif
902         rotr    $t4,$t4,16
903         rotr    $t5,$t5,16
904         rotr    $t6,$t6,16
905         rotr    $t7,$t7,16
906
907         rotr    $t8,$t8,24
908         rotr    $t9,$t9,24
909         rotr    $t10,$t10,24
910         rotr    $t11,$t11,24
911 #else
912         lwl     $t4,2($i0)              # Td2[s2>>8]
913         lwl     $t5,2($i1)              # Td2[s3>>8]
914         lwl     $t6,2($i2)              # Td2[s0>>8]
915         lwl     $t7,2($i3)              # Td2[s1>>8]
916         lwr     $t4,1($i0)              # Td2[s2>>8]
917         _xtr    $i0,$s1,0-2
918         lwr     $t5,1($i1)              # Td2[s3>>8]
919         _xtr    $i1,$s2,0-2
920         lwr     $t6,1($i2)              # Td2[s0>>8]
921         _xtr    $i2,$s3,0-2
922         lwr     $t7,1($i3)              # Td2[s1>>8]
923         _xtr    $i3,$s0,0-2
924
925         and     $i0,0x3fc
926         and     $i1,0x3fc
927         and     $i2,0x3fc
928         and     $i3,0x3fc
929         $PTR_ADD $i0,$Tbl
930         $PTR_ADD $i1,$Tbl
931         $PTR_ADD $i2,$Tbl
932         $PTR_ADD $i3,$Tbl
933         lwl     $t8,1($i0)              # Td3[s1]
934         lwl     $t9,1($i1)              # Td3[s2]
935         lwl     $t10,1($i2)             # Td3[s3]
936         lwl     $t11,1($i3)             # Td3[s0]
937         lwr     $t8,0($i0)              # Td3[s1]
938         _xtr    $i0,$s0,24-2
939         lwr     $t9,0($i1)              # Td3[s2]
940         _xtr    $i1,$s1,24-2
941         lwr     $t10,0($i2)             # Td3[s3]
942         _xtr    $i2,$s2,24-2
943         lwr     $t11,0($i3)             # Td3[s0]
944         _xtr    $i3,$s3,24-2
945
946         and     $i0,0x3fc
947         and     $i1,0x3fc
948         and     $i2,0x3fc
949         and     $i3,0x3fc
950         $PTR_ADD $i0,$Tbl
951         $PTR_ADD $i1,$Tbl
952         $PTR_ADD $i2,$Tbl
953         $PTR_ADD $i3,$Tbl
954 #endif
955
956         xor     $t0,$t4
957         lw      $t4,0($i0)              # Td0[s0>>24]
958         xor     $t1,$t5
959         lw      $t5,0($i1)              # Td0[s1>>24]
960         xor     $t2,$t6
961         lw      $t6,0($i2)              # Td0[s2>>24]
962         xor     $t3,$t7
963         lw      $t7,0($i3)              # Td0[s3>>24]
964
965         xor     $t0,$t8
966         lw      $s0,0($key0)
967         xor     $t1,$t9
968         lw      $s1,4($key0)
969         xor     $t2,$t10
970         lw      $s2,8($key0)
971         xor     $t3,$t11
972         lw      $s3,12($key0)
973
974         xor     $t0,$t4
975         xor     $t1,$t5
976         xor     $t2,$t6
977         xor     $t3,$t7
978
979         sub     $cnt,1
980         $PTR_ADD $key0,16
981         xor     $s0,$t0
982         xor     $s1,$t1
983         xor     $s2,$t2
984         xor     $s3,$t3
985         .set    noreorder
986         bnez    $cnt,.Loop_dec
987         _xtr    $i0,$s3,16-2
988 #endif
989
990         .set    reorder
991         lw      $t4,1024($Tbl)          # prefetch Td4
992         _xtr    $i0,$s3,16
993         lw      $t5,1024+32($Tbl)
994         _xtr    $i1,$s0,16
995         lw      $t6,1024+64($Tbl)
996         _xtr    $i2,$s1,16
997         lw      $t7,1024+96($Tbl)
998         _xtr    $i3,$s2,16
999         lw      $t8,1024+128($Tbl)
1000         and     $i0,0xff
1001         lw      $t9,1024+160($Tbl)
1002         and     $i1,0xff
1003         lw      $t10,1024+192($Tbl)
1004         and     $i2,0xff
1005         lw      $t11,1024+224($Tbl)
1006         and     $i3,0xff
1007
1008         $PTR_ADD $i0,$Tbl
1009         $PTR_ADD $i1,$Tbl
1010         $PTR_ADD $i2,$Tbl
1011         $PTR_ADD $i3,$Tbl
1012         lbu     $t0,1024($i0)           # Td4[s3>>16]
1013         _xtr    $i0,$s2,8
1014         lbu     $t1,1024($i1)           # Td4[s0>>16]
1015         _xtr    $i1,$s3,8
1016         lbu     $t2,1024($i2)           # Td4[s1>>16]
1017         _xtr    $i2,$s0,8
1018         lbu     $t3,1024($i3)           # Td4[s2>>16]
1019         _xtr    $i3,$s1,8
1020
1021         and     $i0,0xff
1022         and     $i1,0xff
1023         and     $i2,0xff
1024         and     $i3,0xff
1025         $PTR_ADD $i0,$Tbl
1026         $PTR_ADD $i1,$Tbl
1027         $PTR_ADD $i2,$Tbl
1028         $PTR_ADD $i3,$Tbl
1029 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1030 # if defined(_MIPSEL)
1031         lbu     $t4,1024($i0)           # Td4[s2>>8]
1032         $PTR_INS $i0,$s0,0,8
1033         lbu     $t5,1024($i1)           # Td4[s3>>8]
1034         $PTR_INS $i1,$s1,0,8
1035         lbu     $t6,1024($i2)           # Td4[s0>>8]
1036         $PTR_INS $i2,$s2,0,8
1037         lbu     $t7,1024($i3)           # Td4[s1>>8]
1038         $PTR_INS $i3,$s3,0,8
1039
1040         lbu     $t8,1024($i0)           # Td4[s0>>24]
1041         _xtr    $i0,$s1,0
1042         lbu     $t9,1024($i1)           # Td4[s1>>24]
1043         _xtr    $i1,$s2,0
1044         lbu     $t10,1024($i2)          # Td4[s2>>24]
1045         _xtr    $i2,$s3,0
1046         lbu     $t11,1024($i3)          # Td4[s3>>24]
1047         _xtr    $i3,$s0,0
1048
1049         $PTR_ADD $i0,$Tbl
1050         $PTR_ADD $i1,$Tbl
1051         $PTR_ADD $i2,$Tbl
1052         $PTR_ADD $i3,$Tbl
1053 # else
1054         lbu     $t4,1024($i0)           # Td4[s2>>8]
1055         _xtr    $i0,$s0,24
1056         lbu     $t5,1024($i1)           # Td4[s3>>8]
1057         _xtr    $i1,$s1,24
1058         lbu     $t6,1024($i2)           # Td4[s0>>8]
1059         _xtr    $i2,$s2,24
1060         lbu     $t7,1024($i3)           # Td4[s1>>8]
1061         _xtr    $i3,$s3,24
1062
1063         $PTR_ADD $i0,$Tbl
1064         $PTR_ADD $i1,$Tbl
1065         $PTR_ADD $i2,$Tbl
1066         $PTR_ADD $i3,$Tbl
1067         lbu     $t8,1024($i0)           # Td4[s0>>24]
1068         $PTR_INS $i0,$s1,0,8
1069         lbu     $t9,1024($i1)           # Td4[s1>>24]
1070         $PTR_INS $i1,$s2,0,8
1071         lbu     $t10,1024($i2)          # Td4[s2>>24]
1072         $PTR_INS $i2,$s3,0,8
1073         lbu     $t11,1024($i3)          # Td4[s3>>24]
1074         $PTR_INS $i3,$s0,0,8
1075 # endif
1076         _ins    $t0,16
1077         _ins    $t1,16
1078         _ins    $t2,16
1079         _ins    $t3,16
1080
1081         _ins2   $t0,$t4,8
1082         lbu     $t4,1024($i0)           # Td4[s1]
1083         _ins2   $t1,$t5,8
1084         lbu     $t5,1024($i1)           # Td4[s2]
1085         _ins2   $t2,$t6,8
1086         lbu     $t6,1024($i2)           # Td4[s3]
1087         _ins2   $t3,$t7,8
1088         lbu     $t7,1024($i3)           # Td4[s0]
1089
1090         _ins2   $t0,$t8,24
1091         lw      $s0,0($key0)
1092         _ins2   $t1,$t9,24
1093         lw      $s1,4($key0)
1094         _ins2   $t2,$t10,24
1095         lw      $s2,8($key0)
1096         _ins2   $t3,$t11,24
1097         lw      $s3,12($key0)
1098
1099         _ins2   $t0,$t4,0
1100         _ins2   $t1,$t5,0
1101         _ins2   $t2,$t6,0
1102         _ins2   $t3,$t7,0
1103 #else
1104         lbu     $t4,1024($i0)           # Td4[s2>>8]
1105         _xtr    $i0,$s0,24
1106         lbu     $t5,1024($i1)           # Td4[s3>>8]
1107         _xtr    $i1,$s1,24
1108         lbu     $t6,1024($i2)           # Td4[s0>>8]
1109         _xtr    $i2,$s2,24
1110         lbu     $t7,1024($i3)           # Td4[s1>>8]
1111         _xtr    $i3,$s3,24
1112
1113         $PTR_ADD $i0,$Tbl
1114         $PTR_ADD $i1,$Tbl
1115         $PTR_ADD $i2,$Tbl
1116         $PTR_ADD $i3,$Tbl
1117         lbu     $t8,1024($i0)           # Td4[s0>>24]
1118         _xtr    $i0,$s1,0
1119         lbu     $t9,1024($i1)           # Td4[s1>>24]
1120         _xtr    $i1,$s2,0
1121         lbu     $t10,1024($i2)          # Td4[s2>>24]
1122         _xtr    $i2,$s3,0
1123         lbu     $t11,1024($i3)          # Td4[s3>>24]
1124         _xtr    $i3,$s0,0
1125
1126         $PTR_ADD $i0,$Tbl
1127         $PTR_ADD $i1,$Tbl
1128         $PTR_ADD $i2,$Tbl
1129         $PTR_ADD $i3,$Tbl
1130
1131         _ins    $t0,16
1132         _ins    $t1,16
1133         _ins    $t2,16
1134         _ins    $t3,16
1135
1136         _ins    $t4,8
1137         _ins    $t5,8
1138         _ins    $t6,8
1139         _ins    $t7,8
1140
1141         xor     $t0,$t4
1142         lbu     $t4,1024($i0)           # Td4[s1]
1143         xor     $t1,$t5
1144         lbu     $t5,1024($i1)           # Td4[s2]
1145         xor     $t2,$t6
1146         lbu     $t6,1024($i2)           # Td4[s3]
1147         xor     $t3,$t7
1148         lbu     $t7,1024($i3)           # Td4[s0]
1149
1150         _ins    $t8,24
1151         lw      $s0,0($key0)
1152         _ins    $t9,24
1153         lw      $s1,4($key0)
1154         _ins    $t10,24
1155         lw      $s2,8($key0)
1156         _ins    $t11,24
1157         lw      $s3,12($key0)
1158
1159         xor     $t0,$t8
1160         xor     $t1,$t9
1161         xor     $t2,$t10
1162         xor     $t3,$t11
1163
1164         _ins    $t4,0
1165         _ins    $t5,0
1166         _ins    $t6,0
1167         _ins    $t7,0
1168
1169         xor     $t0,$t4
1170         xor     $t1,$t5
1171         xor     $t2,$t6
1172         xor     $t3,$t7
1173 #endif
1174
1175         xor     $s0,$t0
1176         xor     $s1,$t1
1177         xor     $s2,$t2
1178         xor     $s3,$t3
1179
1180         jr      $ra
1181 .end    _mips_AES_decrypt
1182
1183 .align  5
1184 .globl  AES_decrypt
1185 .ent    AES_decrypt
1186 AES_decrypt:
1187         .frame  $sp,$FRAMESIZE,$ra
1188         .mask   $SAVED_REGS_MASK,-$SZREG
1189         .set    noreorder
1190 ___
# AES_decrypt is the exported wrapper around _mips_AES_decrypt. Its text is
# assembled from several heredoc fragments so that flavour-specific pieces
# (o32 vs. non-o32 PIC setup, extra NUBI register saves) can be spliced in.
# $FRAMESIZE, $SAVED_REGS_MASK, $inp, $out and $Tbl used below are declared
# earlier in the enclosing {{{ }}} scope.
1191 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1192         .cpload $pf
1193 ___
# Common prologue: allocate the frame, then store $ra, $fp and $s4-$s11 in
# descending slots starting at $FRAMESIZE-1*$SZREG.
1194 $code.=<<___;
1195         $PTR_SUB $sp,$FRAMESIZE
1196         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1197         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1198         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
1199         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
1200         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
1201         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
1202         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
1203         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
1204         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
1205         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
1206 ___
# Only the NUBI flavour additionally stores registers 12-15 and $gp
# (slots -11 .. -15); other flavours skip these stores.
1207 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1208         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
1209         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
1210         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
1211         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
1212         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
1213 ___
1214 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1215         .cplocal        $Tbl
1216         .cpsetup        $pf,$zero,AES_decrypt
1217 ___
# Body: point $Tbl at AES_Td, read the four state words from $inp with
# lwl/lwr pairs, call the core routine, and write $s0-$s3 back to $out with
# swr/swl pairs. The epilogue that follows reloads the registers from the
# same slots the prologue stored them in.
1218 $code.=<<___;
1219         .set    reorder
1220         la      $Tbl,AES_Td             # PIC-ified 'load address'
1221
1222         lwl     $s0,0+$MSB($inp)
1223         lwl     $s1,4+$MSB($inp)
1224         lwl     $s2,8+$MSB($inp)
1225         lwl     $s3,12+$MSB($inp)
1226         lwr     $s0,0+$LSB($inp)
1227         lwr     $s1,4+$LSB($inp)
1228         lwr     $s2,8+$LSB($inp)
1229         lwr     $s3,12+$LSB($inp)
1230
1231         bal     _mips_AES_decrypt
1232
1233         swr     $s0,0+$LSB($out)
1234         swr     $s1,4+$LSB($out)
1235         swr     $s2,8+$LSB($out)
1236         swr     $s3,12+$LSB($out)
1237         swl     $s0,0+$MSB($out)
1238         swl     $s1,4+$MSB($out)
1239         swl     $s2,8+$MSB($out)
1240         swl     $s3,12+$MSB($out)
1241
1242         .set    noreorder
1243         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1244         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1245         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
1246         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
1247         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
1248         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
1249         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
1250         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
1251         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
1252         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
1253 ___
# NUBI-only reloads, mirroring the NUBI-only stores in the prologue.
1254 $code.=<<___ if ($flavour =~ /nubi/i);
1255         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
1256         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
1257         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
1258         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
1259         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
1260 ___
# Return. ".set noreorder" is still in effect, so the stack-pointer
# adjustment written after "jr" occupies its delay slot.
1261 $code.=<<___;
1262         jr      $ra
1263         $PTR_ADD $sp,$FRAMESIZE
1264 .end    AES_decrypt
1265 ___
1266 }}}
1267 \f
1268 {{{
# Frame size used by the exported key-setup wrappers: eight register-sized
# slots.
1269 my $FRAMESIZE=8*$SZREG;
# Operand for the .mask directive. Bits 31 and 30 ($ra, $fp) are always set;
# the NUBI value also sets bits 12-15 ($s0-$s3) and bit 3 ($gp).
1270 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
1271
# Incoming arguments live in $a0-$a2; $a3 holds the table pointer that the
# exported wrappers load before calling the worker.
1272 my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
# Up to eight working key words (four for 128-bit keys, six for 192, eight
# for 256).
1273 my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
# Scratch registers for the byte lookups. Note that $i1 is $t0, the same
# register that carries the status code on exit, and $i0 is $at, which is also
# used for the key-size comparison constants.
1274 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
# $rcon walks a word table starting 256 bytes past $Tbl (presumably the round
# constants stored after the byte table; the table data is not in this part
# of the file). $cnt is the remaining-iteration counter and, on success,
# is left holding the round count.
1275 my ($rcon,$cnt)=($gp,$fp);
1276
# _mips_AES_set_encrypt_key: frameless worker (".frame $sp,0,$ra") shared by
# AES_set_encrypt_key and AES_set_decrypt_key.
#
# Status is returned in $t0:
#   -1  $inp or $key is zero,
#   -2  $bits is not 128, 192 or 256,
#    0  success.
#
# On success the expanded schedule has been written through $key, the round
# count (10, 12 or 14) has been stored at byte offset 240 from the start of
# the schedule and is also left in $cnt, and $key has been rewound to the
# start of the schedule.
#
# Each key size has its own loop (.L128bits/.L192bits/.L256bits, run 10, 8
# and 7 times respectively). An iteration stores the current key words,
# substitutes the four bytes of the last key word through byte loads from
# $Tbl, repositions them with _bias, xors in the word at $rcon, and chains the
# result through the remaining words. The 256-bit loop has a second
# substitution step (plain sll shifts, no $rcon word) for $rk4-$rk7.
#
# The code switches between ".set reorder" and ".set noreorder"; in the
# noreorder regions the instruction written after each branch is its delay
# slot and is executed whether or not the branch is taken.
1277 $code.=<<___;
1278 .align  5
1279 .ent    _mips_AES_set_encrypt_key
1280 _mips_AES_set_encrypt_key:
1281         .frame  $sp,0,$ra
1282         .set    noreorder
1283         beqz    $inp,.Lekey_done
1284         li      $t0,-1
1285         beqz    $key,.Lekey_done
1286         $PTR_ADD $rcon,$Tbl,256
1287
1288         .set    reorder
1289         lwl     $rk0,0+$MSB($inp)       # load 128 bits
1290         lwl     $rk1,4+$MSB($inp)
1291         lwl     $rk2,8+$MSB($inp)
1292         lwl     $rk3,12+$MSB($inp)
1293         li      $at,128
1294         lwr     $rk0,0+$LSB($inp)
1295         lwr     $rk1,4+$LSB($inp)
1296         lwr     $rk2,8+$LSB($inp)
1297         lwr     $rk3,12+$LSB($inp)
1298         .set    noreorder
1299         beq     $bits,$at,.L128bits
1300         li      $cnt,10
1301
1302         .set    reorder
1303         lwl     $rk4,16+$MSB($inp)      # load 192 bits
1304         lwl     $rk5,20+$MSB($inp)
1305         li      $at,192
1306         lwr     $rk4,16+$LSB($inp)
1307         lwr     $rk5,20+$LSB($inp)
1308         .set    noreorder
1309         beq     $bits,$at,.L192bits
1310         li      $cnt,8
1311
1312         .set    reorder
1313         lwl     $rk6,24+$MSB($inp)      # load 256 bits
1314         lwl     $rk7,28+$MSB($inp)
1315         li      $at,256
1316         lwr     $rk6,24+$LSB($inp)
1317         lwr     $rk7,28+$LSB($inp)
1318         .set    noreorder
1319         beq     $bits,$at,.L256bits
1320         li      $cnt,7
1321
1322         b       .Lekey_done
1323         li      $t0,-2
1324
1325 .align  4
1326 .L128bits:
1327         .set    reorder
1328         srl     $i0,$rk3,16
1329         srl     $i1,$rk3,8
1330         and     $i0,0xff
1331         and     $i1,0xff
1332         and     $i2,$rk3,0xff
1333         srl     $i3,$rk3,24
1334         $PTR_ADD $i0,$Tbl
1335         $PTR_ADD $i1,$Tbl
1336         $PTR_ADD $i2,$Tbl
1337         $PTR_ADD $i3,$Tbl
1338         lbu     $i0,0($i0)
1339         lbu     $i1,0($i1)
1340         lbu     $i2,0($i2)
1341         lbu     $i3,0($i3)
1342
1343         sw      $rk0,0($key)
1344         sw      $rk1,4($key)
1345         sw      $rk2,8($key)
1346         sw      $rk3,12($key)
1347         sub     $cnt,1
1348         $PTR_ADD $key,16
1349
1350         _bias   $i0,24
1351         _bias   $i1,16
1352         _bias   $i2,8
1353         _bias   $i3,0
1354
1355         xor     $rk0,$i0
1356         lw      $i0,0($rcon)
1357         xor     $rk0,$i1
1358         xor     $rk0,$i2
1359         xor     $rk0,$i3
1360         xor     $rk0,$i0
1361
1362         xor     $rk1,$rk0
1363         xor     $rk2,$rk1
1364         xor     $rk3,$rk2
1365
1366         .set    noreorder
1367         bnez    $cnt,.L128bits
1368         $PTR_ADD $rcon,4
1369
1370         sw      $rk0,0($key)
1371         sw      $rk1,4($key)
1372         sw      $rk2,8($key)
1373         li      $cnt,10
1374         sw      $rk3,12($key)
1375         li      $t0,0
1376         sw      $cnt,80($key)
1377         b       .Lekey_done
1378         $PTR_SUB $key,10*16
1379
1380 .align  4
1381 .L192bits:
1382         .set    reorder
1383         srl     $i0,$rk5,16
1384         srl     $i1,$rk5,8
1385         and     $i0,0xff
1386         and     $i1,0xff
1387         and     $i2,$rk5,0xff
1388         srl     $i3,$rk5,24
1389         $PTR_ADD $i0,$Tbl
1390         $PTR_ADD $i1,$Tbl
1391         $PTR_ADD $i2,$Tbl
1392         $PTR_ADD $i3,$Tbl
1393         lbu     $i0,0($i0)
1394         lbu     $i1,0($i1)
1395         lbu     $i2,0($i2)
1396         lbu     $i3,0($i3)
1397
1398         sw      $rk0,0($key)
1399         sw      $rk1,4($key)
1400         sw      $rk2,8($key)
1401         sw      $rk3,12($key)
1402         sw      $rk4,16($key)
1403         sw      $rk5,20($key)
1404         sub     $cnt,1
1405         $PTR_ADD $key,24
1406
1407         _bias   $i0,24
1408         _bias   $i1,16
1409         _bias   $i2,8
1410         _bias   $i3,0
1411
1412         xor     $rk0,$i0
1413         lw      $i0,0($rcon)
1414         xor     $rk0,$i1
1415         xor     $rk0,$i2
1416         xor     $rk0,$i3
1417         xor     $rk0,$i0
1418
1419         xor     $rk1,$rk0
1420         xor     $rk2,$rk1
1421         xor     $rk3,$rk2
1422         xor     $rk4,$rk3
1423         xor     $rk5,$rk4
1424
1425         .set    noreorder
1426         bnez    $cnt,.L192bits
1427         $PTR_ADD $rcon,4
1428
1429         sw      $rk0,0($key)
1430         sw      $rk1,4($key)
1431         sw      $rk2,8($key)
1432         li      $cnt,12
1433         sw      $rk3,12($key)
1434         li      $t0,0
1435         sw      $cnt,48($key)
1436         b       .Lekey_done
1437         $PTR_SUB $key,12*16
1438
1439 .align  4
1440 .L256bits:
1441         .set    reorder
1442         srl     $i0,$rk7,16
1443         srl     $i1,$rk7,8
1444         and     $i0,0xff
1445         and     $i1,0xff
1446         and     $i2,$rk7,0xff
1447         srl     $i3,$rk7,24
1448         $PTR_ADD $i0,$Tbl
1449         $PTR_ADD $i1,$Tbl
1450         $PTR_ADD $i2,$Tbl
1451         $PTR_ADD $i3,$Tbl
1452         lbu     $i0,0($i0)
1453         lbu     $i1,0($i1)
1454         lbu     $i2,0($i2)
1455         lbu     $i3,0($i3)
1456
1457         sw      $rk0,0($key)
1458         sw      $rk1,4($key)
1459         sw      $rk2,8($key)
1460         sw      $rk3,12($key)
1461         sw      $rk4,16($key)
1462         sw      $rk5,20($key)
1463         sw      $rk6,24($key)
1464         sw      $rk7,28($key)
1465         sub     $cnt,1
1466
1467         _bias   $i0,24
1468         _bias   $i1,16
1469         _bias   $i2,8
1470         _bias   $i3,0
1471
1472         xor     $rk0,$i0
1473         lw      $i0,0($rcon)
1474         xor     $rk0,$i1
1475         xor     $rk0,$i2
1476         xor     $rk0,$i3
1477         xor     $rk0,$i0
1478
1479         xor     $rk1,$rk0
1480         xor     $rk2,$rk1
1481         xor     $rk3,$rk2
1482         beqz    $cnt,.L256bits_done
1483
1484         srl     $i0,$rk3,24
1485         srl     $i1,$rk3,16
1486         srl     $i2,$rk3,8
1487         and     $i3,$rk3,0xff
1488         and     $i1,0xff
1489         and     $i2,0xff
1490         $PTR_ADD $i0,$Tbl
1491         $PTR_ADD $i1,$Tbl
1492         $PTR_ADD $i2,$Tbl
1493         $PTR_ADD $i3,$Tbl
1494         lbu     $i0,0($i0)
1495         lbu     $i1,0($i1)
1496         lbu     $i2,0($i2)
1497         lbu     $i3,0($i3)
1498         sll     $i0,24
1499         sll     $i1,16
1500         sll     $i2,8
1501
1502         xor     $rk4,$i0
1503         xor     $rk4,$i1
1504         xor     $rk4,$i2
1505         xor     $rk4,$i3
1506
1507         xor     $rk5,$rk4
1508         xor     $rk6,$rk5
1509         xor     $rk7,$rk6
1510
1511         $PTR_ADD $key,32
1512         .set    noreorder
1513         b       .L256bits
1514         $PTR_ADD $rcon,4
1515
1516 .L256bits_done:
1517         sw      $rk0,32($key)
1518         sw      $rk1,36($key)
1519         sw      $rk2,40($key)
1520         li      $cnt,14
1521         sw      $rk3,44($key)
1522         li      $t0,0
1523         sw      $cnt,48($key)
1524         $PTR_SUB $key,12*16
1525
1526 .Lekey_done:
1527         jr      $ra
1528         nop
1529 .end    _mips_AES_set_encrypt_key
1530
1531 .globl  AES_set_encrypt_key
1532 .ent    AES_set_encrypt_key
1533 AES_set_encrypt_key:
1534         .frame  $sp,$FRAMESIZE,$ra
1535         .mask   $SAVED_REGS_MASK,-$SZREG
1536         .set    noreorder
1537 ___
# AES_set_encrypt_key is the exported wrapper around
# _mips_AES_set_encrypt_key: it builds a $FRAMESIZE-byte frame, saves the
# registers the worker clobbers, points $Tbl at AES_Te4, calls the worker and
# returns the worker's status ($t0) in $a0.
1538 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1539         .cpload $pf
1540 ___
# $ra and $fp are saved for every flavour ($fp is the worker's counter).
1541 $code.=<<___;
1542         $PTR_SUB $sp,$FRAMESIZE
1543         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1544         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1545 ___
# NUBI additionally saves $s0-$s3 and $gp, in slots -3 .. -7.
1546 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1547         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1548         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1549         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1550         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1551         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1552 ___
1553 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1554         .cplocal        $Tbl
1555         .cpsetup        $pf,$zero,AES_set_encrypt_key
1556 ___
1557 $code.=<<___;
1558         .set    reorder
1559         la      $Tbl,AES_Te4            # PIC-ified 'load address'
1560
1561         bal     _mips_AES_set_encrypt_key
1562
1563         .set    noreorder
1564         move    $a0,$t0
1565         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1566         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1567 ___
# The NUBI reloads must use the very slots the prologue stored to (-3 .. -7).
# Offsets -11 .. -15 would lie below $sp for this eight-slot frame and would
# reload $s0-$s3/$gp with unrelated memory.
1568 $code.=<<___ if ($flavour =~ /nubi/i);
1569         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1570         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1571         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1572         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1573         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1574 ___
# Return; under ".set noreorder" the frame release sits in the jr delay slot.
1575 $code.=<<___;
1576         jr      $ra
1577         $PTR_ADD $sp,$FRAMESIZE
1578 .end    AES_set_encrypt_key
1579 ___
1580 \f
# AES_set_decrypt_key: exported routine that first runs
# _mips_AES_set_encrypt_key and then converts the schedule in place.
#
# $head/$tail reuse the $inp/$bits argument registers, which are no longer
# needed once the worker has returned.
1581 my ($head,$tail)=($inp,$bits);
# Temporaries for the mixing loop; they occupy the same registers as the
# worker's $rk0-$rk7.
1582 my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
# $m is a scratch mask; the other three hold the byte-replicated constants
# 0x80808080, 0x7f7f7f7f and 0x1b1b1b1b. $x80808080 lives in $t0, the status
# register, which is why the status is reloaded with 0 after the loop.
1583 my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
1584 $code.=<<___;
1585 .align  5
1586 .globl  AES_set_decrypt_key
1587 .ent    AES_set_decrypt_key
1588 AES_set_decrypt_key:
1589         .frame  $sp,$FRAMESIZE,$ra
1590         .mask   $SAVED_REGS_MASK,-$SZREG
1591         .set    noreorder
1592 ___
1593 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1594         .cpload $pf
1595 ___
# $ra and $fp are saved for every flavour.
1596 $code.=<<___;
1597         $PTR_SUB $sp,$FRAMESIZE
1598         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1599         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1600 ___
# NUBI additionally saves $s0-$s3 and $gp, in slots -3 .. -7.
1601 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1602         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1603         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1604         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1605         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1606         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1607 ___
1608 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1609         .cplocal        $Tbl
1610         .cpsetup        $pf,$zero,AES_set_decrypt_key
1611 ___
# Body, in three steps:
#  1. Call the encrypt-key worker; a negative status in $t0 skips straight to
#     the epilogue, which returns that status unchanged.
#  2. .Lswap: exchange 16-byte round keys between $head (start of schedule)
#     and $tail (start + rounds*16), stepping towards each other until the
#     two pointers are equal, i.e. reverse the order of the round keys.
#  3. .Lmix: transform (rounds-1)*4 words starting 16 bytes into the schedule,
#     one word per iteration. Each pass derives tp2/tp4/tp8 from tp1 by three
#     packed-byte doublings (shift left within each byte, xor 0x1b into bytes
#     whose top bit was set), forms tp9/tpb/tpd/tpe by xor, and folds rotated
#     copies of tpd, tp9 and tpb into tpe before storing it. This looks like
#     the AES InvMixColumns step applied to the inner round keys.
#     The load marked "modulo-scheduled" fetches the next iteration's input
#     before $key is advanced.
1612 $code.=<<___;
1613         .set    reorder
1614         la      $Tbl,AES_Te4            # PIC-ified 'load address'
1615
1616         bal     _mips_AES_set_encrypt_key
1617
1618         bltz    $t0,.Ldkey_done
1619
1620         sll     $at,$cnt,4
1621         $PTR_ADD $head,$key,0
1622         $PTR_ADD $tail,$key,$at
1623 .align  4
1624 .Lswap:
1625         lw      $rk0,0($head)
1626         lw      $rk1,4($head)
1627         lw      $rk2,8($head)
1628         lw      $rk3,12($head)
1629         lw      $rk4,0($tail)
1630         lw      $rk5,4($tail)
1631         lw      $rk6,8($tail)
1632         lw      $rk7,12($tail)
1633         sw      $rk0,0($tail)
1634         sw      $rk1,4($tail)
1635         sw      $rk2,8($tail)
1636         sw      $rk3,12($tail)
1637         $PTR_ADD $head,16
1638         $PTR_SUB $tail,16
1639         sw      $rk4,-16($head)
1640         sw      $rk5,-12($head)
1641         sw      $rk6,-8($head)
1642         sw      $rk7,-4($head)
1643         bne     $head,$tail,.Lswap
1644
1645         lw      $tp1,16($key)           # modulo-scheduled
1646         lui     $x80808080,0x8080
1647         sub     $cnt,1
1648         or      $x80808080,0x8080
1649         sll     $cnt,2
1650         $PTR_ADD $key,16
1651         lui     $x1b1b1b1b,0x1b1b
1652         nor     $x7f7f7f7f,$zero,$x80808080
1653         or      $x1b1b1b1b,0x1b1b
1654 .align  4
1655 .Lmix:
1656         and     $m,$tp1,$x80808080
1657         and     $tp2,$tp1,$x7f7f7f7f
1658         srl     $tp4,$m,7
1659         addu    $tp2,$tp2               # tp2<<1
1660         subu    $m,$tp4
1661         and     $m,$x1b1b1b1b
1662         xor     $tp2,$m
1663
1664         and     $m,$tp2,$x80808080
1665         and     $tp4,$tp2,$x7f7f7f7f
1666         srl     $tp8,$m,7
1667         addu    $tp4,$tp4               # tp4<<1
1668         subu    $m,$tp8
1669         and     $m,$x1b1b1b1b
1670         xor     $tp4,$m
1671
1672         and     $m,$tp4,$x80808080
1673         and     $tp8,$tp4,$x7f7f7f7f
1674         srl     $tp9,$m,7
1675         addu    $tp8,$tp8               # tp8<<1
1676         subu    $m,$tp9
1677         and     $m,$x1b1b1b1b
1678         xor     $tp8,$m
1679
1680         xor     $tp9,$tp8,$tp1
1681         xor     $tpe,$tp8,$tp4
1682         xor     $tpb,$tp9,$tp2
1683         xor     $tpd,$tp9,$tp4
1684
1685 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1686         rotr    $tp1,$tpd,16
1687          xor    $tpe,$tp2
1688         rotr    $tp2,$tp9,8
1689         xor     $tpe,$tp1
1690         rotr    $tp4,$tpb,24
1691         xor     $tpe,$tp2
1692         lw      $tp1,4($key)            # modulo-scheduled
1693         xor     $tpe,$tp4
1694 #else
1695         _ror    $tp1,$tpd,16
1696          xor    $tpe,$tp2
1697         _ror    $tp2,$tpd,-16
1698         xor     $tpe,$tp1
1699         _ror    $tp1,$tp9,8
1700         xor     $tpe,$tp2
1701         _ror    $tp2,$tp9,-24
1702         xor     $tpe,$tp1
1703         _ror    $tp1,$tpb,24
1704         xor     $tpe,$tp2
1705         _ror    $tp2,$tpb,-8
1706         xor     $tpe,$tp1
1707         lw      $tp1,4($key)            # modulo-scheduled
1708         xor     $tpe,$tp2
1709 #endif
1710         sub     $cnt,1
1711         sw      $tpe,0($key)
1712         $PTR_ADD $key,4
1713         bnez    $cnt,.Lmix
1714
1715         li      $t0,0
1716 .Ldkey_done:
1717         .set    noreorder
1718         move    $a0,$t0
1719         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1720         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1721 ___
# The NUBI reloads must use the very slots the prologue stored to (-3 .. -7).
# Offsets -11 .. -15 would lie below $sp for this eight-slot frame and would
# reload $s0-$s3/$gp with unrelated memory.
1722 $code.=<<___ if ($flavour =~ /nubi/i);
1723         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1724         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1725         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1726         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1727         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1728 ___
# Return; under ".set noreorder" the frame release sits in the jr delay slot.
1729 $code.=<<___;
1730         jr      $ra
1731         $PTR_ADD $sp,$FRAMESIZE
1732 .end    AES_set_decrypt_key
1733 ___
1734 }}}
1735
1736 ######################################################################
1737 # Tables are kept in endian-neutral manner
1738 $code.=<<___;
1739 .rdata
1740 .align  10
1741 AES_Te:
1742 .byte   0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84     # Te0
1743 .byte   0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
1744 .byte   0xff,0xf2,0xf2,0x0d,    0xd6,0x6b,0x6b,0xbd
1745 .byte   0xde,0x6f,0x6f,0xb1,    0x91,0xc5,0xc5,0x54
1746 .byte   0x60,0x30,0x30,0x50,    0x02,0x01,0x01,0x03
1747 .byte   0xce,0x67,0x67,0xa9,    0x56,0x2b,0x2b,0x7d
1748 .byte   0xe7,0xfe,0xfe,0x19,    0xb5,0xd7,0xd7,0x62
1749 .byte   0x4d,0xab,0xab,0xe6,    0xec,0x76,0x76,0x9a
1750 .byte   0x8f,0xca,0xca,0x45,    0x1f,0x82,0x82,0x9d
1751 .byte   0x89,0xc9,0xc9,0x40,    0xfa,0x7d,0x7d,0x87
1752 .byte   0xef,0xfa,0xfa,0x15,    0xb2,0x59,0x59,0xeb
1753 .byte   0x8e,0x47,0x47,0xc9,    0xfb,0xf0,0xf0,0x0b
1754 .byte   0x41,0xad,0xad,0xec,    0xb3,0xd4,0xd4,0x67
1755 .byte   0x5f,0xa2,0xa2,0xfd,    0x45,0xaf,0xaf,0xea
1756 .byte   0x23,0x9c,0x9c,0xbf,    0x53,0xa4,0xa4,0xf7
1757 .byte   0xe4,0x72,0x72,0x96,    0x9b,0xc0,0xc0,0x5b
1758 .byte   0x75,0xb7,0xb7,0xc2,    0xe1,0xfd,0xfd,0x1c
1759 .byte   0x3d,0x93,0x93,0xae,    0x4c,0x26,0x26,0x6a
1760 .byte   0x6c,0x36,0x36,0x5a,    0x7e,0x3f,0x3f,0x41
1761 .byte   0xf5,0xf7,0xf7,0x02,    0x83,0xcc,0xcc,0x4f
1762 .byte   0x68,0x34,0x34,0x5c,    0x51,0xa5,0xa5,0xf4
1763 .byte   0xd1,0xe5,0xe5,0x34,    0xf9,0xf1,0xf1,0x08
1764 .byte   0xe2,0x71,0x71,0x93,    0xab,0xd8,0xd8,0x73
1765 .byte   0x62,0x31,0x31,0x53,    0x2a,0x15,0x15,0x3f
1766 .byte   0x08,0x04,0x04,0x0c,    0x95,0xc7,0xc7,0x52
1767 .byte   0x46,0x23,0x23,0x65,    0x9d,0xc3,0xc3,0x5e
1768 .byte   0x30,0x18,0x18,0x28,    0x37,0x96,0x96,0xa1
1769 .byte   0x0a,0x05,0x05,0x0f,    0x2f,0x9a,0x9a,0xb5
1770 .byte   0x0e,0x07,0x07,0x09,    0x24,0x12,0x12,0x36
1771 .byte   0x1b,0x80,0x80,0x9b,    0xdf,0xe2,0xe2,0x3d
1772 .byte   0xcd,0xeb,0xeb,0x26,    0x4e,0x27,0x27,0x69
1773 .byte   0x7f,0xb2,0xb2,0xcd,    0xea,0x75,0x75,0x9f
1774 .byte   0x12,0x09,0x09,0x1b,    0x1d,0x83,0x83,0x9e
1775 .byte   0x58,0x2c,0x2c,0x74,    0x34,0x1a,0x1a,0x2e
1776 .byte   0x36,0x1b,0x1b,0x2d,    0xdc,0x6e,0x6e,0xb2
1777 .byte   0xb4,0x5a,0x5a,0xee,    0x5b,0xa0,0xa0,0xfb
1778 .byte   0xa4,0x52,0x52,0xf6,    0x76,0x3b,0x3b,0x4d
1779 .byte   0xb7,0xd6,0xd6,0x61,    0x7d,0xb3,0xb3,0xce
1780 .byte   0x52,0x29,0x29,0x7b,    0xdd,0xe3,0xe3,0x3e
1781 .byte   0x5e,0x2f,0x2f,0x71,    0x13,0x84,0x84,0x97
1782 .byte   0xa6,0x53,0x53,0xf5,    0xb9,0xd1,0xd1,0x68
1783 .byte   0x00,0x00,0x00,0x00,    0xc1,0xed,0xed,0x2c
1784 .byte   0x40,0x20,0x20,0x60,    0xe3,0xfc,0xfc,0x1f
1785 .byte   0x79,0xb1,0xb1,0xc8,    0xb6,0x5b,0x5b,0xed
1786 .byte   0xd4,0x6a,0x6a,0xbe,    0x8d,0xcb,0xcb,0x46
1787 .byte   0x67,0xbe,0xbe,0xd9,    0x72,0x39,0x39,0x4b
1788 .byte   0x94,0x4a,0x4a,0xde,    0x98,0x4c,0x4c,0xd4
1789 .byte   0xb0,0x58,0x58,0xe8,    0x85,0xcf,0xcf,0x4a
1790 .byte   0xbb,0xd0,0xd0,0x6b,    0xc5,0xef,0xef,0x2a
1791 .byte   0x4f,0xaa,0xaa,0xe5,    0xed,0xfb,0xfb,0x16
1792 .byte   0x86,0x43,0x43,0xc5,    0x9a,0x4d,0x4d,0xd7
1793 .byte   0x66,0x33,0x33,0x55,    0x11,0x85,0x85,0x94
1794 .byte   0x8a,0x45,0x45,0xcf,    0xe9,0xf9,0xf9,0x10
1795 .byte   0x04,0x02,0x02,0x06,    0xfe,0x7f,0x7f,0x81
1796 .byte   0xa0,0x50,0x50,0xf0,    0x78,0x3c,0x3c,0x44
1797 .byte   0x25,0x9f,0x9f,0xba,    0x4b,0xa8,0xa8,0xe3
1798 .byte   0xa2,0x51,0x51,0xf3,    0x5d,0xa3,0xa3,0xfe
1799 .byte   0x80,0x40,0x40,0xc0,    0x05,0x8f,0x8f,0x8a
1800 .byte   0x3f,0x92,0x92,0xad,    0x21,0x9d,0x9d,0xbc
1801 .byte   0x70,0x38,0x38,0x48,    0xf1,0xf5,0xf5,0x04
1802 .byte   0x63,0xbc,0xbc,0xdf,    0x77,0xb6,0xb6,0xc1
1803 .byte   0xaf,0xda,0xda,0x75,    0x42,0x21,0x21,0x63
1804 .byte   0x20,0x10,0x10,0x30,    0xe5,0xff,0xff,0x1a
1805 .byte   0xfd,0xf3,0xf3,0x0e,    0xbf,0xd2,0xd2,0x6d
1806 .byte   0x81,0xcd,0xcd,0x4c,    0x18,0x0c,0x0c,0x14
1807 .byte   0x26,0x13,0x13,0x35,    0xc3,0xec,0xec,0x2f
1808 .byte   0xbe,0x5f,0x5f,0xe1,    0x35,0x97,0x97,0xa2
1809 .byte   0x88,0x44,0x44,0xcc,    0x2e,0x17,0x17,0x39
1810 .byte   0x93,0xc4,0xc4,0x57,    0x55,0xa7,0xa7,0xf2
1811 .byte   0xfc,0x7e,0x7e,0x82,    0x7a,0x3d,0x3d,0x47
1812 .byte   0xc8,0x64,0x64,0xac,    0xba,0x5d,0x5d,0xe7
1813 .byte   0x32,0x19,0x19,0x2b,    0xe6,0x73,0x73,0x95
1814 .byte   0xc0,0x60,0x60,0xa0,    0x19,0x81,0x81,0x98
1815 .byte   0x9e,0x4f,0x4f,0xd1,    0xa3,0xdc,0xdc,0x7f
1816 .byte   0x44,0x22,0x22,0x66,    0x54,0x2a,0x2a,0x7e
1817 .byte   0x3b,0x90,0x90,0xab,    0x0b,0x88,0x88,0x83
1818 .byte   0x8c,0x46,0x46,0xca,    0xc7,0xee,0xee,0x29
1819 .byte   0x6b,0xb8,0xb8,0xd3,    0x28,0x14,0x14,0x3c
1820 .byte   0xa7,0xde,0xde,0x79,    0xbc,0x5e,0x5e,0xe2
1821 .byte   0x16,0x0b,0x0b,0x1d,    0xad,0xdb,0xdb,0x76
1822 .byte   0xdb,0xe0,0xe0,0x3b,    0x64,0x32,0x32,0x56
1823 .byte   0x74,0x3a,0x3a,0x4e,    0x14,0x0a,0x0a,0x1e
1824 .byte   0x92,0x49,0x49,0xdb,    0x0c,0x06,0x06,0x0a
1825 .byte   0x48,0x24,0x24,0x6c,    0xb8,0x5c,0x5c,0xe4
1826 .byte   0x9f,0xc2,0xc2,0x5d,    0xbd,0xd3,0xd3,0x6e
1827 .byte   0x43,0xac,0xac,0xef,    0xc4,0x62,0x62,0xa6
1828 .byte   0x39,0x91,0x91,0xa8,    0x31,0x95,0x95,0xa4
1829 .byte   0xd3,0xe4,0xe4,0x37,    0xf2,0x79,0x79,0x8b
1830 .byte   0xd5,0xe7,0xe7,0x32,    0x8b,0xc8,0xc8,0x43
1831 .byte   0x6e,0x37,0x37,0x59,    0xda,0x6d,0x6d,0xb7
1832 .byte   0x01,0x8d,0x8d,0x8c,    0xb1,0xd5,0xd5,0x64
1833 .byte   0x9c,0x4e,0x4e,0xd2,    0x49,0xa9,0xa9,0xe0
1834 .byte   0xd8,0x6c,0x6c,0xb4,    0xac,0x56,0x56,0xfa
1835 .byte   0xf3,0xf4,0xf4,0x07,    0xcf,0xea,0xea,0x25
1836 .byte   0xca,0x65,0x65,0xaf,    0xf4,0x7a,0x7a,0x8e
1837 .byte   0x47,0xae,0xae,0xe9,    0x10,0x08,0x08,0x18
1838 .byte   0x6f,0xba,0xba,0xd5,    0xf0,0x78,0x78,0x88
1839 .byte   0x4a,0x25,0x25,0x6f,    0x5c,0x2e,0x2e,0x72
1840 .byte   0x38,0x1c,0x1c,0x24,    0x57,0xa6,0xa6,0xf1
1841 .byte   0x73,0xb4,0xb4,0xc7,    0x97,0xc6,0xc6,0x51
1842 .byte   0xcb,0xe8,0xe8,0x23,    0xa1,0xdd,0xdd,0x7c
1843 .byte   0xe8,0x74,0x74,0x9c,    0x3e,0x1f,0x1f,0x21
1844 .byte   0x96,0x4b,0x4b,0xdd,    0x61,0xbd,0xbd,0xdc
1845 .byte   0x0d,0x8b,0x8b,0x86,    0x0f,0x8a,0x8a,0x85
1846 .byte   0xe0,0x70,0x70,0x90,    0x7c,0x3e,0x3e,0x42
1847 .byte   0x71,0xb5,0xb5,0xc4,    0xcc,0x66,0x66,0xaa
1848 .byte   0x90,0x48,0x48,0xd8,    0x06,0x03,0x03,0x05
1849 .byte   0xf7,0xf6,0xf6,0x01,    0x1c,0x0e,0x0e,0x12
1850 .byte   0xc2,0x61,0x61,0xa3,    0x6a,0x35,0x35,0x5f
1851 .byte   0xae,0x57,0x57,0xf9,    0x69,0xb9,0xb9,0xd0
1852 .byte   0x17,0x86,0x86,0x91,    0x99,0xc1,0xc1,0x58
1853 .byte   0x3a,0x1d,0x1d,0x27,    0x27,0x9e,0x9e,0xb9
1854 .byte   0xd9,0xe1,0xe1,0x38,    0xeb,0xf8,0xf8,0x13
1855 .byte   0x2b,0x98,0x98,0xb3,    0x22,0x11,0x11,0x33
1856 .byte   0xd2,0x69,0x69,0xbb,    0xa9,0xd9,0xd9,0x70
1857 .byte   0x07,0x8e,0x8e,0x89,    0x33,0x94,0x94,0xa7
1858 .byte   0x2d,0x9b,0x9b,0xb6,    0x3c,0x1e,0x1e,0x22
1859 .byte   0x15,0x87,0x87,0x92,    0xc9,0xe9,0xe9,0x20
1860 .byte   0x87,0xce,0xce,0x49,    0xaa,0x55,0x55,0xff
1861 .byte   0x50,0x28,0x28,0x78,    0xa5,0xdf,0xdf,0x7a
1862 .byte   0x03,0x8c,0x8c,0x8f,    0x59,0xa1,0xa1,0xf8
1863 .byte   0x09,0x89,0x89,0x80,    0x1a,0x0d,0x0d,0x17
1864 .byte   0x65,0xbf,0xbf,0xda,    0xd7,0xe6,0xe6,0x31
1865 .byte   0x84,0x42,0x42,0xc6,    0xd0,0x68,0x68,0xb8
1866 .byte   0x82,0x41,0x41,0xc3,    0x29,0x99,0x99,0xb0
1867 .byte   0x5a,0x2d,0x2d,0x77,    0x1e,0x0f,0x0f,0x11
1868 .byte   0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
1869 .byte   0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
1870
1871 AES_Td:
1872 .byte   0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53     # Td0
1873 .byte   0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
1874 .byte   0x3b,0xab,0x6b,0xcb,    0x1f,0x9d,0x45,0xf1
1875 .byte   0xac,0xfa,0x58,0xab,    0x4b,0xe3,0x03,0x93
1876 .byte   0x20,0x30,0xfa,0x55,    0xad,0x76,0x6d,0xf6
1877 .byte   0x88,0xcc,0x76,0x91,    0xf5,0x02,0x4c,0x25
1878 .byte   0x4f,0xe5,0xd7,0xfc,    0xc5,0x2a,0xcb,0xd7
1879 .byte   0x26,0x35,0x44,0x80,    0xb5,0x62,0xa3,0x8f
1880 .byte   0xde,0xb1,0x5a,0x49,    0x25,0xba,0x1b,0x67
1881 .byte   0x45,0xea,0x0e,0x98,    0x5d,0xfe,0xc0,0xe1
1882 .byte   0xc3,0x2f,0x75,0x02,    0x81,0x4c,0xf0,0x12
1883 .byte   0x8d,0x46,0x97,0xa3,    0x6b,0xd3,0xf9,0xc6
1884 .byte   0x03,0x8f,0x5f,0xe7,    0x15,0x92,0x9c,0x95
1885 .byte   0xbf,0x6d,0x7a,0xeb,    0x95,0x52,0x59,0xda
1886 .byte   0xd4,0xbe,0x83,0x2d,    0x58,0x74,0x21,0xd3
1887 .byte   0x49,0xe0,0x69,0x29,    0x8e,0xc9,0xc8,0x44
1888 .byte   0x75,0xc2,0x89,0x6a,    0xf4,0x8e,0x79,0x78
1889 .byte   0x99,0x58,0x3e,0x6b,    0x27,0xb9,0x71,0xdd
1890 .byte   0xbe,0xe1,0x4f,0xb6,    0xf0,0x88,0xad,0x17
1891 .byte   0xc9,0x20,0xac,0x66,    0x7d,0xce,0x3a,0xb4
1892 .byte   0x63,0xdf,0x4a,0x18,    0xe5,0x1a,0x31,0x82
1893 .byte   0x97,0x51,0x33,0x60,    0x62,0x53,0x7f,0x45
1894 .byte   0xb1,0x64,0x77,0xe0,    0xbb,0x6b,0xae,0x84
1895 .byte   0xfe,0x81,0xa0,0x1c,    0xf9,0x08,0x2b,0x94
1896 .byte   0x70,0x48,0x68,0x58,    0x8f,0x45,0xfd,0x19
1897 .byte   0x94,0xde,0x6c,0x87,    0x52,0x7b,0xf8,0xb7
1898 .byte   0xab,0x73,0xd3,0x23,    0x72,0x4b,0x02,0xe2
1899 .byte   0xe3,0x1f,0x8f,0x57,    0x66,0x55,0xab,0x2a
1900 .byte   0xb2,0xeb,0x28,0x07,    0x2f,0xb5,0xc2,0x03
1901 .byte   0x86,0xc5,0x7b,0x9a,    0xd3,0x37,0x08,0xa5
1902 .byte   0x30,0x28,0x87,0xf2,    0x23,0xbf,0xa5,0xb2
1903 .byte   0x02,0x03,0x6a,0xba,    0xed,0x16,0x82,0x5c
1904 .byte   0x8a,0xcf,0x1c,0x2b,    0xa7,0x79,0xb4,0x92
1905 .byte   0xf3,0x07,0xf2,0xf0,    0x4e,0x69,0xe2,0xa1
1906 .byte   0x65,0xda,0xf4,0xcd,    0x06,0x05,0xbe,0xd5
1907 .byte   0xd1,0x34,0x62,0x1f,    0xc4,0xa6,0xfe,0x8a
1908 .byte   0x34,0x2e,0x53,0x9d,    0xa2,0xf3,0x55,0xa0
1909 .byte   0x05,0x8a,0xe1,0x32,    0xa4,0xf6,0xeb,0x75
1910 .byte   0x0b,0x83,0xec,0x39,    0x40,0x60,0xef,0xaa
1911 .byte   0x5e,0x71,0x9f,0x06,    0xbd,0x6e,0x10,0x51
1912 .byte   0x3e,0x21,0x8a,0xf9,    0x96,0xdd,0x06,0x3d
1913 .byte   0xdd,0x3e,0x05,0xae,    0x4d,0xe6,0xbd,0x46
1914 .byte   0x91,0x54,0x8d,0xb5,    0x71,0xc4,0x5d,0x05
1915 .byte   0x04,0x06,0xd4,0x6f,    0x60,0x50,0x15,0xff
1916 .byte   0x19,0x98,0xfb,0x24,    0xd6,0xbd,0xe9,0x97
1917 .byte   0x89,0x40,0x43,0xcc,    0x67,0xd9,0x9e,0x77
1918 .byte   0xb0,0xe8,0x42,0xbd,    0x07,0x89,0x8b,0x88
1919 .byte   0xe7,0x19,0x5b,0x38,    0x79,0xc8,0xee,0xdb
1920 .byte   0xa1,0x7c,0x0a,0x47,    0x7c,0x42,0x0f,0xe9
1921 .byte   0xf8,0x84,0x1e,0xc9,    0x00,0x00,0x00,0x00
1922 .byte   0x09,0x80,0x86,0x83,    0x32,0x2b,0xed,0x48
1923 .byte   0x1e,0x11,0x70,0xac,    0x6c,0x5a,0x72,0x4e
1924 .byte   0xfd,0x0e,0xff,0xfb,    0x0f,0x85,0x38,0x56
1925 .byte   0x3d,0xae,0xd5,0x1e,    0x36,0x2d,0x39,0x27
1926 .byte   0x0a,0x0f,0xd9,0x64,    0x68,0x5c,0xa6,0x21
1927 .byte   0x9b,0x5b,0x54,0xd1,    0x24,0x36,0x2e,0x3a
1928 .byte   0x0c,0x0a,0x67,0xb1,    0x93,0x57,0xe7,0x0f
1929 .byte   0xb4,0xee,0x96,0xd2,    0x1b,0x9b,0x91,0x9e
1930 .byte   0x80,0xc0,0xc5,0x4f,    0x61,0xdc,0x20,0xa2
1931 .byte   0x5a,0x77,0x4b,0x69,    0x1c,0x12,0x1a,0x16
1932 .byte   0xe2,0x93,0xba,0x0a,    0xc0,0xa0,0x2a,0xe5
1933 .byte   0x3c,0x22,0xe0,0x43,    0x12,0x1b,0x17,0x1d
1934 .byte   0x0e,0x09,0x0d,0x0b,    0xf2,0x8b,0xc7,0xad
1935 .byte   0x2d,0xb6,0xa8,0xb9,    0x14,0x1e,0xa9,0xc8
1936 .byte   0x57,0xf1,0x19,0x85,    0xaf,0x75,0x07,0x4c
1937 .byte   0xee,0x99,0xdd,0xbb,    0xa3,0x7f,0x60,0xfd
1938 .byte   0xf7,0x01,0x26,0x9f,    0x5c,0x72,0xf5,0xbc
1939 .byte   0x44,0x66,0x3b,0xc5,    0x5b,0xfb,0x7e,0x34
1940 .byte   0x8b,0x43,0x29,0x76,    0xcb,0x23,0xc6,0xdc
1941 .byte   0xb6,0xed,0xfc,0x68,    0xb8,0xe4,0xf1,0x63
1942 .byte   0xd7,0x31,0xdc,0xca,    0x42,0x63,0x85,0x10
1943 .byte   0x13,0x97,0x22,0x40,    0x84,0xc6,0x11,0x20
1944 .byte   0x85,0x4a,0x24,0x7d,    0xd2,0xbb,0x3d,0xf8
1945 .byte   0xae,0xf9,0x32,0x11,    0xc7,0x29,0xa1,0x6d
1946 .byte   0x1d,0x9e,0x2f,0x4b,    0xdc,0xb2,0x30,0xf3
1947 .byte   0x0d,0x86,0x52,0xec,    0x77,0xc1,0xe3,0xd0
1948 .byte   0x2b,0xb3,0x16,0x6c,    0xa9,0x70,0xb9,0x99
1949 .byte   0x11,0x94,0x48,0xfa,    0x47,0xe9,0x64,0x22
1950 .byte   0xa8,0xfc,0x8c,0xc4,    0xa0,0xf0,0x3f,0x1a
1951 .byte   0x56,0x7d,0x2c,0xd8,    0x22,0x33,0x90,0xef
1952 .byte   0x87,0x49,0x4e,0xc7,    0xd9,0x38,0xd1,0xc1
1953 .byte   0x8c,0xca,0xa2,0xfe,    0x98,0xd4,0x0b,0x36
1954 .byte   0xa6,0xf5,0x81,0xcf,    0xa5,0x7a,0xde,0x28
1955 .byte   0xda,0xb7,0x8e,0x26,    0x3f,0xad,0xbf,0xa4
1956 .byte   0x2c,0x3a,0x9d,0xe4,    0x50,0x78,0x92,0x0d
1957 .byte   0x6a,0x5f,0xcc,0x9b,    0x54,0x7e,0x46,0x62
1958 .byte   0xf6,0x8d,0x13,0xc2,    0x90,0xd8,0xb8,0xe8
1959 .byte   0x2e,0x39,0xf7,0x5e,    0x82,0xc3,0xaf,0xf5
1960 .byte   0x9f,0x5d,0x80,0xbe,    0x69,0xd0,0x93,0x7c
1961 .byte   0x6f,0xd5,0x2d,0xa9,    0xcf,0x25,0x12,0xb3
1962 .byte   0xc8,0xac,0x99,0x3b,    0x10,0x18,0x7d,0xa7
1963 .byte   0xe8,0x9c,0x63,0x6e,    0xdb,0x3b,0xbb,0x7b
1964 .byte   0xcd,0x26,0x78,0x09,    0x6e,0x59,0x18,0xf4
1965 .byte   0xec,0x9a,0xb7,0x01,    0x83,0x4f,0x9a,0xa8
1966 .byte   0xe6,0x95,0x6e,0x65,    0xaa,0xff,0xe6,0x7e
1967 .byte   0x21,0xbc,0xcf,0x08,    0xef,0x15,0xe8,0xe6
1968 .byte   0xba,0xe7,0x9b,0xd9,    0x4a,0x6f,0x36,0xce
1969 .byte   0xea,0x9f,0x09,0xd4,    0x29,0xb0,0x7c,0xd6
1970 .byte   0x31,0xa4,0xb2,0xaf,    0x2a,0x3f,0x23,0x31
1971 .byte   0xc6,0xa5,0x94,0x30,    0x35,0xa2,0x66,0xc0
1972 .byte   0x74,0x4e,0xbc,0x37,    0xfc,0x82,0xca,0xa6
1973 .byte   0xe0,0x90,0xd0,0xb0,    0x33,0xa7,0xd8,0x15
1974 .byte   0xf1,0x04,0x98,0x4a,    0x41,0xec,0xda,0xf7
1975 .byte   0x7f,0xcd,0x50,0x0e,    0x17,0x91,0xf6,0x2f
1976 .byte   0x76,0x4d,0xd6,0x8d,    0x43,0xef,0xb0,0x4d
1977 .byte   0xcc,0xaa,0x4d,0x54,    0xe4,0x96,0x04,0xdf
1978 .byte   0x9e,0xd1,0xb5,0xe3,    0x4c,0x6a,0x88,0x1b
1979 .byte   0xc1,0x2c,0x1f,0xb8,    0x46,0x65,0x51,0x7f
1980 .byte   0x9d,0x5e,0xea,0x04,    0x01,0x8c,0x35,0x5d
1981 .byte   0xfa,0x87,0x74,0x73,    0xfb,0x0b,0x41,0x2e
1982 .byte   0xb3,0x67,0x1d,0x5a,    0x92,0xdb,0xd2,0x52
1983 .byte   0xe9,0x10,0x56,0x33,    0x6d,0xd6,0x47,0x13
1984 .byte   0x9a,0xd7,0x61,0x8c,    0x37,0xa1,0x0c,0x7a
1985 .byte   0x59,0xf8,0x14,0x8e,    0xeb,0x13,0x3c,0x89
1986 .byte   0xce,0xa9,0x27,0xee,    0xb7,0x61,0xc9,0x35
1987 .byte   0xe1,0x1c,0xe5,0xed,    0x7a,0x47,0xb1,0x3c
1988 .byte   0x9c,0xd2,0xdf,0x59,    0x55,0xf2,0x73,0x3f
1989 .byte   0x18,0x14,0xce,0x79,    0x73,0xc7,0x37,0xbf
1990 .byte   0x53,0xf7,0xcd,0xea,    0x5f,0xfd,0xaa,0x5b
1991 .byte   0xdf,0x3d,0x6f,0x14,    0x78,0x44,0xdb,0x86
1992 .byte   0xca,0xaf,0xf3,0x81,    0xb9,0x68,0xc4,0x3e
1993 .byte   0x38,0x24,0x34,0x2c,    0xc2,0xa3,0x40,0x5f
1994 .byte   0x16,0x1d,0xc3,0x72,    0xbc,0xe2,0x25,0x0c
1995 .byte   0x28,0x3c,0x49,0x8b,    0xff,0x0d,0x95,0x41
1996 .byte   0x39,0xa8,0x01,0x71,    0x08,0x0c,0xb3,0xde
1997 .byte   0xd8,0xb4,0xe4,0x9c,    0x64,0x56,0xc1,0x90
1998 .byte   0x7b,0xcb,0x84,0x61,    0xd5,0x32,0xb6,0x70
1999 .byte   0x48,0x6c,0x5c,0x74,    0xd0,0xb8,0x57,0x42
2000
2001 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38  # Td4
2002 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
2003 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
2004 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
2005 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
2006 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
2007 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
2008 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
2009 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
2010 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
2011 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
2012 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
2013 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
2014 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
2015 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
2016 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
2017 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
2018 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
2019 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
2020 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
2021 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
2022 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
2023 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
2024 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
2025 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
2026 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
2027 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
2028 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
2029 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
2030 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
2031 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
2032 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
2033
2034 AES_Te4:
2035 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5  # Te4
2036 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
2037 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
2038 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
2039 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
2040 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
2041 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
2042 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
2043 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
2044 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
2045 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
2046 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
2047 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
2048 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
2049 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
2050 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
2051 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
2052 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
2053 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
2054 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
2055 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
2056 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
2057 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
2058 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
2059 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
2060 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
2061 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
2062 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
2063 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
2064 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
2065 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
2066 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
2067
2068 .byte   0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00     # rcon
2069 .byte   0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
2070 .byte   0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
2071 .byte   0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
2072 .byte   0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
2073 ___
2074 \f
# Post-process the assembly text accumulated in $code, one line at a time,
# and write the result to STDOUT.  Each line goes through these stages:
#   1. `...` spans are evaluated as Perl and replaced by the result;
#   2. made-up pseudo-instructions (_xtr, _ins, _ins2, _ror, _bias) are
#      turned into real shift/insert instructions whose amounts depend on
#      $big_endian;
#   3. for little-endian output, lwl/lwr/swl/swr offsets and rotr/ext
#      operands are adjusted.
foreach (split("\n",$code)) {
        # Replace every backtick-quoted expression with its evaluated value.
        s/\`([^\`]*)\`/eval $1/ge;

        # made-up _instructions, _xtr, _ins, _ins2, _ror and _bias, cope
        # with byte order dependencies...
        if (/^\s+_/) {
            # Two-operand shorthand "_op $r,x" (no second comma) becomes
            # "_op $r,$r,x", i.e. the destination doubles as the source.
            s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;

            # At most one of the following applies (they are chained with
            # "or").  With N the immediate operand:
            #   _xtr  -> srl by N            (big-endian) or 24-N       (little)
            #   _ins  -> sll by N            (big-endian) or 24-N       (little)
            #   _ins2 -> ins ...,pos,8 with pos = N       or 24-N       (little)
            #   _ror  -> srl by N            (big-endian) or -N         (little)
            #   _bias -> sll by N            (big-endian) or (N-16)&31  (little)
            # _xtr also accepts operands of the form "N-2", which are
            # evaluated arithmetically.
            s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
                sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
                                        :               eval("24-$3"))/e or
            s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
                sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
                                        :               eval("24-$3"))/e or
            s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
                sprintf("ins\t$1,$2,%d,8",$big_endian ? eval($3)
                                        :               eval("24-$3"))/e or
            s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
                sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
                                        :               eval("$3*-1"))/e or
            s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
                sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
                                        :               eval("($3-16)&31"))/e;

            # Clean up what the expansion above may have produced (again,
            # only the first matching rule fires):
            #   srl by a negative amount -> sll by the absolute amount
            #   srl by zero              -> "and rd,rs,0xff"
            #   sll by zero              -> commented out with a leading '#'
            s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
                sprintf("sll\t$1,$2,$3")/e                              or
            s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
                sprintf("and\t$1,$2,0xff")/e                            or
            s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
        }

        # convert lwl/lwr and swr/swl to little-endian order
        # The digit d directly before "(reg)" is rewritten as
        # (d - d%4 + (d%4 -/+ 1)) & 3: minus for lwl/swl, plus for lwr/swr.
        # For d in 0..3 that maps 0,1,2,3 to 3,0,1,2 (lwl/swl) and to
        # 1,2,3,0 (lwr/swr).
        # NOTE(review): because ".*" is greedy only the final digit of the
        # offset is captured, and "& 3" applies to the whole sum; this
        # presumably relies on offsets being in 0..3 - confirm against the
        # callers in the code template.
        if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
            s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
                sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e        or
            s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
                sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
        }

        # Little-endian output: rotr amounts become 32-N and the bit
        # position of 8-bit ext fields becomes 24-N.
        if (!$big_endian) {
            s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
            s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
        }

        print $_,"\n";
}

# Buffered write errors (full disk, broken pipe, ...) are only reported when
# the handle is closed, so fail loudly instead of silently leaving a
# truncated assembly file behind.
close STDOUT or die "error closing STDOUT: $!";