1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # AES for MIPS
11
12 # October 2010
13 #
14 # Code uses 1K[+256B] S-box and on a single-issue core [such as R5000]
15 # spends ~68 cycles per byte processed with a 128-bit key. This is ~16%
16 # faster than gcc-generated code, which is not very impressive. But
17 # recall that the compressed S-box requires extra processing, namely
18 # additional rotations. Rotations are implemented with lwl/lwr pairs,
19 # which are normally used for loading unaligned data. Another cool
20 # thing about this module is its endian neutrality, which means that
21 # it processes data without ever changing byte order...
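#
# A note on the "compressed" S-box: only Te0 (256 entries x 4 bytes = 1KB)
# and the plain byte S-box Te4 (256 bytes) are stored. The Te1/Te2/Te3
# variants used by the T-table round are byte rotations of Te0, so they are
# synthesized on the fly by reading Te0 entries at a byte offset with the
# lwl/lwr pairs mentioned above; the decryption path does the same with
# Td0/Td4.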
22
23 ######################################################################
24 # There are a number of MIPS ABIs in use, of which O32 and N32/64 are
25 # the most widely used. Then there is a new contender: NUBI. It appears
26 # that if one picks the latter, it's possible to arrange code in an
27 # ABI-neutral manner. Therefore let's stick to the NUBI register layout:
28 #
29 ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
30 ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
31 ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
32 ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
33 #
34 # The return value is placed in $a0. The following coding rules
35 # facilitate interoperability:
36 #
37 # - never ever touch $tp, "thread pointer", former $gp;
38 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
39 #   old code];
40 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
41 #
42 # For reference, here is the register layout for the N32/64 MIPS ABIs:
43 #
44 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
45 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
46 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
47 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
48 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
49 #
50 $flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
51
52 if ($flavour =~ /64|n32/i) {
53         $PTR_ADD="dadd";        # incidentally works even on n32
54         $PTR_SUB="dsub";        # incidentally works even on n32
55         $REG_S="sd";
56         $REG_L="ld";
57         $PTR_SLL="dsll";        # incidentally works even on n32
58         $SZREG=8;
59 } else {
60         $PTR_ADD="add";
61         $PTR_SUB="sub";
62         $REG_S="sw";
63         $REG_L="lw";
64         $PTR_SLL="sll";
65         $SZREG=4;
66 }
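#
# As a concrete example of what the macros above buy us: under the 64-bit
# flavours every $PTR_ADD below is emitted as "dadd" and registers are
# spilled with sd/ld into 8-byte slots, while under o32/nubi32 the same
# source emits "add" with sw/lw and 4-byte slots, keeping the rest of the
# module pointer-size agnostic.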
67 $pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
68 #
69 # <appro@openssl.org>
70 #
71 ######################################################################
72
73 for (@ARGV) {   $big_endian=1 if (/\-DB_ENDIAN/);
74                 $big_endian=0 if (/\-DL_ENDIAN/);
75                 $output=$_ if (/^\w[\w\-]*\.\w+$/);     }
76 open STDOUT,">$output";
77
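# Endianness of the target can be forced with -DB_ENDIAN/-DL_ENDIAN above
# (handy when cross-compiling); otherwise it is probed below: pack('N',1)
# produces the big-endian byte string "\0\0\0\1" and unpack('L',...) reads
# it back in the build host's native order, yielding 1 only on a big-endian
# host.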
78 if (!defined($big_endian))
79 {    $big_endian=(unpack('L',pack('N',1))==1);   }
80
83
84 my ($MSB,$LSB)=(0,3);   # automatically converted to little-endian
85
86 $code.=<<___;
87 .text
88
89 .option pic2
90 .set    noat
91 ___
92 \f
93 {{{
94 my $FRAMESIZE=16*$SZREG;
95 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
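# $SAVED_REGS_MASK feeds the .mask directives below: bits 31/30 cover
# $ra/$fp, bits 16-23 cover $16-$23 ($s4-$s11 in the layout above), and the
# NUBI variant additionally marks $12-$15 and $gp ($3), matching the extra
# registers spilled by the NUBI prologues.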
96
97 my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
98 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
99 my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
100 my ($key0,$cnt)=($gp,$fp);
101
102 # instruction ordering is "stolen" from the output of the MIPSpro
103 # compiler invoked with -mips3 -O3 arguments...
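#
# Round structure in C-like terms: each of the Nr-1 inner rounds replaces
# the four state words with
#   s[i] = rk[i] ^ Te0[byte3(s[i])] ^ Te1[byte2(s[i+1])]
#                ^ Te2[byte1(s[i+2])] ^ Te3[byte0(s[i+3])]
# (word indices mod 4, byte3 being the most significant byte), where the
# Te1..Te3 values are read as rotated Te0 entries; the final round uses
# single-byte Te4 lookups and reassembles the words with _ins.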
104 $code.=<<___;
105 .align  5
106 .ent    _mips_AES_encrypt
107 _mips_AES_encrypt:
108         .frame  $sp,0,$ra
109         .set    reorder
110         lw      $t0,0($key)
111         lw      $t1,4($key)
112         lw      $t2,8($key)
113         lw      $t3,12($key)
114         lw      $cnt,240($key)
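	# the word at byte offset 240 of the key schedule is the rounds
	# count of the AES_KEY structure, i.e. 10, 12 or 14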
115         $PTR_ADD $key0,$key,16
116
117         xor     $s0,$t0
118         xor     $s1,$t1
119         xor     $s2,$t2
120         xor     $s3,$t3
121
122         sub     $cnt,1
123         _xtr    $i0,$s1,16-2
124 .Loop_enc:
125         _xtr    $i1,$s2,16-2
126         _xtr    $i2,$s3,16-2
127         _xtr    $i3,$s0,16-2
128         and     $i0,0x3fc
129         and     $i1,0x3fc
130         and     $i2,0x3fc
131         and     $i3,0x3fc
132         $PTR_ADD $i0,$Tbl
133         $PTR_ADD $i1,$Tbl
134         $PTR_ADD $i2,$Tbl
135         $PTR_ADD $i3,$Tbl
136         lwl     $t0,3($i0)              # Te1[s1>>16]
137         lwl     $t1,3($i1)              # Te1[s2>>16]
138         lwl     $t2,3($i2)              # Te1[s3>>16]
139         lwl     $t3,3($i3)              # Te1[s0>>16]
140         lwr     $t0,2($i0)              # Te1[s1>>16]
141         lwr     $t1,2($i1)              # Te1[s2>>16]
142         lwr     $t2,2($i2)              # Te1[s3>>16]
143         lwr     $t3,2($i3)              # Te1[s0>>16]
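	# each lwl/lwr pair above picks up the Te0 entry rotated by one
	# byte position, i.e. the corresponding Te1 value; the next two
	# groups use different offsets to synthesize Te2 and Te3 likewise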
144
145         _xtr    $i0,$s2,8-2
146         _xtr    $i1,$s3,8-2
147         _xtr    $i2,$s0,8-2
148         _xtr    $i3,$s1,8-2
149         and     $i0,0x3fc
150         and     $i1,0x3fc
151         and     $i2,0x3fc
152         and     $i3,0x3fc
153         $PTR_ADD $i0,$Tbl
154         $PTR_ADD $i1,$Tbl
155         $PTR_ADD $i2,$Tbl
156         $PTR_ADD $i3,$Tbl
157         lwl     $t4,2($i0)              # Te2[s2>>8]
158         lwl     $t5,2($i1)              # Te2[s3>>8]
159         lwl     $t6,2($i2)              # Te2[s0>>8]
160         lwl     $t7,2($i3)              # Te2[s1>>8]
161         lwr     $t4,1($i0)              # Te2[s2>>8]
162         lwr     $t5,1($i1)              # Te2[s3>>8]
163         lwr     $t6,1($i2)              # Te2[s0>>8]
164         lwr     $t7,1($i3)              # Te2[s1>>8]
165
166         _xtr    $i0,$s3,0-2
167         _xtr    $i1,$s0,0-2
168         _xtr    $i2,$s1,0-2
169         _xtr    $i3,$s2,0-2
170         and     $i0,0x3fc
171         and     $i1,0x3fc
172         and     $i2,0x3fc
173         and     $i3,0x3fc
174         $PTR_ADD $i0,$Tbl
175         $PTR_ADD $i1,$Tbl
176         $PTR_ADD $i2,$Tbl
177         $PTR_ADD $i3,$Tbl
178         lwl     $t8,1($i0)              # Te3[s3]
179         lwl     $t9,1($i1)              # Te3[s0]
180         lwl     $t10,1($i2)             # Te3[s1]
181         lwl     $t11,1($i3)             # Te3[s2]
182         lwr     $t8,0($i0)              # Te3[s3]
183         lwr     $t9,0($i1)              # Te3[s0]
184         lwr     $t10,0($i2)             # Te3[s1]
185         lwr     $t11,0($i3)             # Te3[s2]
186
187         _xtr    $i0,$s0,24-2
188         _xtr    $i1,$s1,24-2
189         _xtr    $i2,$s2,24-2
190         _xtr    $i3,$s3,24-2
191         and     $i0,0x3fc
192         and     $i1,0x3fc
193         and     $i2,0x3fc
194         and     $i3,0x3fc
195         $PTR_ADD $i0,$Tbl
196         $PTR_ADD $i1,$Tbl
197         $PTR_ADD $i2,$Tbl
198         $PTR_ADD $i3,$Tbl
199         xor     $t0,$t4
200         xor     $t1,$t5
201         xor     $t2,$t6
202         xor     $t3,$t7
203         lw      $t4,0($i0)              # Te0[s0>>24]
204         lw      $t5,0($i1)              # Te0[s1>>24]
205         lw      $t6,0($i2)              # Te0[s2>>24]
206         lw      $t7,0($i3)              # Te0[s3>>24]
207
208         lw      $s0,0($key0)
209         lw      $s1,4($key0)
210         lw      $s2,8($key0)
211         lw      $s3,12($key0)
212
213         xor     $t0,$t8
214         xor     $t1,$t9
215         xor     $t2,$t10
216         xor     $t3,$t11
217
218         xor     $t0,$t4
219         xor     $t1,$t5
220         xor     $t2,$t6
221         xor     $t3,$t7
222
223         sub     $cnt,1
224         $PTR_ADD $key0,16
225         xor     $s0,$t0
226         xor     $s1,$t1
227         xor     $s2,$t2
228         xor     $s3,$t3
229         .set    noreorder
230         bnez    $cnt,.Loop_enc
231         _xtr    $i0,$s1,16-2
232
233         .set    reorder
234         _xtr    $i1,$s2,16-2
235         _xtr    $i2,$s3,16-2
236         _xtr    $i3,$s0,16-2
237         and     $i0,0x3fc
238         and     $i1,0x3fc
239         and     $i2,0x3fc
240         and     $i3,0x3fc
241         $PTR_ADD $i0,$Tbl
242         $PTR_ADD $i1,$Tbl
243         $PTR_ADD $i2,$Tbl
244         $PTR_ADD $i3,$Tbl
245         lbu     $t0,2($i0)              # Te4[s1>>16]
246         lbu     $t1,2($i1)              # Te4[s2>>16]
247         lbu     $t2,2($i2)              # Te4[s3>>16]
248         lbu     $t3,2($i3)              # Te4[s0>>16]
249
250         _xtr    $i0,$s2,8-2
251         _xtr    $i1,$s3,8-2
252         _xtr    $i2,$s0,8-2
253         _xtr    $i3,$s1,8-2
254         and     $i0,0x3fc
255         and     $i1,0x3fc
256         and     $i2,0x3fc
257         and     $i3,0x3fc
258         $PTR_ADD $i0,$Tbl
259         $PTR_ADD $i1,$Tbl
260         $PTR_ADD $i2,$Tbl
261         $PTR_ADD $i3,$Tbl
262         lbu     $t4,2($i0)              # Te4[s2>>8]
263         lbu     $t5,2($i1)              # Te4[s3>>8]
264         lbu     $t6,2($i2)              # Te4[s0>>8]
265         lbu     $t7,2($i3)              # Te4[s1>>8]
266
267         _xtr    $i0,$s0,24-2
268         _xtr    $i1,$s1,24-2
269         _xtr    $i2,$s2,24-2
270         _xtr    $i3,$s3,24-2
271         and     $i0,0x3fc
272         and     $i1,0x3fc
273         and     $i2,0x3fc
274         and     $i3,0x3fc
275         $PTR_ADD $i0,$Tbl
276         $PTR_ADD $i1,$Tbl
277         $PTR_ADD $i2,$Tbl
278         $PTR_ADD $i3,$Tbl
279         lbu     $t8,2($i0)              # Te4[s0>>24]
280         lbu     $t9,2($i1)              # Te4[s1>>24]
281         lbu     $t10,2($i2)             # Te4[s2>>24]
282         lbu     $t11,2($i3)             # Te4[s3>>24]
283
284         _xtr    $i0,$s3,0-2
285         _xtr    $i1,$s0,0-2
286         _xtr    $i2,$s1,0-2
287         _xtr    $i3,$s2,0-2
288         and     $i0,0x3fc
289         and     $i1,0x3fc
290         and     $i2,0x3fc
291         and     $i3,0x3fc
292
293         _ins    $t0,16
294         _ins    $t1,16
295         _ins    $t2,16
296         _ins    $t3,16
297
298         _ins    $t4,8
299         _ins    $t5,8
300         _ins    $t6,8
301         _ins    $t7,8
302
303         xor     $t0,$t4
304         xor     $t1,$t5
305         xor     $t2,$t6
306         xor     $t3,$t7
307
308         $PTR_ADD $i0,$Tbl
309         $PTR_ADD $i1,$Tbl
310         $PTR_ADD $i2,$Tbl
311         $PTR_ADD $i3,$Tbl
312         lbu     $t4,2($i0)              # Te4[s3]
313         lbu     $t5,2($i1)              # Te4[s0]
314         lbu     $t6,2($i2)              # Te4[s1]
315         lbu     $t7,2($i3)              # Te4[s2]
316
317         _ins    $t8,24
318         _ins    $t9,24
319         _ins    $t10,24
320         _ins    $t11,24
321
322         lw      $s0,0($key0)
323         lw      $s1,4($key0)
324         lw      $s2,8($key0)
325         lw      $s3,12($key0)
326
327         xor     $t0,$t8
328         xor     $t1,$t9
329         xor     $t2,$t10
330         xor     $t3,$t11
331
332         _ins    $t4,0
333         _ins    $t5,0
334         _ins    $t6,0
335         _ins    $t7,0
336
337         xor     $t0,$t4
338         xor     $t1,$t5
339         xor     $t2,$t6
340         xor     $t3,$t7
341
342         xor     $s0,$t0
343         xor     $s1,$t1
344         xor     $s2,$t2
345         xor     $s3,$t3
346
347         jr      $ra
348 .end    _mips_AES_encrypt
349
350 .align  5
351 .globl  AES_encrypt
352 .ent    AES_encrypt
353 AES_encrypt:
354         .frame  $sp,$FRAMESIZE,$ra
355         .mask   $SAVED_REGS_MASK,-$SZREG
356         .set    noreorder
357 ___
358 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
359         .cpload $pf
360 ___
361 $code.=<<___;
362         $PTR_SUB $sp,$FRAMESIZE
363         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
364         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
365         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
366         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
367         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
368         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
369         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
370         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
371         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
372         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
373 ___
374 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
375         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
376         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
377         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
378         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
379         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
380 ___
381 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
382         .cplocal        $Tbl
383         .cpsetup        $pf,$zero,AES_encrypt
384 ___
385 $code.=<<___;
386         .set    reorder
387         la      $Tbl,AES_Te             # PIC-ified 'load address'
388
389         lwl     $s0,0+$MSB($inp)
390         lwl     $s1,4+$MSB($inp)
391         lwl     $s2,8+$MSB($inp)
392         lwl     $s3,12+$MSB($inp)
393         lwr     $s0,0+$LSB($inp)
394         lwr     $s1,4+$LSB($inp)
395         lwr     $s2,8+$LSB($inp)
396         lwr     $s3,12+$LSB($inp)
397
398         bal     _mips_AES_encrypt
399
400         swr     $s0,0+$LSB($out)
401         swr     $s1,4+$LSB($out)
402         swr     $s2,8+$LSB($out)
403         swr     $s3,12+$LSB($out)
404         swl     $s0,0+$MSB($out)
405         swl     $s1,4+$MSB($out)
406         swl     $s2,8+$MSB($out)
407         swl     $s3,12+$MSB($out)
408
409         .set    noreorder
410         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
411         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
412         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
413         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
414         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
415         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
416         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
417         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
418         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
419         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
420 ___
421 $code.=<<___ if ($flavour =~ /nubi/i);
422         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
423         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
424         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
425         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
426         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
427 ___
428 $code.=<<___;
429         jr      $ra
430         $PTR_ADD $sp,$FRAMESIZE
431 .end    AES_encrypt
432 ___
433 \f
434 $code.=<<___;
435 .align  5
436 .ent    _mips_AES_decrypt
437 _mips_AES_decrypt:
438         .frame  $sp,0,$ra
439         .set    reorder
440         lw      $t0,0($key)
441         lw      $t1,4($key)
442         lw      $t2,8($key)
443         lw      $t3,12($key)
444         lw      $cnt,240($key)
445         $PTR_ADD $key0,$key,16
446
447         xor     $s0,$t0
448         xor     $s1,$t1
449         xor     $s2,$t2
450         xor     $s3,$t3
451
452         sub     $cnt,1
453         _xtr    $i0,$s3,16-2
454 .Loop_dec:
455         _xtr    $i1,$s0,16-2
456         _xtr    $i2,$s1,16-2
457         _xtr    $i3,$s2,16-2
458         and     $i0,0x3fc
459         and     $i1,0x3fc
460         and     $i2,0x3fc
461         and     $i3,0x3fc
462         $PTR_ADD $i0,$Tbl
463         $PTR_ADD $i1,$Tbl
464         $PTR_ADD $i2,$Tbl
465         $PTR_ADD $i3,$Tbl
466         lwl     $t0,3($i0)              # Td1[s3>>16]
467         lwl     $t1,3($i1)              # Td1[s0>>16]
468         lwl     $t2,3($i2)              # Td1[s1>>16]
469         lwl     $t3,3($i3)              # Td1[s2>>16]
470         lwr     $t0,2($i0)              # Td1[s3>>16]
471         lwr     $t1,2($i1)              # Td1[s0>>16]
472         lwr     $t2,2($i2)              # Td1[s1>>16]
473         lwr     $t3,2($i3)              # Td1[s2>>16]
474
475         _xtr    $i0,$s2,8-2
476         _xtr    $i1,$s3,8-2
477         _xtr    $i2,$s0,8-2
478         _xtr    $i3,$s1,8-2
479         and     $i0,0x3fc
480         and     $i1,0x3fc
481         and     $i2,0x3fc
482         and     $i3,0x3fc
483         $PTR_ADD $i0,$Tbl
484         $PTR_ADD $i1,$Tbl
485         $PTR_ADD $i2,$Tbl
486         $PTR_ADD $i3,$Tbl
487         lwl     $t4,2($i0)              # Td2[s2>>8]
488         lwl     $t5,2($i1)              # Td2[s3>>8]
489         lwl     $t6,2($i2)              # Td2[s0>>8]
490         lwl     $t7,2($i3)              # Td2[s1>>8]
491         lwr     $t4,1($i0)              # Td2[s2>>8]
492         lwr     $t5,1($i1)              # Td2[s3>>8]
493         lwr     $t6,1($i2)              # Td2[s0>>8]
494         lwr     $t7,1($i3)              # Td2[s1>>8]
495
496         _xtr    $i0,$s1,0-2
497         _xtr    $i1,$s2,0-2
498         _xtr    $i2,$s3,0-2
499         _xtr    $i3,$s0,0-2
500         and     $i0,0x3fc
501         and     $i1,0x3fc
502         and     $i2,0x3fc
503         and     $i3,0x3fc
504         $PTR_ADD $i0,$Tbl
505         $PTR_ADD $i1,$Tbl
506         $PTR_ADD $i2,$Tbl
507         $PTR_ADD $i3,$Tbl
508         lwl     $t8,1($i0)              # Td3[s1]
509         lwl     $t9,1($i1)              # Td3[s2]
510         lwl     $t10,1($i2)             # Td3[s3]
511         lwl     $t11,1($i3)             # Td3[s0]
512         lwr     $t8,0($i0)              # Td3[s1]
513         lwr     $t9,0($i1)              # Td3[s2]
514         lwr     $t10,0($i2)             # Td3[s3]
515         lwr     $t11,0($i3)             # Td3[s0]
516
517         _xtr    $i0,$s0,24-2
518         _xtr    $i1,$s1,24-2
519         _xtr    $i2,$s2,24-2
520         _xtr    $i3,$s3,24-2
521         and     $i0,0x3fc
522         and     $i1,0x3fc
523         and     $i2,0x3fc
524         and     $i3,0x3fc
525         $PTR_ADD $i0,$Tbl
526         $PTR_ADD $i1,$Tbl
527         $PTR_ADD $i2,$Tbl
528         $PTR_ADD $i3,$Tbl
529
530         xor     $t0,$t4
531         xor     $t1,$t5
532         xor     $t2,$t6
533         xor     $t3,$t7
534
535
536         lw      $t4,0($i0)              # Td0[s0>>24]
537         lw      $t5,0($i1)              # Td0[s1>>24]
538         lw      $t6,0($i2)              # Td0[s2>>24]
539         lw      $t7,0($i3)              # Td0[s3>>24]
540
541         lw      $s0,0($key0)
542         lw      $s1,4($key0)
543         lw      $s2,8($key0)
544         lw      $s3,12($key0)
545
546         xor     $t0,$t8
547         xor     $t1,$t9
548         xor     $t2,$t10
549         xor     $t3,$t11
550
551         xor     $t0,$t4
552         xor     $t1,$t5
553         xor     $t2,$t6
554         xor     $t3,$t7
555
556         sub     $cnt,1
557         $PTR_ADD $key0,16
558         xor     $s0,$t0
559         xor     $s1,$t1
560         xor     $s2,$t2
561         xor     $s3,$t3
562         .set    noreorder
563         bnez    $cnt,.Loop_dec
564         _xtr    $i0,$s3,16-2
565
566         .set    reorder
567         lw      $t4,1024($Tbl)          # prefetch Td4
568         lw      $t5,1024+32($Tbl)
569         lw      $t6,1024+64($Tbl)
570         lw      $t7,1024+96($Tbl)
571         lw      $t8,1024+128($Tbl)
572         lw      $t9,1024+160($Tbl)
573         lw      $t10,1024+192($Tbl)
574         lw      $t11,1024+224($Tbl)
575
576         _xtr    $i0,$s3,16
577         _xtr    $i1,$s0,16
578         _xtr    $i2,$s1,16
579         _xtr    $i3,$s2,16
580         and     $i0,0xff
581         and     $i1,0xff
582         and     $i2,0xff
583         and     $i3,0xff
584         $PTR_ADD $i0,$Tbl
585         $PTR_ADD $i1,$Tbl
586         $PTR_ADD $i2,$Tbl
587         $PTR_ADD $i3,$Tbl
588         lbu     $t0,1024($i0)           # Td4[s3>>16]
589         lbu     $t1,1024($i1)           # Td4[s0>>16]
590         lbu     $t2,1024($i2)           # Td4[s1>>16]
591         lbu     $t3,1024($i3)           # Td4[s2>>16]
592
593         _xtr    $i0,$s2,8
594         _xtr    $i1,$s3,8
595         _xtr    $i2,$s0,8
596         _xtr    $i3,$s1,8
597         and     $i0,0xff
598         and     $i1,0xff
599         and     $i2,0xff
600         and     $i3,0xff
601         $PTR_ADD $i0,$Tbl
602         $PTR_ADD $i1,$Tbl
603         $PTR_ADD $i2,$Tbl
604         $PTR_ADD $i3,$Tbl
605         lbu     $t4,1024($i0)           # Td4[s2>>8]
606         lbu     $t5,1024($i1)           # Td4[s3>>8]
607         lbu     $t6,1024($i2)           # Td4[s0>>8]
608         lbu     $t7,1024($i3)           # Td4[s1>>8]
609
610         _xtr    $i0,$s0,24
611         _xtr    $i1,$s1,24
612         _xtr    $i2,$s2,24
613         _xtr    $i3,$s3,24
614         $PTR_ADD $i0,$Tbl
615         $PTR_ADD $i1,$Tbl
616         $PTR_ADD $i2,$Tbl
617         $PTR_ADD $i3,$Tbl
618         lbu     $t8,1024($i0)           # Td4[s0>>24]
619         lbu     $t9,1024($i1)           # Td4[s1>>24]
620         lbu     $t10,1024($i2)          # Td4[s2>>24]
621         lbu     $t11,1024($i3)          # Td4[s3>>24]
622
623         _xtr    $i0,$s1,0
624         _xtr    $i1,$s2,0
625         _xtr    $i2,$s3,0
626         _xtr    $i3,$s0,0
627
628         _ins    $t0,16
629         _ins    $t1,16
630         _ins    $t2,16
631         _ins    $t3,16
632
633         _ins    $t4,8
634         _ins    $t5,8
635         _ins    $t6,8
636         _ins    $t7,8
637
638         xor     $t0,$t4
639         xor     $t1,$t5
640         xor     $t2,$t6
641         xor     $t3,$t7
642
643         $PTR_ADD $i0,$Tbl
644         $PTR_ADD $i1,$Tbl
645         $PTR_ADD $i2,$Tbl
646         $PTR_ADD $i3,$Tbl
647         lbu     $t4,1024($i0)           # Td4[s1]
648         lbu     $t5,1024($i1)           # Td4[s2]
649         lbu     $t6,1024($i2)           # Td4[s3]
650         lbu     $t7,1024($i3)           # Td4[s0]
651
652         _ins    $t8,24
653         _ins    $t9,24
654         _ins    $t10,24
655         _ins    $t11,24
656
657         lw      $s0,0($key0)
658         lw      $s1,4($key0)
659         lw      $s2,8($key0)
660         lw      $s3,12($key0)
661
662         _ins    $t4,0
663         _ins    $t5,0
664         _ins    $t6,0
665         _ins    $t7,0
666
667
668         xor     $t0,$t8
669         xor     $t1,$t9
670         xor     $t2,$t10
671         xor     $t3,$t11
672
673         xor     $t0,$t4
674         xor     $t1,$t5
675         xor     $t2,$t6
676         xor     $t3,$t7
677
678         xor     $s0,$t0
679         xor     $s1,$t1
680         xor     $s2,$t2
681         xor     $s3,$t3
682
683         jr      $ra
684 .end    _mips_AES_decrypt
685
686 .align  5
687 .globl  AES_decrypt
688 .ent    AES_decrypt
689 AES_decrypt:
690         .frame  $sp,$FRAMESIZE,$ra
691         .mask   $SAVED_REGS_MASK,-$SZREG
692         .set    noreorder
693 ___
694 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
695         .cpload $pf
696 ___
697 $code.=<<___;
698         $PTR_SUB $sp,$FRAMESIZE
699         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
700         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
701         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
702         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
703         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
704         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
705         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
706         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
707         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
708         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
709 ___
710 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
711         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
712         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
713         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
714         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
715         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
716 ___
717 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
718         .cplocal        $Tbl
719         .cpsetup        $pf,$zero,AES_decrypt
720 ___
721 $code.=<<___;
722         .set    reorder
723         la      $Tbl,AES_Td             # PIC-ified 'load address'
724
725         lwl     $s0,0+$MSB($inp)
726         lwl     $s1,4+$MSB($inp)
727         lwl     $s2,8+$MSB($inp)
728         lwl     $s3,12+$MSB($inp)
729         lwr     $s0,0+$LSB($inp)
730         lwr     $s1,4+$LSB($inp)
731         lwr     $s2,8+$LSB($inp)
732         lwr     $s3,12+$LSB($inp)
733
734         bal     _mips_AES_decrypt
735
736         swr     $s0,0+$LSB($out)
737         swr     $s1,4+$LSB($out)
738         swr     $s2,8+$LSB($out)
739         swr     $s3,12+$LSB($out)
740         swl     $s0,0+$MSB($out)
741         swl     $s1,4+$MSB($out)
742         swl     $s2,8+$MSB($out)
743         swl     $s3,12+$MSB($out)
744
745         .set    noreorder
746         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
747         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
748         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
749         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
750         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
751         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
752         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
753         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
754         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
755         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
756 ___
757 $code.=<<___ if ($flavour =~ /nubi/i);
758         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
759         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
760         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
761         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
762         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
763 ___
764 $code.=<<___;
765         jr      $ra
766         $PTR_ADD $sp,$FRAMESIZE
767 .end    AES_decrypt
768 ___
769 }}}
770 \f
771 {{{
772 my $FRAMESIZE=8*$SZREG;
773 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
774
775 my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
776 my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
777 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
778 my ($rcon,$cnt)=($gp,$fp);
779
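# Key expansion uses the byte S-box Te4, located 1024 bytes into AES_Te,
# with the round constants stored right after it ($rcon starts at
# AES_Te+1024+256). The 128/192/256-bit paths run the expansion loop
# 10/8/7 times respectively, writing the schedule out as they go and
# finally storing the round count (10/12/14) into the rounds field.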
780 $code.=<<___;
781 .align  5
782 .ent    _mips_AES_set_encrypt_key
783 _mips_AES_set_encrypt_key:
784         .frame  $sp,0,$ra
785         .set    noreorder
786         beqz    $inp,.Lekey_done
787         li      $t0,-1
788         beqz    $key,.Lekey_done
789         $PTR_ADD $rcon,$Tbl,1024+256
790
791         .set    reorder
792         lwl     $rk0,0+$MSB($inp)       # load 128 bits
793         lwl     $rk1,4+$MSB($inp)
794         lwl     $rk2,8+$MSB($inp)
795         lwl     $rk3,12+$MSB($inp)
796         li      $at,128
797         lwr     $rk0,0+$LSB($inp)
798         lwr     $rk1,4+$LSB($inp)
799         lwr     $rk2,8+$LSB($inp)
800         lwr     $rk3,12+$LSB($inp)
801         .set    noreorder
802         beq     $bits,$at,.L128bits
803         li      $cnt,10
804
805         .set    reorder
806         lwl     $rk4,16+$MSB($inp)      # load 192 bits
807         lwl     $rk5,20+$MSB($inp)
808         li      $at,192
809         lwr     $rk4,16+$LSB($inp)
810         lwr     $rk5,20+$LSB($inp)
811         .set    noreorder
812         beq     $bits,$at,.L192bits
813         li      $cnt,8
814
815         .set    reorder
816         lwl     $rk6,24+$MSB($inp)      # load 256 bits
817         lwl     $rk7,28+$MSB($inp)
818         li      $at,256
819         lwr     $rk6,24+$LSB($inp)
820         lwr     $rk7,28+$LSB($inp)
821         .set    noreorder
822         beq     $bits,$at,.L256bits
823         li      $cnt,7
824
825         b       .Lekey_done
826         li      $t0,-2
827
828 .align  4
829 .L128bits:
830         .set    reorder
831         srl     $i0,$rk3,16
832         srl     $i1,$rk3,8
833         and     $i0,0xff
834         and     $i1,0xff
835         and     $i2,$rk3,0xff
836         srl     $i3,$rk3,24
837         $PTR_ADD $i0,$Tbl
838         $PTR_ADD $i1,$Tbl
839         $PTR_ADD $i2,$Tbl
840         $PTR_ADD $i3,$Tbl
841         lbu     $i0,1024($i0)
842         lbu     $i1,1024($i1)
843         lbu     $i2,1024($i2)
844         lbu     $i3,1024($i3)
845
846         sw      $rk0,0($key)
847         sw      $rk1,4($key)
848         sw      $rk2,8($key)
849         sw      $rk3,12($key)
850         sub     $cnt,1
851         $PTR_ADD $key,16
852
853         _bias   $i0,24
854         _bias   $i1,16
855         _bias   $i2,8
856         _bias   $i3,0
857
858         xor     $rk0,$i0
859         lw      $i0,0($rcon)
860         xor     $rk0,$i1
861         xor     $rk0,$i2
862         xor     $rk0,$i3
863         xor     $rk0,$i0
864
865         xor     $rk1,$rk0
866         xor     $rk2,$rk1
867         xor     $rk3,$rk2
868
869         .set    noreorder
870         bnez    $cnt,.L128bits
871         $PTR_ADD $rcon,4
872
873         sw      $rk0,0($key)
874         sw      $rk1,4($key)
875         sw      $rk2,8($key)
876         li      $cnt,10
877         sw      $rk3,12($key)
878         li      $t0,0
879         sw      $cnt,80($key)
880         b       .Lekey_done
881         $PTR_SUB $key,10*16
882
883 .align  4
884 .L192bits:
885         .set    reorder
886         srl     $i0,$rk5,16
887         srl     $i1,$rk5,8
888         and     $i0,0xff
889         and     $i1,0xff
890         and     $i2,$rk5,0xff
891         srl     $i3,$rk5,24
892         $PTR_ADD $i0,$Tbl
893         $PTR_ADD $i1,$Tbl
894         $PTR_ADD $i2,$Tbl
895         $PTR_ADD $i3,$Tbl
896         lbu     $i0,1024($i0)
897         lbu     $i1,1024($i1)
898         lbu     $i2,1024($i2)
899         lbu     $i3,1024($i3)
900
901         sw      $rk0,0($key)
902         sw      $rk1,4($key)
903         sw      $rk2,8($key)
904         sw      $rk3,12($key)
905         sw      $rk4,16($key)
906         sw      $rk5,20($key)
907         sub     $cnt,1
908         $PTR_ADD $key,24
909
910         _bias   $i0,24
911         _bias   $i1,16
912         _bias   $i2,8
913         _bias   $i3,0
914
915         xor     $rk0,$i0
916         lw      $i0,0($rcon)
917         xor     $rk0,$i1
918         xor     $rk0,$i2
919         xor     $rk0,$i3
920         xor     $rk0,$i0
921
922         xor     $rk1,$rk0
923         xor     $rk2,$rk1
924         xor     $rk3,$rk2
925         xor     $rk4,$rk3
926         xor     $rk5,$rk4
927
928         .set    noreorder
929         bnez    $cnt,.L192bits
930         $PTR_ADD $rcon,4
931
932         sw      $rk0,0($key)
933         sw      $rk1,4($key)
934         sw      $rk2,8($key)
935         li      $cnt,12
936         sw      $rk3,12($key)
937         li      $t0,0
938         sw      $cnt,48($key)
939         b       .Lekey_done
940         $PTR_SUB $key,12*16
941
942 .align  4
943 .L256bits:
944         .set    reorder
945         srl     $i0,$rk7,16
946         srl     $i1,$rk7,8
947         and     $i0,0xff
948         and     $i1,0xff
949         and     $i2,$rk7,0xff
950         srl     $i3,$rk7,24
951         $PTR_ADD $i0,$Tbl
952         $PTR_ADD $i1,$Tbl
953         $PTR_ADD $i2,$Tbl
954         $PTR_ADD $i3,$Tbl
955         lbu     $i0,1024($i0)
956         lbu     $i1,1024($i1)
957         lbu     $i2,1024($i2)
958         lbu     $i3,1024($i3)
959
960         sw      $rk0,0($key)
961         sw      $rk1,4($key)
962         sw      $rk2,8($key)
963         sw      $rk3,12($key)
964         sw      $rk4,16($key)
965         sw      $rk5,20($key)
966         sw      $rk6,24($key)
967         sw      $rk7,28($key)
968         sub     $cnt,1
969
970         _bias   $i0,24
971         _bias   $i1,16
972         _bias   $i2,8
973         _bias   $i3,0
974
975         xor     $rk0,$i0
976         lw      $i0,0($rcon)
977         xor     $rk0,$i1
978         xor     $rk0,$i2
979         xor     $rk0,$i3
980         xor     $rk0,$i0
981
982         xor     $rk1,$rk0
983         xor     $rk2,$rk1
984         xor     $rk3,$rk2
985         beqz    $cnt,.L256bits_done
986
987         srl     $i0,$rk3,24
988         srl     $i1,$rk3,16
989         srl     $i2,$rk3,8
990         and     $i3,$rk3,0xff
991         and     $i1,0xff
992         and     $i2,0xff
993         $PTR_ADD $i0,$Tbl
994         $PTR_ADD $i1,$Tbl
995         $PTR_ADD $i2,$Tbl
996         $PTR_ADD $i3,$Tbl
997         lbu     $i0,1024($i0)
998         lbu     $i1,1024($i1)
999         lbu     $i2,1024($i2)
1000         lbu     $i3,1024($i3)
1001         sll     $i0,24
1002         sll     $i1,16
1003         sll     $i2,8
1004
1005         xor     $rk4,$i0
1006         xor     $rk4,$i1
1007         xor     $rk4,$i2
1008         xor     $rk4,$i3
1009
1010         xor     $rk5,$rk4
1011         xor     $rk6,$rk5
1012         xor     $rk7,$rk6
1013
1014         $PTR_ADD $key,32
1015         .set    noreorder
1016         b       .L256bits
1017         $PTR_ADD $rcon,4
1018
1019 .L256bits_done:
1020         sw      $rk0,32($key)
1021         sw      $rk1,36($key)
1022         sw      $rk2,40($key)
1023         li      $cnt,14
1024         sw      $rk3,44($key)
1025         li      $t0,0
1026         sw      $cnt,48($key)
1027         $PTR_SUB $key,12*16
1028
1029 .Lekey_done:
1030         jr      $ra
1031         nop
1032 .end    _mips_AES_set_encrypt_key
1033
1034 .globl  AES_set_encrypt_key
1035 .ent    AES_set_encrypt_key
1036 AES_set_encrypt_key:
1037         .frame  $sp,$FRAMESIZE,$ra
1038         .mask   $SAVED_REGS_MASK,-$SZREG
1039         .set    noreorder
1040 ___
1041 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1042         .cpload $pf
1043 ___
1044 $code.=<<___;
1045         $PTR_SUB $sp,$FRAMESIZE
1046         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1047         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1048 ___
1049 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1050         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1051         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1052         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1053         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1054         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1055 ___
1056 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1057         .cplocal        $Tbl
1058         .cpsetup        $pf,$zero,AES_set_encrypt_key
1059 ___
1060 $code.=<<___;
1061         .set    reorder
1062         la      $Tbl,AES_Te             # PIC-ified 'load address'
1063
1064         bal     _mips_AES_set_encrypt_key
1065
1066         .set    noreorder
1067         move    $a0,$t0
1068         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1069         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1070 ___
1071 $code.=<<___ if ($flavour =~ /nubi/i);
1072         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1073         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1074         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1075         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1076         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1077 ___
1078 $code.=<<___;
1079         jr      $ra
1080         $PTR_ADD $sp,$FRAMESIZE
1081 .end    AES_set_encrypt_key
1082 ___
1083 \f
1084 my ($head,$tail)=($inp,$bits);
1085 my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
1086 my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
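# AES_set_decrypt_key reuses _mips_AES_set_encrypt_key and then converts
# the schedule for the equivalent inverse cipher: the .Lswap loop reverses
# the order of the round keys in place, and the .Lmix loop applies
# InvMixColumns to every round key except the first and the last one.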
1087 $code.=<<___;
1088 .align  5
1089 .globl  AES_set_decrypt_key
1090 .ent    AES_set_decrypt_key
1091 AES_set_decrypt_key:
1092         .frame  $sp,$FRAMESIZE,$ra
1093         .mask   $SAVED_REGS_MASK,-$SZREG
1094         .set    noreorder
1095 ___
1096 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1097         .cpload $pf
1098 ___
1099 $code.=<<___;
1100         $PTR_SUB $sp,$FRAMESIZE
1101         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1102         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1103 ___
1104 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1105         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1106         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1107         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1108         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1109         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1110 ___
1111 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1112         .cplocal        $Tbl
1113         .cpsetup        $pf,$zero,AES_set_decrypt_key
1114 ___
1115 $code.=<<___;
1116         .set    reorder
1117         la      $Tbl,AES_Te             # PIC-ified 'load address'
1118
1119         bal     _mips_AES_set_encrypt_key
1120
1121         bltz    $t0,.Ldkey_done
1122
1123         sll     $at,$cnt,4
1124         $PTR_ADD $head,$key,0
1125         $PTR_ADD $tail,$key,$at
1126 .align  4
1127 .Lswap:
1128         lw      $rk0,0($head)
1129         lw      $rk1,4($head)
1130         lw      $rk2,8($head)
1131         lw      $rk3,12($head)
1132         lw      $rk4,0($tail)
1133         lw      $rk5,4($tail)
1134         lw      $rk6,8($tail)
1135         lw      $rk7,12($tail)
1136         sw      $rk0,0($tail)
1137         sw      $rk1,4($tail)
1138         sw      $rk2,8($tail)
1139         sw      $rk3,12($tail)
1140         $PTR_ADD $head,16
1141         $PTR_SUB $tail,16
1142         sw      $rk4,-16($head)
1143         sw      $rk5,-12($head)
1144         sw      $rk6,-8($head)
1145         sw      $rk7,-4($head)
1146         bne     $head,$tail,.Lswap
1147
1148         lw      $tp1,16($key)           # modulo-scheduled
1149         lui     $x80808080,0x8080
1150         sub     $cnt,1
1151         or      $x80808080,0x8080
1152         sll     $cnt,2
1153         $PTR_ADD $key,16
1154         lui     $x1b1b1b1b,0x1b1b
1155         nor     $x7f7f7f7f,$zero,$x80808080
1156         or      $x1b1b1b1b,0x1b1b
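	# The loop below applies InvMixColumns to one round-key word per
	# iteration, all four bytes processed in parallel: tp2, tp4 and tp8
	# hold the word multiplied by 2, 4 and 8 in GF(2^8), computed
	# bytewise with the 0x80808080, 0x7f7f7f7f and 0x1b1b1b1b masks, and
	# the result combines tp8^tp4^tp2 with byte-rotated copies of tp9,
	# tpb and tpd, mirroring the reference C key-schedule code.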
1157 .align  4
1158 .Lmix:
1159         and     $m,$tp1,$x80808080
1160         and     $tp2,$tp1,$x7f7f7f7f
1161         srl     $tp4,$m,7
1162         addu    $tp2,$tp2               # tp2<<1
1163         subu    $m,$tp4
1164         and     $m,$x1b1b1b1b
1165         xor     $tp2,$m
1166
1167         and     $m,$tp2,$x80808080
1168         and     $tp4,$tp2,$x7f7f7f7f
1169         srl     $tp8,$m,7
1170         addu    $tp4,$tp4               # tp4<<1
1171         subu    $m,$tp8
1172         and     $m,$x1b1b1b1b
1173         xor     $tp4,$m
1174
1175         and     $m,$tp4,$x80808080
1176         and     $tp8,$tp4,$x7f7f7f7f
1177         srl     $tp9,$m,7
1178         addu    $tp8,$tp8               # tp8<<1
1179         subu    $m,$tp9
1180         and     $m,$x1b1b1b1b
1181         xor     $tp8,$m
1182
1183         xor     $tp9,$tp8,$tp1
1184         xor     $tpe,$tp8,$tp4
1185         xor     $tpb,$tp9,$tp2
1186         xor     $tpd,$tp9,$tp4
1187
1188         _ror    $tp1,$tpd,16
1189          xor    $tpe,$tp2
1190         _ror    $tp2,$tpd,-16
1191         xor     $tpe,$tp1
1192         _ror    $tp1,$tp9,8
1193         xor     $tpe,$tp2
1194         _ror    $tp2,$tp9,-24
1195         xor     $tpe,$tp1
1196         _ror    $tp1,$tpb,24
1197         xor     $tpe,$tp2
1198         _ror    $tp2,$tpb,-8
1199         xor     $tpe,$tp1
1200         lw      $tp1,4($key)            # modulo-scheduled
1201         xor     $tpe,$tp2
1202         sub     $cnt,1
1203         sw      $tpe,0($key)
1204         $PTR_ADD $key,4
1205         bnez    $cnt,.Lmix
1206
1207         li      $t0,0
1208 .Ldkey_done:
1209         .set    noreorder
1210         move    $a0,$t0
1211         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1212         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1213 ___
1214 $code.=<<___ if ($flavour =~ /nubi/i);
1215         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1216         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1217         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1218         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1219         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1220 ___
1221 $code.=<<___;
1222         jr      $ra
1223         $PTR_ADD $sp,$FRAMESIZE
1224 .end    AES_set_decrypt_key
1225 ___
1226 }}}
1227
1228 ######################################################################
1229 # Tables are kept in an endian-neutral manner
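# "Endian-neutral" means every table entry is emitted byte by byte with
# .byte directives, so its in-memory layout is identical on big- and
# little-endian targets; the post-processing loop at the bottom of this
# file compensates by flipping shift amounts and lwl/lwr offsets instead.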
1230 $code.=<<___;
1231 .rdata
1232 .align  6
1233 AES_Te:
1234 .byte   0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84     # Te0
1235 .byte   0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
1236 .byte   0xff,0xf2,0xf2,0x0d,    0xd6,0x6b,0x6b,0xbd
1237 .byte   0xde,0x6f,0x6f,0xb1,    0x91,0xc5,0xc5,0x54
1238 .byte   0x60,0x30,0x30,0x50,    0x02,0x01,0x01,0x03
1239 .byte   0xce,0x67,0x67,0xa9,    0x56,0x2b,0x2b,0x7d
1240 .byte   0xe7,0xfe,0xfe,0x19,    0xb5,0xd7,0xd7,0x62
1241 .byte   0x4d,0xab,0xab,0xe6,    0xec,0x76,0x76,0x9a
1242 .byte   0x8f,0xca,0xca,0x45,    0x1f,0x82,0x82,0x9d
1243 .byte   0x89,0xc9,0xc9,0x40,    0xfa,0x7d,0x7d,0x87
1244 .byte   0xef,0xfa,0xfa,0x15,    0xb2,0x59,0x59,0xeb
1245 .byte   0x8e,0x47,0x47,0xc9,    0xfb,0xf0,0xf0,0x0b
1246 .byte   0x41,0xad,0xad,0xec,    0xb3,0xd4,0xd4,0x67
1247 .byte   0x5f,0xa2,0xa2,0xfd,    0x45,0xaf,0xaf,0xea
1248 .byte   0x23,0x9c,0x9c,0xbf,    0x53,0xa4,0xa4,0xf7
1249 .byte   0xe4,0x72,0x72,0x96,    0x9b,0xc0,0xc0,0x5b
1250 .byte   0x75,0xb7,0xb7,0xc2,    0xe1,0xfd,0xfd,0x1c
1251 .byte   0x3d,0x93,0x93,0xae,    0x4c,0x26,0x26,0x6a
1252 .byte   0x6c,0x36,0x36,0x5a,    0x7e,0x3f,0x3f,0x41
1253 .byte   0xf5,0xf7,0xf7,0x02,    0x83,0xcc,0xcc,0x4f
1254 .byte   0x68,0x34,0x34,0x5c,    0x51,0xa5,0xa5,0xf4
1255 .byte   0xd1,0xe5,0xe5,0x34,    0xf9,0xf1,0xf1,0x08
1256 .byte   0xe2,0x71,0x71,0x93,    0xab,0xd8,0xd8,0x73
1257 .byte   0x62,0x31,0x31,0x53,    0x2a,0x15,0x15,0x3f
1258 .byte   0x08,0x04,0x04,0x0c,    0x95,0xc7,0xc7,0x52
1259 .byte   0x46,0x23,0x23,0x65,    0x9d,0xc3,0xc3,0x5e
1260 .byte   0x30,0x18,0x18,0x28,    0x37,0x96,0x96,0xa1
1261 .byte   0x0a,0x05,0x05,0x0f,    0x2f,0x9a,0x9a,0xb5
1262 .byte   0x0e,0x07,0x07,0x09,    0x24,0x12,0x12,0x36
1263 .byte   0x1b,0x80,0x80,0x9b,    0xdf,0xe2,0xe2,0x3d
1264 .byte   0xcd,0xeb,0xeb,0x26,    0x4e,0x27,0x27,0x69
1265 .byte   0x7f,0xb2,0xb2,0xcd,    0xea,0x75,0x75,0x9f
1266 .byte   0x12,0x09,0x09,0x1b,    0x1d,0x83,0x83,0x9e
1267 .byte   0x58,0x2c,0x2c,0x74,    0x34,0x1a,0x1a,0x2e
1268 .byte   0x36,0x1b,0x1b,0x2d,    0xdc,0x6e,0x6e,0xb2
1269 .byte   0xb4,0x5a,0x5a,0xee,    0x5b,0xa0,0xa0,0xfb
1270 .byte   0xa4,0x52,0x52,0xf6,    0x76,0x3b,0x3b,0x4d
1271 .byte   0xb7,0xd6,0xd6,0x61,    0x7d,0xb3,0xb3,0xce
1272 .byte   0x52,0x29,0x29,0x7b,    0xdd,0xe3,0xe3,0x3e
1273 .byte   0x5e,0x2f,0x2f,0x71,    0x13,0x84,0x84,0x97
1274 .byte   0xa6,0x53,0x53,0xf5,    0xb9,0xd1,0xd1,0x68
1275 .byte   0x00,0x00,0x00,0x00,    0xc1,0xed,0xed,0x2c
1276 .byte   0x40,0x20,0x20,0x60,    0xe3,0xfc,0xfc,0x1f
1277 .byte   0x79,0xb1,0xb1,0xc8,    0xb6,0x5b,0x5b,0xed
1278 .byte   0xd4,0x6a,0x6a,0xbe,    0x8d,0xcb,0xcb,0x46
1279 .byte   0x67,0xbe,0xbe,0xd9,    0x72,0x39,0x39,0x4b
1280 .byte   0x94,0x4a,0x4a,0xde,    0x98,0x4c,0x4c,0xd4
1281 .byte   0xb0,0x58,0x58,0xe8,    0x85,0xcf,0xcf,0x4a
1282 .byte   0xbb,0xd0,0xd0,0x6b,    0xc5,0xef,0xef,0x2a
1283 .byte   0x4f,0xaa,0xaa,0xe5,    0xed,0xfb,0xfb,0x16
1284 .byte   0x86,0x43,0x43,0xc5,    0x9a,0x4d,0x4d,0xd7
1285 .byte   0x66,0x33,0x33,0x55,    0x11,0x85,0x85,0x94
1286 .byte   0x8a,0x45,0x45,0xcf,    0xe9,0xf9,0xf9,0x10
1287 .byte   0x04,0x02,0x02,0x06,    0xfe,0x7f,0x7f,0x81
1288 .byte   0xa0,0x50,0x50,0xf0,    0x78,0x3c,0x3c,0x44
1289 .byte   0x25,0x9f,0x9f,0xba,    0x4b,0xa8,0xa8,0xe3
1290 .byte   0xa2,0x51,0x51,0xf3,    0x5d,0xa3,0xa3,0xfe
1291 .byte   0x80,0x40,0x40,0xc0,    0x05,0x8f,0x8f,0x8a
1292 .byte   0x3f,0x92,0x92,0xad,    0x21,0x9d,0x9d,0xbc
1293 .byte   0x70,0x38,0x38,0x48,    0xf1,0xf5,0xf5,0x04
1294 .byte   0x63,0xbc,0xbc,0xdf,    0x77,0xb6,0xb6,0xc1
1295 .byte   0xaf,0xda,0xda,0x75,    0x42,0x21,0x21,0x63
1296 .byte   0x20,0x10,0x10,0x30,    0xe5,0xff,0xff,0x1a
1297 .byte   0xfd,0xf3,0xf3,0x0e,    0xbf,0xd2,0xd2,0x6d
1298 .byte   0x81,0xcd,0xcd,0x4c,    0x18,0x0c,0x0c,0x14
1299 .byte   0x26,0x13,0x13,0x35,    0xc3,0xec,0xec,0x2f
1300 .byte   0xbe,0x5f,0x5f,0xe1,    0x35,0x97,0x97,0xa2
1301 .byte   0x88,0x44,0x44,0xcc,    0x2e,0x17,0x17,0x39
1302 .byte   0x93,0xc4,0xc4,0x57,    0x55,0xa7,0xa7,0xf2
1303 .byte   0xfc,0x7e,0x7e,0x82,    0x7a,0x3d,0x3d,0x47
1304 .byte   0xc8,0x64,0x64,0xac,    0xba,0x5d,0x5d,0xe7
1305 .byte   0x32,0x19,0x19,0x2b,    0xe6,0x73,0x73,0x95
1306 .byte   0xc0,0x60,0x60,0xa0,    0x19,0x81,0x81,0x98
1307 .byte   0x9e,0x4f,0x4f,0xd1,    0xa3,0xdc,0xdc,0x7f
1308 .byte   0x44,0x22,0x22,0x66,    0x54,0x2a,0x2a,0x7e
1309 .byte   0x3b,0x90,0x90,0xab,    0x0b,0x88,0x88,0x83
1310 .byte   0x8c,0x46,0x46,0xca,    0xc7,0xee,0xee,0x29
1311 .byte   0x6b,0xb8,0xb8,0xd3,    0x28,0x14,0x14,0x3c
1312 .byte   0xa7,0xde,0xde,0x79,    0xbc,0x5e,0x5e,0xe2
1313 .byte   0x16,0x0b,0x0b,0x1d,    0xad,0xdb,0xdb,0x76
1314 .byte   0xdb,0xe0,0xe0,0x3b,    0x64,0x32,0x32,0x56
1315 .byte   0x74,0x3a,0x3a,0x4e,    0x14,0x0a,0x0a,0x1e
1316 .byte   0x92,0x49,0x49,0xdb,    0x0c,0x06,0x06,0x0a
1317 .byte   0x48,0x24,0x24,0x6c,    0xb8,0x5c,0x5c,0xe4
1318 .byte   0x9f,0xc2,0xc2,0x5d,    0xbd,0xd3,0xd3,0x6e
1319 .byte   0x43,0xac,0xac,0xef,    0xc4,0x62,0x62,0xa6
1320 .byte   0x39,0x91,0x91,0xa8,    0x31,0x95,0x95,0xa4
1321 .byte   0xd3,0xe4,0xe4,0x37,    0xf2,0x79,0x79,0x8b
1322 .byte   0xd5,0xe7,0xe7,0x32,    0x8b,0xc8,0xc8,0x43
1323 .byte   0x6e,0x37,0x37,0x59,    0xda,0x6d,0x6d,0xb7
1324 .byte   0x01,0x8d,0x8d,0x8c,    0xb1,0xd5,0xd5,0x64
1325 .byte   0x9c,0x4e,0x4e,0xd2,    0x49,0xa9,0xa9,0xe0
1326 .byte   0xd8,0x6c,0x6c,0xb4,    0xac,0x56,0x56,0xfa
1327 .byte   0xf3,0xf4,0xf4,0x07,    0xcf,0xea,0xea,0x25
1328 .byte   0xca,0x65,0x65,0xaf,    0xf4,0x7a,0x7a,0x8e
1329 .byte   0x47,0xae,0xae,0xe9,    0x10,0x08,0x08,0x18
1330 .byte   0x6f,0xba,0xba,0xd5,    0xf0,0x78,0x78,0x88
1331 .byte   0x4a,0x25,0x25,0x6f,    0x5c,0x2e,0x2e,0x72
1332 .byte   0x38,0x1c,0x1c,0x24,    0x57,0xa6,0xa6,0xf1
1333 .byte   0x73,0xb4,0xb4,0xc7,    0x97,0xc6,0xc6,0x51
1334 .byte   0xcb,0xe8,0xe8,0x23,    0xa1,0xdd,0xdd,0x7c
1335 .byte   0xe8,0x74,0x74,0x9c,    0x3e,0x1f,0x1f,0x21
1336 .byte   0x96,0x4b,0x4b,0xdd,    0x61,0xbd,0xbd,0xdc
1337 .byte   0x0d,0x8b,0x8b,0x86,    0x0f,0x8a,0x8a,0x85
1338 .byte   0xe0,0x70,0x70,0x90,    0x7c,0x3e,0x3e,0x42
1339 .byte   0x71,0xb5,0xb5,0xc4,    0xcc,0x66,0x66,0xaa
1340 .byte   0x90,0x48,0x48,0xd8,    0x06,0x03,0x03,0x05
1341 .byte   0xf7,0xf6,0xf6,0x01,    0x1c,0x0e,0x0e,0x12
1342 .byte   0xc2,0x61,0x61,0xa3,    0x6a,0x35,0x35,0x5f
1343 .byte   0xae,0x57,0x57,0xf9,    0x69,0xb9,0xb9,0xd0
1344 .byte   0x17,0x86,0x86,0x91,    0x99,0xc1,0xc1,0x58
1345 .byte   0x3a,0x1d,0x1d,0x27,    0x27,0x9e,0x9e,0xb9
1346 .byte   0xd9,0xe1,0xe1,0x38,    0xeb,0xf8,0xf8,0x13
1347 .byte   0x2b,0x98,0x98,0xb3,    0x22,0x11,0x11,0x33
1348 .byte   0xd2,0x69,0x69,0xbb,    0xa9,0xd9,0xd9,0x70
1349 .byte   0x07,0x8e,0x8e,0x89,    0x33,0x94,0x94,0xa7
1350 .byte   0x2d,0x9b,0x9b,0xb6,    0x3c,0x1e,0x1e,0x22
1351 .byte   0x15,0x87,0x87,0x92,    0xc9,0xe9,0xe9,0x20
1352 .byte   0x87,0xce,0xce,0x49,    0xaa,0x55,0x55,0xff
1353 .byte   0x50,0x28,0x28,0x78,    0xa5,0xdf,0xdf,0x7a
1354 .byte   0x03,0x8c,0x8c,0x8f,    0x59,0xa1,0xa1,0xf8
1355 .byte   0x09,0x89,0x89,0x80,    0x1a,0x0d,0x0d,0x17
1356 .byte   0x65,0xbf,0xbf,0xda,    0xd7,0xe6,0xe6,0x31
1357 .byte   0x84,0x42,0x42,0xc6,    0xd0,0x68,0x68,0xb8
1358 .byte   0x82,0x41,0x41,0xc3,    0x29,0x99,0x99,0xb0
1359 .byte   0x5a,0x2d,0x2d,0x77,    0x1e,0x0f,0x0f,0x11
1360 .byte   0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
1361 .byte   0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
1362
1363 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5  # Te4
1364 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
1365 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
1366 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
1367 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
1368 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
1369 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
1370 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
1371 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
1372 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
1373 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
1374 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
1375 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
1376 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
1377 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
1378 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
1379 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
1380 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
1381 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
1382 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
1383 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
1384 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
1385 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
1386 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
1387 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
1388 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
1389 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
1390 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
1391 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
1392 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
1393 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
1394 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
1395
1396 .byte   0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00     # rcon
1397 .byte   0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
1398 .byte   0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
1399 .byte   0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
1400 .byte   0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
1401
1402 .align  6
1403 AES_Td:
1404 .byte   0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53     # Td0
1405 .byte   0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
1406 .byte   0x3b,0xab,0x6b,0xcb,    0x1f,0x9d,0x45,0xf1
1407 .byte   0xac,0xfa,0x58,0xab,    0x4b,0xe3,0x03,0x93
1408 .byte   0x20,0x30,0xfa,0x55,    0xad,0x76,0x6d,0xf6
1409 .byte   0x88,0xcc,0x76,0x91,    0xf5,0x02,0x4c,0x25
1410 .byte   0x4f,0xe5,0xd7,0xfc,    0xc5,0x2a,0xcb,0xd7
1411 .byte   0x26,0x35,0x44,0x80,    0xb5,0x62,0xa3,0x8f
1412 .byte   0xde,0xb1,0x5a,0x49,    0x25,0xba,0x1b,0x67
1413 .byte   0x45,0xea,0x0e,0x98,    0x5d,0xfe,0xc0,0xe1
1414 .byte   0xc3,0x2f,0x75,0x02,    0x81,0x4c,0xf0,0x12
1415 .byte   0x8d,0x46,0x97,0xa3,    0x6b,0xd3,0xf9,0xc6
1416 .byte   0x03,0x8f,0x5f,0xe7,    0x15,0x92,0x9c,0x95
1417 .byte   0xbf,0x6d,0x7a,0xeb,    0x95,0x52,0x59,0xda
1418 .byte   0xd4,0xbe,0x83,0x2d,    0x58,0x74,0x21,0xd3
1419 .byte   0x49,0xe0,0x69,0x29,    0x8e,0xc9,0xc8,0x44
1420 .byte   0x75,0xc2,0x89,0x6a,    0xf4,0x8e,0x79,0x78
1421 .byte   0x99,0x58,0x3e,0x6b,    0x27,0xb9,0x71,0xdd
1422 .byte   0xbe,0xe1,0x4f,0xb6,    0xf0,0x88,0xad,0x17
1423 .byte   0xc9,0x20,0xac,0x66,    0x7d,0xce,0x3a,0xb4
1424 .byte   0x63,0xdf,0x4a,0x18,    0xe5,0x1a,0x31,0x82
1425 .byte   0x97,0x51,0x33,0x60,    0x62,0x53,0x7f,0x45
1426 .byte   0xb1,0x64,0x77,0xe0,    0xbb,0x6b,0xae,0x84
1427 .byte   0xfe,0x81,0xa0,0x1c,    0xf9,0x08,0x2b,0x94
1428 .byte   0x70,0x48,0x68,0x58,    0x8f,0x45,0xfd,0x19
1429 .byte   0x94,0xde,0x6c,0x87,    0x52,0x7b,0xf8,0xb7
1430 .byte   0xab,0x73,0xd3,0x23,    0x72,0x4b,0x02,0xe2
1431 .byte   0xe3,0x1f,0x8f,0x57,    0x66,0x55,0xab,0x2a
1432 .byte   0xb2,0xeb,0x28,0x07,    0x2f,0xb5,0xc2,0x03
1433 .byte   0x86,0xc5,0x7b,0x9a,    0xd3,0x37,0x08,0xa5
1434 .byte   0x30,0x28,0x87,0xf2,    0x23,0xbf,0xa5,0xb2
1435 .byte   0x02,0x03,0x6a,0xba,    0xed,0x16,0x82,0x5c
1436 .byte   0x8a,0xcf,0x1c,0x2b,    0xa7,0x79,0xb4,0x92
1437 .byte   0xf3,0x07,0xf2,0xf0,    0x4e,0x69,0xe2,0xa1
1438 .byte   0x65,0xda,0xf4,0xcd,    0x06,0x05,0xbe,0xd5
1439 .byte   0xd1,0x34,0x62,0x1f,    0xc4,0xa6,0xfe,0x8a
1440 .byte   0x34,0x2e,0x53,0x9d,    0xa2,0xf3,0x55,0xa0
1441 .byte   0x05,0x8a,0xe1,0x32,    0xa4,0xf6,0xeb,0x75
1442 .byte   0x0b,0x83,0xec,0x39,    0x40,0x60,0xef,0xaa
1443 .byte   0x5e,0x71,0x9f,0x06,    0xbd,0x6e,0x10,0x51
1444 .byte   0x3e,0x21,0x8a,0xf9,    0x96,0xdd,0x06,0x3d
1445 .byte   0xdd,0x3e,0x05,0xae,    0x4d,0xe6,0xbd,0x46
1446 .byte   0x91,0x54,0x8d,0xb5,    0x71,0xc4,0x5d,0x05
1447 .byte   0x04,0x06,0xd4,0x6f,    0x60,0x50,0x15,0xff
1448 .byte   0x19,0x98,0xfb,0x24,    0xd6,0xbd,0xe9,0x97
1449 .byte   0x89,0x40,0x43,0xcc,    0x67,0xd9,0x9e,0x77
1450 .byte   0xb0,0xe8,0x42,0xbd,    0x07,0x89,0x8b,0x88
1451 .byte   0xe7,0x19,0x5b,0x38,    0x79,0xc8,0xee,0xdb
1452 .byte   0xa1,0x7c,0x0a,0x47,    0x7c,0x42,0x0f,0xe9
1453 .byte   0xf8,0x84,0x1e,0xc9,    0x00,0x00,0x00,0x00
1454 .byte   0x09,0x80,0x86,0x83,    0x32,0x2b,0xed,0x48
1455 .byte   0x1e,0x11,0x70,0xac,    0x6c,0x5a,0x72,0x4e
1456 .byte   0xfd,0x0e,0xff,0xfb,    0x0f,0x85,0x38,0x56
1457 .byte   0x3d,0xae,0xd5,0x1e,    0x36,0x2d,0x39,0x27
1458 .byte   0x0a,0x0f,0xd9,0x64,    0x68,0x5c,0xa6,0x21
1459 .byte   0x9b,0x5b,0x54,0xd1,    0x24,0x36,0x2e,0x3a
1460 .byte   0x0c,0x0a,0x67,0xb1,    0x93,0x57,0xe7,0x0f
1461 .byte   0xb4,0xee,0x96,0xd2,    0x1b,0x9b,0x91,0x9e
1462 .byte   0x80,0xc0,0xc5,0x4f,    0x61,0xdc,0x20,0xa2
1463 .byte   0x5a,0x77,0x4b,0x69,    0x1c,0x12,0x1a,0x16
1464 .byte   0xe2,0x93,0xba,0x0a,    0xc0,0xa0,0x2a,0xe5
1465 .byte   0x3c,0x22,0xe0,0x43,    0x12,0x1b,0x17,0x1d
1466 .byte   0x0e,0x09,0x0d,0x0b,    0xf2,0x8b,0xc7,0xad
1467 .byte   0x2d,0xb6,0xa8,0xb9,    0x14,0x1e,0xa9,0xc8
1468 .byte   0x57,0xf1,0x19,0x85,    0xaf,0x75,0x07,0x4c
1469 .byte   0xee,0x99,0xdd,0xbb,    0xa3,0x7f,0x60,0xfd
1470 .byte   0xf7,0x01,0x26,0x9f,    0x5c,0x72,0xf5,0xbc
1471 .byte   0x44,0x66,0x3b,0xc5,    0x5b,0xfb,0x7e,0x34
1472 .byte   0x8b,0x43,0x29,0x76,    0xcb,0x23,0xc6,0xdc
1473 .byte   0xb6,0xed,0xfc,0x68,    0xb8,0xe4,0xf1,0x63
1474 .byte   0xd7,0x31,0xdc,0xca,    0x42,0x63,0x85,0x10
1475 .byte   0x13,0x97,0x22,0x40,    0x84,0xc6,0x11,0x20
1476 .byte   0x85,0x4a,0x24,0x7d,    0xd2,0xbb,0x3d,0xf8
1477 .byte   0xae,0xf9,0x32,0x11,    0xc7,0x29,0xa1,0x6d
1478 .byte   0x1d,0x9e,0x2f,0x4b,    0xdc,0xb2,0x30,0xf3
1479 .byte   0x0d,0x86,0x52,0xec,    0x77,0xc1,0xe3,0xd0
1480 .byte   0x2b,0xb3,0x16,0x6c,    0xa9,0x70,0xb9,0x99
1481 .byte   0x11,0x94,0x48,0xfa,    0x47,0xe9,0x64,0x22
1482 .byte   0xa8,0xfc,0x8c,0xc4,    0xa0,0xf0,0x3f,0x1a
1483 .byte   0x56,0x7d,0x2c,0xd8,    0x22,0x33,0x90,0xef
1484 .byte   0x87,0x49,0x4e,0xc7,    0xd9,0x38,0xd1,0xc1
1485 .byte   0x8c,0xca,0xa2,0xfe,    0x98,0xd4,0x0b,0x36
1486 .byte   0xa6,0xf5,0x81,0xcf,    0xa5,0x7a,0xde,0x28
1487 .byte   0xda,0xb7,0x8e,0x26,    0x3f,0xad,0xbf,0xa4
1488 .byte   0x2c,0x3a,0x9d,0xe4,    0x50,0x78,0x92,0x0d
1489 .byte   0x6a,0x5f,0xcc,0x9b,    0x54,0x7e,0x46,0x62
1490 .byte   0xf6,0x8d,0x13,0xc2,    0x90,0xd8,0xb8,0xe8
1491 .byte   0x2e,0x39,0xf7,0x5e,    0x82,0xc3,0xaf,0xf5
1492 .byte   0x9f,0x5d,0x80,0xbe,    0x69,0xd0,0x93,0x7c
1493 .byte   0x6f,0xd5,0x2d,0xa9,    0xcf,0x25,0x12,0xb3
1494 .byte   0xc8,0xac,0x99,0x3b,    0x10,0x18,0x7d,0xa7
1495 .byte   0xe8,0x9c,0x63,0x6e,    0xdb,0x3b,0xbb,0x7b
1496 .byte   0xcd,0x26,0x78,0x09,    0x6e,0x59,0x18,0xf4
1497 .byte   0xec,0x9a,0xb7,0x01,    0x83,0x4f,0x9a,0xa8
1498 .byte   0xe6,0x95,0x6e,0x65,    0xaa,0xff,0xe6,0x7e
1499 .byte   0x21,0xbc,0xcf,0x08,    0xef,0x15,0xe8,0xe6
1500 .byte   0xba,0xe7,0x9b,0xd9,    0x4a,0x6f,0x36,0xce
1501 .byte   0xea,0x9f,0x09,0xd4,    0x29,0xb0,0x7c,0xd6
1502 .byte   0x31,0xa4,0xb2,0xaf,    0x2a,0x3f,0x23,0x31
1503 .byte   0xc6,0xa5,0x94,0x30,    0x35,0xa2,0x66,0xc0
1504 .byte   0x74,0x4e,0xbc,0x37,    0xfc,0x82,0xca,0xa6
1505 .byte   0xe0,0x90,0xd0,0xb0,    0x33,0xa7,0xd8,0x15
1506 .byte   0xf1,0x04,0x98,0x4a,    0x41,0xec,0xda,0xf7
1507 .byte   0x7f,0xcd,0x50,0x0e,    0x17,0x91,0xf6,0x2f
1508 .byte   0x76,0x4d,0xd6,0x8d,    0x43,0xef,0xb0,0x4d
1509 .byte   0xcc,0xaa,0x4d,0x54,    0xe4,0x96,0x04,0xdf
1510 .byte   0x9e,0xd1,0xb5,0xe3,    0x4c,0x6a,0x88,0x1b
1511 .byte   0xc1,0x2c,0x1f,0xb8,    0x46,0x65,0x51,0x7f
1512 .byte   0x9d,0x5e,0xea,0x04,    0x01,0x8c,0x35,0x5d
1513 .byte   0xfa,0x87,0x74,0x73,    0xfb,0x0b,0x41,0x2e
1514 .byte   0xb3,0x67,0x1d,0x5a,    0x92,0xdb,0xd2,0x52
1515 .byte   0xe9,0x10,0x56,0x33,    0x6d,0xd6,0x47,0x13
1516 .byte   0x9a,0xd7,0x61,0x8c,    0x37,0xa1,0x0c,0x7a
1517 .byte   0x59,0xf8,0x14,0x8e,    0xeb,0x13,0x3c,0x89
1518 .byte   0xce,0xa9,0x27,0xee,    0xb7,0x61,0xc9,0x35
1519 .byte   0xe1,0x1c,0xe5,0xed,    0x7a,0x47,0xb1,0x3c
1520 .byte   0x9c,0xd2,0xdf,0x59,    0x55,0xf2,0x73,0x3f
1521 .byte   0x18,0x14,0xce,0x79,    0x73,0xc7,0x37,0xbf
1522 .byte   0x53,0xf7,0xcd,0xea,    0x5f,0xfd,0xaa,0x5b
1523 .byte   0xdf,0x3d,0x6f,0x14,    0x78,0x44,0xdb,0x86
1524 .byte   0xca,0xaf,0xf3,0x81,    0xb9,0x68,0xc4,0x3e
1525 .byte   0x38,0x24,0x34,0x2c,    0xc2,0xa3,0x40,0x5f
1526 .byte   0x16,0x1d,0xc3,0x72,    0xbc,0xe2,0x25,0x0c
1527 .byte   0x28,0x3c,0x49,0x8b,    0xff,0x0d,0x95,0x41
1528 .byte   0x39,0xa8,0x01,0x71,    0x08,0x0c,0xb3,0xde
1529 .byte   0xd8,0xb4,0xe4,0x9c,    0x64,0x56,0xc1,0x90
1530 .byte   0x7b,0xcb,0x84,0x61,    0xd5,0x32,0xb6,0x70
1531 .byte   0x48,0x6c,0x5c,0x74,    0xd0,0xb8,0x57,0x42
1532
1533 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38  # Td4
1534 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1535 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1536 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1537 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1538 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1539 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1540 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1541 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1542 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1543 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1544 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1545 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1546 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1547 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1548 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1549 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1550 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1551 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1552 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1553 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1554 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1555 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1556 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1557 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1558 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1559 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1560 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1561 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1562 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1563 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1564 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1565 ___
1566 \f
1567 foreach (split("\n",$code)) {
1568         s/\`([^\`]*)\`/eval $1/ge;
1569
1570         # made-up pseudo-instructions _xtr, _ins, _ror and _bias cope
1571         # with byte-order dependencies...
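	# e.g. "_xtr $1,$9,16-2" is emitted as "srl $1,$9,14" for a
	# big-endian target but as "srl $1,$9,6" (i.e. 24-16-2) for a
	# little-endian one, so the same source line always extracts the
	# same logical byte; two-operand forms such as "_ins $12,16" are
	# first expanded to "_ins $12,$12,16" by the substitution below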
1572         if (/^\s+_/) {
1573             s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
1574
1575             s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
1576                 sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
1577                                         :               eval("24-$3"))/e or
1578             s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
1579                 sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
1580                                         :               eval("24-$3"))/e or
1581             s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
1582                 sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
1583                                         :               eval("$3*-1"))/e or
1584             s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
1585                 sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
1586                                         :               eval("($3-16)&31"))/e;
1587
1588             s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
1589                 sprintf("sll\t$1,$2,$3")/e                              or
1590             s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
1591                 sprintf("and\t$1,$2,0xff")/e                            or
1592             s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
1593         }
1594
1595         # adjust lwl/lwr and swl/swr byte offsets for little-endian order
1596         if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
1597             s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
1598                 sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e        or
1599             s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
1600                 sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
1601         }
1602
1603         print $_,"\n";
1604 }
1605
1606 close STDOUT;