aes-mips.pl: interleave load and integer instructions for better performance.
[openssl.git] / crypto / aes / asm / aes-mips.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # AES for MIPS
11
12 # October 2010
13 #
14 # Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
15 # spends ~68 cycles per byte processed with 128-bit key. This is ~16%
16 # faster than gcc-generated code, which is not very impressive. But
17 # recall that compressed S-box requires extra processing, namely
18 # additional rotations. Rotations are implemented with lwl/lwr pairs,
19 # which are normally used for loading unaligned data. Another cool
20 # thing about this module is its endian neutrality, which means that
21 # it processes data without ever changing byte order...
22
23 ######################################################################
24 # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
25 # widely used. Then there is a new contender: NUBI. It appears that if
26 # one picks the latter, it's possible to arrange code in an ABI-neutral
27 # manner. Therefore let's stick to the NUBI register layout:
28 #
29 ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
30 ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
31 ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
32 ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
33 #
34 # The return value is placed in $a0. The following coding rules facilitate
35 # interoperability:
36 #
37 # - never ever touch $tp, "thread pointer", former $gp;
38 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
39 #   old code];
40 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
41 #
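# For example, a routine written against this layout that needs a 5th and
# 6th argument would, when assembled for O32, reload them from the caller's
# argument area, i.e.
#
#	lw	$a4,4*4($sp)
#	lw	$a5,4*5($sp)
#
# since O32 passes only the first four arguments in registers.
#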
42 # For reference here is register layout for N32/64 MIPS ABIs:
43 #
44 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
45 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
46 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
47 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
48 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
49 #
50 $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
51
52 if ($flavour =~ /64|n32/i) {
53         $PTR_ADD="dadd";        # incidentally works even on n32
54         $PTR_SUB="dsub";        # incidentally works even on n32
55         $REG_S="sd";
56         $REG_L="ld";
57         $PTR_SLL="dsll";        # incidentally works even on n32
58         $SZREG=8;
59 } else {
60         $PTR_ADD="add";
61         $PTR_SUB="sub";
62         $REG_S="sw";
63         $REG_L="lw";
64         $PTR_SLL="sll";
65         $SZREG=4;
66 }
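# A typical invocation (illustrative only, the actual command line comes
# from the build system) would be "perl aes-mips.pl o32 aes-mips.s": the
# first argument selects one of the flavours above, and any argument that
# looks like a file name is taken as the assembler output file (see below).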
67 $pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
68 #
69 # <appro@openssl.org>
70 #
71 ######################################################################
72
73 if ($ENV{CC}) {
74     $big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
75 } else {
76     $big_endian=(unpack('L',pack('N',1))==1);
77 }
78
79 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
80 open STDOUT,">$output";
83
84 my ($MSB,$LSB)=(0,3);   # automatically converted to little-endian
85
86 $code.=<<___;
87 .text
88 #ifdef OPENSSL_FIPSCANISTER
89 # include <openssl/fipssyms.h>
90 #endif
91
92 #if !defined(__vxworks) || defined(__pic__)
93 .option pic2
94 #endif
95 .set    noat
96 ___
97 \f
98 {{{
99 my $FRAMESIZE=16*$SZREG;
100 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
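# ($SAVED_REGS_MASK is the .mask bitmap of registers spilled by the
#  prologue: bits 31 and 30 for $ra and $fp, bits 23-16 for $s11-$s4,
#  plus bits 15-12 and bit 3 for $15-$12 and $gp in the NUBI case.)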
101
102 my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
103 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
104 my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
105 my ($key0,$cnt)=($gp,$fp);
106
107 # instruction ordering is "stolen" from the output of the MIPSpro
108 # assembler invoked with -mips3 -O3 arguments...
109 $code.=<<___;
110 .align  5
111 .ent    _mips_AES_encrypt
112 _mips_AES_encrypt:
113         .frame  $sp,0,$ra
114         .set    reorder
115         lw      $t0,0($key)
116         lw      $t1,4($key)
117         lw      $t2,8($key)
118         lw      $t3,12($key)
119         lw      $cnt,240($key)
120         $PTR_ADD $key0,$key,16
121
122         xor     $s0,$t0
123         xor     $s1,$t1
124         xor     $s2,$t2
125         xor     $s3,$t3
126
127         sub     $cnt,1
128         _xtr    $i0,$s1,16-2
129 .Loop_enc:
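        # one AES round per iteration: s0 becomes
        # rk0 ^ Te0[s0>>24] ^ Te1[s1>>16] ^ Te2[s2>>8] ^ Te3[s3&0xff]
        # (and similarly, with rotated inputs, for s1..s3); the Te1/Te2/Te3
        # values are rotated Te0 entries picked up with the lwl/lwr pairs
        # below instead of separate tables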
130         _xtr    $i1,$s2,16-2
131         _xtr    $i2,$s3,16-2
132         _xtr    $i3,$s0,16-2
133         and     $i0,0x3fc
134         and     $i1,0x3fc
135         and     $i2,0x3fc
136         and     $i3,0x3fc
137         $PTR_ADD $i0,$Tbl
138         $PTR_ADD $i1,$Tbl
139         $PTR_ADD $i2,$Tbl
140         $PTR_ADD $i3,$Tbl
141         lwl     $t0,3($i0)              # Te1[s1>>16]
142         lwl     $t1,3($i1)              # Te1[s2>>16]
143         lwl     $t2,3($i2)              # Te1[s3>>16]
144         lwl     $t3,3($i3)              # Te1[s0>>16]
145         lwr     $t0,2($i0)              # Te1[s1>>16]
146         _xtr    $i0,$s2,8-2
147         lwr     $t1,2($i1)              # Te1[s2>>16]
148         _xtr    $i1,$s3,8-2
149         lwr     $t2,2($i2)              # Te1[s3>>16]
150         _xtr    $i2,$s0,8-2
151         lwr     $t3,2($i3)              # Te1[s0>>16]
152         _xtr    $i3,$s1,8-2
153
154         and     $i0,0x3fc
155         and     $i1,0x3fc
156         and     $i2,0x3fc
157         and     $i3,0x3fc
158         $PTR_ADD $i0,$Tbl
159         $PTR_ADD $i1,$Tbl
160         $PTR_ADD $i2,$Tbl
161         $PTR_ADD $i3,$Tbl
162         lwl     $t4,2($i0)              # Te2[s2>>8]
163         lwl     $t5,2($i1)              # Te2[s3>>8]
164         lwl     $t6,2($i2)              # Te2[s0>>8]
165         lwl     $t7,2($i3)              # Te2[s1>>8]
166         lwr     $t4,1($i0)              # Te2[s2>>8]
167         _xtr    $i0,$s3,0-2
168         lwr     $t5,1($i1)              # Te2[s3>>8]
169         _xtr    $i1,$s0,0-2
170         lwr     $t6,1($i2)              # Te2[s0>>8]
171         _xtr    $i2,$s1,0-2
172         lwr     $t7,1($i3)              # Te2[s1>>8]
173         _xtr    $i3,$s2,0-2
174
175         and     $i0,0x3fc
176         and     $i1,0x3fc
177         and     $i2,0x3fc
178         and     $i3,0x3fc
179         $PTR_ADD $i0,$Tbl
180         $PTR_ADD $i1,$Tbl
181         $PTR_ADD $i2,$Tbl
182         $PTR_ADD $i3,$Tbl
183         lwl     $t8,1($i0)              # Te3[s3]
184         lwl     $t9,1($i1)              # Te3[s0]
185         lwl     $t10,1($i2)             # Te3[s1]
186         lwl     $t11,1($i3)             # Te3[s2]
187         lwr     $t8,0($i0)              # Te3[s3]
188         _xtr    $i0,$s0,24-2
189         lwr     $t9,0($i1)              # Te3[s0]
190         _xtr    $i1,$s1,24-2
191         lwr     $t10,0($i2)             # Te3[s1]
192         _xtr    $i2,$s2,24-2
193         lwr     $t11,0($i3)             # Te3[s2]
194         _xtr    $i3,$s3,24-2
195
196         and     $i0,0x3fc
197         and     $i1,0x3fc
198         and     $i2,0x3fc
199         and     $i3,0x3fc
200         $PTR_ADD $i0,$Tbl
201         $PTR_ADD $i1,$Tbl
202         $PTR_ADD $i2,$Tbl
203         $PTR_ADD $i3,$Tbl
204         xor     $t0,$t4
205         lw      $t4,0($i0)              # Te0[s0>>24]
206         xor     $t1,$t5
207         lw      $t5,0($i1)              # Te0[s1>>24]
208         xor     $t2,$t6
209         lw      $t6,0($i2)              # Te0[s2>>24]
210         xor     $t3,$t7
211         lw      $t7,0($i3)              # Te0[s3>>24]
212
213         xor     $t0,$t8
214         lw      $s0,0($key0)
215         xor     $t1,$t9
216         lw      $s1,4($key0)
217         xor     $t2,$t10
218         lw      $s2,8($key0)
219         xor     $t3,$t11
220         lw      $s3,12($key0)
221
222         xor     $t0,$t4
223         xor     $t1,$t5
224         xor     $t2,$t6
225         xor     $t3,$t7
226
227         sub     $cnt,1
228         $PTR_ADD $key0,16
229         xor     $s0,$t0
230         xor     $s1,$t1
231         xor     $s2,$t2
232         xor     $s3,$t3
233         .set    noreorder
234         bnez    $cnt,.Loop_enc
235         _xtr    $i0,$s1,16-2
236
237         .set    reorder
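        # final round: SubBytes/ShiftRows but no MixColumns, so single
        # S-box bytes are fetched with lbu (byte 2 of every Te0 entry is
        # the raw S-box value) and merged back into words with the _ins
        # shifts before the last AddRoundKey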
238         _xtr    $i1,$s2,16-2
239         _xtr    $i2,$s3,16-2
240         _xtr    $i3,$s0,16-2
241         and     $i0,0x3fc
242         and     $i1,0x3fc
243         and     $i2,0x3fc
244         and     $i3,0x3fc
245         $PTR_ADD $i0,$Tbl
246         $PTR_ADD $i1,$Tbl
247         $PTR_ADD $i2,$Tbl
248         $PTR_ADD $i3,$Tbl
249         lbu     $t0,2($i0)              # Te4[s1>>16]
250         _xtr    $i0,$s2,8-2
251         lbu     $t1,2($i1)              # Te4[s2>>16]
252         _xtr    $i1,$s3,8-2
253         lbu     $t2,2($i2)              # Te4[s3>>16]
254         _xtr    $i2,$s0,8-2
255         lbu     $t3,2($i3)              # Te4[s0>>16]
256         _xtr    $i3,$s1,8-2
257
258         and     $i0,0x3fc
259         and     $i1,0x3fc
260         and     $i2,0x3fc
261         and     $i3,0x3fc
262         $PTR_ADD $i0,$Tbl
263         $PTR_ADD $i1,$Tbl
264         $PTR_ADD $i2,$Tbl
265         $PTR_ADD $i3,$Tbl
266         lbu     $t4,2($i0)              # Te4[s2>>8]
267         _xtr    $i0,$s0,24-2
268         lbu     $t5,2($i1)              # Te4[s3>>8]
269         _xtr    $i1,$s1,24-2
270         lbu     $t6,2($i2)              # Te4[s0>>8]
271         _xtr    $i2,$s2,24-2
272         lbu     $t7,2($i3)              # Te4[s1>>8]
273         _xtr    $i3,$s3,24-2
274
275         and     $i0,0x3fc
276         and     $i1,0x3fc
277         and     $i2,0x3fc
278         and     $i3,0x3fc
279         $PTR_ADD $i0,$Tbl
280         $PTR_ADD $i1,$Tbl
281         $PTR_ADD $i2,$Tbl
282         $PTR_ADD $i3,$Tbl
283         lbu     $t8,2($i0)              # Te4[s0>>24]
284         _xtr    $i0,$s3,0-2
285         lbu     $t9,2($i1)              # Te4[s1>>24]
286         _xtr    $i1,$s0,0-2
287         lbu     $t10,2($i2)             # Te4[s2>>24]
288         _xtr    $i2,$s1,0-2
289         lbu     $t11,2($i3)             # Te4[s3>>24]
290         _xtr    $i3,$s2,0-2
291
292         and     $i0,0x3fc
293         and     $i1,0x3fc
294         and     $i2,0x3fc
295         and     $i3,0x3fc
296         $PTR_ADD $i0,$Tbl
297         $PTR_ADD $i1,$Tbl
298         $PTR_ADD $i2,$Tbl
299         $PTR_ADD $i3,$Tbl
300
301         _ins    $t0,16
302         _ins    $t1,16
303         _ins    $t2,16
304         _ins    $t3,16
305
306         _ins    $t4,8
307         _ins    $t5,8
308         _ins    $t6,8
309         _ins    $t7,8
310
311         xor     $t0,$t4
312         lbu     $t4,2($i0)              # Te4[s3]
313         xor     $t1,$t5
314         lbu     $t5,2($i1)              # Te4[s0]
315         xor     $t2,$t6
316         lbu     $t6,2($i2)              # Te4[s1]
317         xor     $t3,$t7
318         lbu     $t7,2($i3)              # Te4[s2]
319
320         _ins    $t8,24
321         lw      $s0,0($key0)
322         _ins    $t9,24
323         lw      $s1,4($key0)
324         _ins    $t10,24
325         lw      $s2,8($key0)
326         _ins    $t11,24
327         lw      $s3,12($key0)
328
329         xor     $t0,$t8
330         xor     $t1,$t9
331         xor     $t2,$t10
332         xor     $t3,$t11
333
334         _ins    $t4,0
335         _ins    $t5,0
336         _ins    $t6,0
337         _ins    $t7,0
338
339         xor     $t0,$t4
340         xor     $t1,$t5
341         xor     $t2,$t6
342         xor     $t3,$t7
343
344         xor     $s0,$t0
345         xor     $s1,$t1
346         xor     $s2,$t2
347         xor     $s3,$t3
348
349         jr      $ra
350 .end    _mips_AES_encrypt
351
352 .align  5
353 .globl  AES_encrypt
354 .ent    AES_encrypt
355 AES_encrypt:
356         .frame  $sp,$FRAMESIZE,$ra
357         .mask   $SAVED_REGS_MASK,-$SZREG
358         .set    noreorder
359 ___
360 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
361         .cpload $pf
362 ___
363 $code.=<<___;
364         $PTR_SUB $sp,$FRAMESIZE
365         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
366         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
367         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
368         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
369         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
370         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
371         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
372         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
373         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
374         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
375 ___
376 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
377         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
378         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
379         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
380         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
381         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
382 ___
383 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
384         .cplocal        $Tbl
385         .cpsetup        $pf,$zero,AES_encrypt
386 ___
387 $code.=<<___;
388         .set    reorder
389         la      $Tbl,AES_Te             # PIC-ified 'load address'
390
391         lwl     $s0,0+$MSB($inp)
392         lwl     $s1,4+$MSB($inp)
393         lwl     $s2,8+$MSB($inp)
394         lwl     $s3,12+$MSB($inp)
395         lwr     $s0,0+$LSB($inp)
396         lwr     $s1,4+$LSB($inp)
397         lwr     $s2,8+$LSB($inp)
398         lwr     $s3,12+$LSB($inp)
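        # the lwl/lwr pairs above implement a possibly misaligned 128-bit
        # load; the MSB/LSB offsets (and the lwl/lwr offsets themselves)
        # are adjusted for little-endian targets by the post-processing
        # loop at the bottom of this file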
399
400         bal     _mips_AES_encrypt
401
402         swr     $s0,0+$LSB($out)
403         swr     $s1,4+$LSB($out)
404         swr     $s2,8+$LSB($out)
405         swr     $s3,12+$LSB($out)
406         swl     $s0,0+$MSB($out)
407         swl     $s1,4+$MSB($out)
408         swl     $s2,8+$MSB($out)
409         swl     $s3,12+$MSB($out)
410
411         .set    noreorder
412         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
413         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
414         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
415         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
416         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
417         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
418         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
419         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
420         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
421         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
422 ___
423 $code.=<<___ if ($flavour =~ /nubi/i);
424         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
425         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
426         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
427         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
428         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
429 ___
430 $code.=<<___;
431         jr      $ra
432         $PTR_ADD $sp,$FRAMESIZE
433 .end    AES_encrypt
434 ___
435 \f
436 $code.=<<___;
437 .align  5
438 .ent    _mips_AES_decrypt
439 _mips_AES_decrypt:
440         .frame  $sp,0,$ra
441         .set    reorder
442         lw      $t0,0($key)
443         lw      $t1,4($key)
444         lw      $t2,8($key)
445         lw      $t3,12($key)
446         lw      $cnt,240($key)
447         $PTR_ADD $key0,$key,16
448
449         xor     $s0,$t0
450         xor     $s1,$t1
451         xor     $s2,$t2
452         xor     $s3,$t3
453
454         sub     $cnt,1
455         _xtr    $i0,$s3,16-2
456 .Loop_dec:
457         _xtr    $i1,$s0,16-2
458         _xtr    $i2,$s1,16-2
459         _xtr    $i3,$s2,16-2
460         and     $i0,0x3fc
461         and     $i1,0x3fc
462         and     $i2,0x3fc
463         and     $i3,0x3fc
464         $PTR_ADD $i0,$Tbl
465         $PTR_ADD $i1,$Tbl
466         $PTR_ADD $i2,$Tbl
467         $PTR_ADD $i3,$Tbl
468         lwl     $t0,3($i0)              # Td1[s3>>16]
469         lwl     $t1,3($i1)              # Td1[s0>>16]
470         lwl     $t2,3($i2)              # Td1[s1>>16]
471         lwl     $t3,3($i3)              # Td1[s2>>16]
472         lwr     $t0,2($i0)              # Td1[s3>>16]
473         _xtr    $i0,$s2,8-2
474         lwr     $t1,2($i1)              # Td1[s0>>16]
475         _xtr    $i1,$s3,8-2
476         lwr     $t2,2($i2)              # Td1[s1>>16]
477         _xtr    $i2,$s0,8-2
478         lwr     $t3,2($i3)              # Td1[s2>>16]
479         _xtr    $i3,$s1,8-2
480
481         and     $i0,0x3fc
482         and     $i1,0x3fc
483         and     $i2,0x3fc
484         and     $i3,0x3fc
485         $PTR_ADD $i0,$Tbl
486         $PTR_ADD $i1,$Tbl
487         $PTR_ADD $i2,$Tbl
488         $PTR_ADD $i3,$Tbl
489         lwl     $t4,2($i0)              # Td2[s2>>8]
490         lwl     $t5,2($i1)              # Td2[s3>>8]
491         lwl     $t6,2($i2)              # Td2[s0>>8]
492         lwl     $t7,2($i3)              # Td2[s1>>8]
493         lwr     $t4,1($i0)              # Td2[s2>>8]
494         _xtr    $i0,$s1,0-2
495         lwr     $t5,1($i1)              # Td2[s3>>8]
496         _xtr    $i1,$s2,0-2
497         lwr     $t6,1($i2)              # Td2[s0>>8]
498         _xtr    $i2,$s3,0-2
499         lwr     $t7,1($i3)              # Td2[s1>>8]
500         _xtr    $i3,$s0,0-2
501
502         and     $i0,0x3fc
503         and     $i1,0x3fc
504         and     $i2,0x3fc
505         and     $i3,0x3fc
506         $PTR_ADD $i0,$Tbl
507         $PTR_ADD $i1,$Tbl
508         $PTR_ADD $i2,$Tbl
509         $PTR_ADD $i3,$Tbl
510         lwl     $t8,1($i0)              # Td3[s1]
511         lwl     $t9,1($i1)              # Td3[s2]
512         lwl     $t10,1($i2)             # Td3[s3]
513         lwl     $t11,1($i3)             # Td3[s0]
514         lwr     $t8,0($i0)              # Td3[s1]
515         _xtr    $i0,$s0,24-2
516         lwr     $t9,0($i1)              # Td3[s2]
517         _xtr    $i1,$s1,24-2
518         lwr     $t10,0($i2)             # Td3[s3]
519         _xtr    $i2,$s2,24-2
520         lwr     $t11,0($i3)             # Td3[s0]
521         _xtr    $i3,$s3,24-2
522
523         and     $i0,0x3fc
524         and     $i1,0x3fc
525         and     $i2,0x3fc
526         and     $i3,0x3fc
527         $PTR_ADD $i0,$Tbl
528         $PTR_ADD $i1,$Tbl
529         $PTR_ADD $i2,$Tbl
530         $PTR_ADD $i3,$Tbl
531
532         xor     $t0,$t4
533         lw      $t4,0($i0)              # Td0[s0>>24]
534         xor     $t1,$t5
535         lw      $t5,0($i1)              # Td0[s1>>24]
536         xor     $t2,$t6
537         lw      $t6,0($i2)              # Td0[s2>>24]
538         xor     $t3,$t7
539         lw      $t7,0($i3)              # Td0[s3>>24]
540
541         xor     $t0,$t8
542         lw      $s0,0($key0)
543         xor     $t1,$t9
544         lw      $s1,4($key0)
545         xor     $t2,$t10
546         lw      $s2,8($key0)
547         xor     $t3,$t11
548         lw      $s3,12($key0)
549
550         xor     $t0,$t4
551         xor     $t1,$t5
552         xor     $t2,$t6
553         xor     $t3,$t7
554
555         sub     $cnt,1
556         $PTR_ADD $key0,16
557         xor     $s0,$t0
558         xor     $s1,$t1
559         xor     $s2,$t2
560         xor     $s3,$t3
561         .set    noreorder
562         bnez    $cnt,.Loop_dec
563         _xtr    $i0,$s3,16-2
564
565         .set    reorder
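        # the last round uses the 256-byte Td4 table at Tbl+1024; the
        # eight lw's below touch every 32-byte chunk of it once to pull
        # it into the data cache ahead of the byte loads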
566         lw      $t4,1024($Tbl)          # prefetch Td4
567         _xtr    $i0,$s3,16
568         lw      $t5,1024+32($Tbl)
569         _xtr    $i1,$s0,16
570         lw      $t6,1024+64($Tbl)
571         _xtr    $i2,$s1,16
572         lw      $t7,1024+96($Tbl)
573         _xtr    $i3,$s2,16
574         lw      $t8,1024+128($Tbl)
575         and     $i0,0xff
576         lw      $t9,1024+160($Tbl)
577         and     $i1,0xff
578         lw      $t10,1024+192($Tbl)
579         and     $i2,0xff
580         lw      $t11,1024+224($Tbl)
581         and     $i3,0xff
582
583         $PTR_ADD $i0,$Tbl
584         $PTR_ADD $i1,$Tbl
585         $PTR_ADD $i2,$Tbl
586         $PTR_ADD $i3,$Tbl
587         lbu     $t0,1024($i0)           # Td4[s3>>16]
588         _xtr    $i0,$s2,8
589         lbu     $t1,1024($i1)           # Td4[s0>>16]
590         _xtr    $i1,$s3,8
591         lbu     $t2,1024($i2)           # Td4[s1>>16]
592         _xtr    $i2,$s0,8
593         lbu     $t3,1024($i3)           # Td4[s2>>16]
594         _xtr    $i3,$s1,8
595
596         and     $i0,0xff
597         and     $i1,0xff
598         and     $i2,0xff
599         and     $i3,0xff
600         $PTR_ADD $i0,$Tbl
601         $PTR_ADD $i1,$Tbl
602         $PTR_ADD $i2,$Tbl
603         $PTR_ADD $i3,$Tbl
604         lbu     $t4,1024($i0)           # Td4[s2>>8]
605         _xtr    $i0,$s0,24
606         lbu     $t5,1024($i1)           # Td4[s3>>8]
607         _xtr    $i1,$s1,24
608         lbu     $t6,1024($i2)           # Td4[s0>>8]
609         _xtr    $i2,$s2,24
610         lbu     $t7,1024($i3)           # Td4[s1>>8]
611         _xtr    $i3,$s3,24
612
613         $PTR_ADD $i0,$Tbl
614         $PTR_ADD $i1,$Tbl
615         $PTR_ADD $i2,$Tbl
616         $PTR_ADD $i3,$Tbl
617         lbu     $t8,1024($i0)           # Td4[s0>>24]
618         _xtr    $i0,$s1,0
619         lbu     $t9,1024($i1)           # Td4[s1>>24]
620         _xtr    $i1,$s2,0
621         lbu     $t10,1024($i2)          # Td4[s2>>24]
622         _xtr    $i2,$s3,0
623         lbu     $t11,1024($i3)          # Td4[s3>>24]
624         _xtr    $i3,$s0,0
625
626         $PTR_ADD $i0,$Tbl
627         $PTR_ADD $i1,$Tbl
628         $PTR_ADD $i2,$Tbl
629         $PTR_ADD $i3,$Tbl
630
631         _ins    $t0,16
632         _ins    $t1,16
633         _ins    $t2,16
634         _ins    $t3,16
635
636         _ins    $t4,8
637         _ins    $t5,8
638         _ins    $t6,8
639         _ins    $t7,8
640
641         xor     $t0,$t4
642         lbu     $t4,1024($i0)           # Td4[s1]
643         xor     $t1,$t5
644         lbu     $t5,1024($i1)           # Td4[s2]
645         xor     $t2,$t6
646         lbu     $t6,1024($i2)           # Td4[s3]
647         xor     $t3,$t7
648         lbu     $t7,1024($i3)           # Td4[s0]
649
650         _ins    $t8,24
651         lw      $s0,0($key0)
652         _ins    $t9,24
653         lw      $s1,4($key0)
654         _ins    $t10,24
655         lw      $s2,8($key0)
656         _ins    $t11,24
657         lw      $s3,12($key0)
658
659         xor     $t0,$t8
660         xor     $t1,$t9
661         xor     $t2,$t10
662         xor     $t3,$t11
663
664         _ins    $t4,0
665         _ins    $t5,0
666         _ins    $t6,0
667         _ins    $t7,0
668
669         xor     $t0,$t4
670         xor     $t1,$t5
671         xor     $t2,$t6
672         xor     $t3,$t7
673
674         xor     $s0,$t0
675         xor     $s1,$t1
676         xor     $s2,$t2
677         xor     $s3,$t3
678
679         jr      $ra
680 .end    _mips_AES_decrypt
681
682 .align  5
683 .globl  AES_decrypt
684 .ent    AES_decrypt
685 AES_decrypt:
686         .frame  $sp,$FRAMESIZE,$ra
687         .mask   $SAVED_REGS_MASK,-$SZREG
688         .set    noreorder
689 ___
690 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
691         .cpload $pf
692 ___
693 $code.=<<___;
694         $PTR_SUB $sp,$FRAMESIZE
695         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
696         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
697         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
698         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
699         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
700         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
701         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
702         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
703         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
704         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
705 ___
706 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
707         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
708         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
709         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
710         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
711         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
712 ___
713 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
714         .cplocal        $Tbl
715         .cpsetup        $pf,$zero,AES_decrypt
716 ___
717 $code.=<<___;
718         .set    reorder
719         la      $Tbl,AES_Td             # PIC-ified 'load address'
720
721         lwl     $s0,0+$MSB($inp)
722         lwl     $s1,4+$MSB($inp)
723         lwl     $s2,8+$MSB($inp)
724         lwl     $s3,12+$MSB($inp)
725         lwr     $s0,0+$LSB($inp)
726         lwr     $s1,4+$LSB($inp)
727         lwr     $s2,8+$LSB($inp)
728         lwr     $s3,12+$LSB($inp)
729
730         bal     _mips_AES_decrypt
731
732         swr     $s0,0+$LSB($out)
733         swr     $s1,4+$LSB($out)
734         swr     $s2,8+$LSB($out)
735         swr     $s3,12+$LSB($out)
736         swl     $s0,0+$MSB($out)
737         swl     $s1,4+$MSB($out)
738         swl     $s2,8+$MSB($out)
739         swl     $s3,12+$MSB($out)
740
741         .set    noreorder
742         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
743         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
744         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
745         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
746         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
747         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
748         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
749         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
750         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
751         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
752 ___
753 $code.=<<___ if ($flavour =~ /nubi/i);
754         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
755         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
756         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
757         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
758         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
759 ___
760 $code.=<<___;
761         jr      $ra
762         $PTR_ADD $sp,$FRAMESIZE
763 .end    AES_decrypt
764 ___
765 }}}
766 \f
767 {{{
768 my $FRAMESIZE=8*$SZREG;
769 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
770
771 my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
772 my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
773 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
774 my ($rcon,$cnt)=($gp,$fp);
775
776 $code.=<<___;
777 .align  5
778 .ent    _mips_AES_set_encrypt_key
779 _mips_AES_set_encrypt_key:
780         .frame  $sp,0,$ra
781         .set    noreorder
782         beqz    $inp,.Lekey_done
783         li      $t0,-1
784         beqz    $key,.Lekey_done
785         $PTR_ADD $rcon,$Tbl,1024+256
786
787         .set    reorder
788         lwl     $rk0,0+$MSB($inp)       # load 128 bits
789         lwl     $rk1,4+$MSB($inp)
790         lwl     $rk2,8+$MSB($inp)
791         lwl     $rk3,12+$MSB($inp)
792         li      $at,128
793         lwr     $rk0,0+$LSB($inp)
794         lwr     $rk1,4+$LSB($inp)
795         lwr     $rk2,8+$LSB($inp)
796         lwr     $rk3,12+$LSB($inp)
797         .set    noreorder
798         beq     $bits,$at,.L128bits
799         li      $cnt,10
800
801         .set    reorder
802         lwl     $rk4,16+$MSB($inp)      # load 192 bits
803         lwl     $rk5,20+$MSB($inp)
804         li      $at,192
805         lwr     $rk4,16+$LSB($inp)
806         lwr     $rk5,20+$LSB($inp)
807         .set    noreorder
808         beq     $bits,$at,.L192bits
809         li      $cnt,8
810
811         .set    reorder
812         lwl     $rk6,24+$MSB($inp)      # load 256 bits
813         lwl     $rk7,28+$MSB($inp)
814         li      $at,256
815         lwr     $rk6,24+$LSB($inp)
816         lwr     $rk7,28+$LSB($inp)
817         .set    noreorder
818         beq     $bits,$at,.L256bits
819         li      $cnt,7
820
821         b       .Lekey_done
822         li      $t0,-2
823
824 .align  4
825 .L128bits:
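        # 128-bit key schedule: each iteration stores the current round
        # key rk0-rk3, then computes rk0 ^= SubWord(RotWord(rk3)) ^ rcon
        # (S-box bytes from the table at Tbl+1024, rcon from Tbl+1024+256,
        # positioned with _bias) and chains rk1-rk3 by xor; 10 iterations
        # give the 11 round keys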
826         .set    reorder
827         srl     $i0,$rk3,16
828         srl     $i1,$rk3,8
829         and     $i0,0xff
830         and     $i1,0xff
831         and     $i2,$rk3,0xff
832         srl     $i3,$rk3,24
833         $PTR_ADD $i0,$Tbl
834         $PTR_ADD $i1,$Tbl
835         $PTR_ADD $i2,$Tbl
836         $PTR_ADD $i3,$Tbl
837         lbu     $i0,1024($i0)
838         lbu     $i1,1024($i1)
839         lbu     $i2,1024($i2)
840         lbu     $i3,1024($i3)
841
842         sw      $rk0,0($key)
843         sw      $rk1,4($key)
844         sw      $rk2,8($key)
845         sw      $rk3,12($key)
846         sub     $cnt,1
847         $PTR_ADD $key,16
848
849         _bias   $i0,24
850         _bias   $i1,16
851         _bias   $i2,8
852         _bias   $i3,0
853
854         xor     $rk0,$i0
855         lw      $i0,0($rcon)
856         xor     $rk0,$i1
857         xor     $rk0,$i2
858         xor     $rk0,$i3
859         xor     $rk0,$i0
860
861         xor     $rk1,$rk0
862         xor     $rk2,$rk1
863         xor     $rk3,$rk2
864
865         .set    noreorder
866         bnez    $cnt,.L128bits
867         $PTR_ADD $rcon,4
868
869         sw      $rk0,0($key)
870         sw      $rk1,4($key)
871         sw      $rk2,8($key)
872         li      $cnt,10
873         sw      $rk3,12($key)
874         li      $t0,0
875         sw      $cnt,80($key)
876         b       .Lekey_done
877         $PTR_SUB $key,10*16
878
879 .align  4
880 .L192bits:
881         .set    reorder
882         srl     $i0,$rk5,16
883         srl     $i1,$rk5,8
884         and     $i0,0xff
885         and     $i1,0xff
886         and     $i2,$rk5,0xff
887         srl     $i3,$rk5,24
888         $PTR_ADD $i0,$Tbl
889         $PTR_ADD $i1,$Tbl
890         $PTR_ADD $i2,$Tbl
891         $PTR_ADD $i3,$Tbl
892         lbu     $i0,1024($i0)
893         lbu     $i1,1024($i1)
894         lbu     $i2,1024($i2)
895         lbu     $i3,1024($i3)
896
897         sw      $rk0,0($key)
898         sw      $rk1,4($key)
899         sw      $rk2,8($key)
900         sw      $rk3,12($key)
901         sw      $rk4,16($key)
902         sw      $rk5,20($key)
903         sub     $cnt,1
904         $PTR_ADD $key,24
905
906         _bias   $i0,24
907         _bias   $i1,16
908         _bias   $i2,8
909         _bias   $i3,0
910
911         xor     $rk0,$i0
912         lw      $i0,0($rcon)
913         xor     $rk0,$i1
914         xor     $rk0,$i2
915         xor     $rk0,$i3
916         xor     $rk0,$i0
917
918         xor     $rk1,$rk0
919         xor     $rk2,$rk1
920         xor     $rk3,$rk2
921         xor     $rk4,$rk3
922         xor     $rk5,$rk4
923
924         .set    noreorder
925         bnez    $cnt,.L192bits
926         $PTR_ADD $rcon,4
927
928         sw      $rk0,0($key)
929         sw      $rk1,4($key)
930         sw      $rk2,8($key)
931         li      $cnt,12
932         sw      $rk3,12($key)
933         li      $t0,0
934         sw      $cnt,48($key)
935         b       .Lekey_done
936         $PTR_SUB $key,12*16
937
938 .align  4
939 .L256bits:
940         .set    reorder
941         srl     $i0,$rk7,16
942         srl     $i1,$rk7,8
943         and     $i0,0xff
944         and     $i1,0xff
945         and     $i2,$rk7,0xff
946         srl     $i3,$rk7,24
947         $PTR_ADD $i0,$Tbl
948         $PTR_ADD $i1,$Tbl
949         $PTR_ADD $i2,$Tbl
950         $PTR_ADD $i3,$Tbl
951         lbu     $i0,1024($i0)
952         lbu     $i1,1024($i1)
953         lbu     $i2,1024($i2)
954         lbu     $i3,1024($i3)
955
956         sw      $rk0,0($key)
957         sw      $rk1,4($key)
958         sw      $rk2,8($key)
959         sw      $rk3,12($key)
960         sw      $rk4,16($key)
961         sw      $rk5,20($key)
962         sw      $rk6,24($key)
963         sw      $rk7,28($key)
964         sub     $cnt,1
965
966         _bias   $i0,24
967         _bias   $i1,16
968         _bias   $i2,8
969         _bias   $i3,0
970
971         xor     $rk0,$i0
972         lw      $i0,0($rcon)
973         xor     $rk0,$i1
974         xor     $rk0,$i2
975         xor     $rk0,$i3
976         xor     $rk0,$i0
977
978         xor     $rk1,$rk0
979         xor     $rk2,$rk1
980         xor     $rk3,$rk2
981         beqz    $cnt,.L256bits_done
982
983         srl     $i0,$rk3,24
984         srl     $i1,$rk3,16
985         srl     $i2,$rk3,8
986         and     $i3,$rk3,0xff
987         and     $i1,0xff
988         and     $i2,0xff
989         $PTR_ADD $i0,$Tbl
990         $PTR_ADD $i1,$Tbl
991         $PTR_ADD $i2,$Tbl
992         $PTR_ADD $i3,$Tbl
993         lbu     $i0,1024($i0)
994         lbu     $i1,1024($i1)
995         lbu     $i2,1024($i2)
996         lbu     $i3,1024($i3)
997         sll     $i0,24
998         sll     $i1,16
999         sll     $i2,8
1000
1001         xor     $rk4,$i0
1002         xor     $rk4,$i1
1003         xor     $rk4,$i2
1004         xor     $rk4,$i3
1005
1006         xor     $rk5,$rk4
1007         xor     $rk6,$rk5
1008         xor     $rk7,$rk6
1009
1010         $PTR_ADD $key,32
1011         .set    noreorder
1012         b       .L256bits
1013         $PTR_ADD $rcon,4
1014
1015 .L256bits_done:
1016         sw      $rk0,32($key)
1017         sw      $rk1,36($key)
1018         sw      $rk2,40($key)
1019         li      $cnt,14
1020         sw      $rk3,44($key)
1021         li      $t0,0
1022         sw      $cnt,48($key)
1023         $PTR_SUB $key,12*16
1024
1025 .Lekey_done:
1026         jr      $ra
1027         nop
1028 .end    _mips_AES_set_encrypt_key
1029
1030 .globl  AES_set_encrypt_key
1031 .ent    AES_set_encrypt_key
1032 AES_set_encrypt_key:
1033         .frame  $sp,$FRAMESIZE,$ra
1034         .mask   $SAVED_REGS_MASK,-$SZREG
1035         .set    noreorder
1036 ___
1037 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1038         .cpload $pf
1039 ___
1040 $code.=<<___;
1041         $PTR_SUB $sp,$FRAMESIZE
1042         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1043         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1044 ___
1045 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1046         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1047         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1048         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1049         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1050         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1051 ___
1052 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1053         .cplocal        $Tbl
1054         .cpsetup        $pf,$zero,AES_set_encrypt_key
1055 ___
1056 $code.=<<___;
1057         .set    reorder
1058         la      $Tbl,AES_Te             # PIC-ified 'load address'
1059
1060         bal     _mips_AES_set_encrypt_key
1061
1062         .set    noreorder
1063         move    $a0,$t0
1064         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1065         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1066 ___
1067 $code.=<<___ if ($flavour =~ /nubi/i);
1068         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1069         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1070         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1071         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1072         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1073 ___
1074 $code.=<<___;
1075         jr      $ra
1076         $PTR_ADD $sp,$FRAMESIZE
1077 .end    AES_set_encrypt_key
1078 ___
1079 \f
1080 my ($head,$tail)=($inp,$bits);
1081 my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
1082 my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
1083 $code.=<<___;
1084 .align  5
1085 .globl  AES_set_decrypt_key
1086 .ent    AES_set_decrypt_key
1087 AES_set_decrypt_key:
1088         .frame  $sp,$FRAMESIZE,$ra
1089         .mask   $SAVED_REGS_MASK,-$SZREG
1090         .set    noreorder
1091 ___
1092 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1093         .cpload $pf
1094 ___
1095 $code.=<<___;
1096         $PTR_SUB $sp,$FRAMESIZE
1097         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1098         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1099 ___
1100 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1101         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1102         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1103         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1104         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1105         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1106 ___
1107 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1108         .cplocal        $Tbl
1109         .cpsetup        $pf,$zero,AES_set_decrypt_key
1110 ___
1111 $code.=<<___;
1112         .set    reorder
1113         la      $Tbl,AES_Te             # PIC-ified 'load address'
1114
1115         bal     _mips_AES_set_encrypt_key
1116
1117         bltz    $t0,.Ldkey_done
1118
1119         sll     $at,$cnt,4
1120         $PTR_ADD $head,$key,0
1121         $PTR_ADD $tail,$key,$at
1122 .align  4
1123 .Lswap:
1124         lw      $rk0,0($head)
1125         lw      $rk1,4($head)
1126         lw      $rk2,8($head)
1127         lw      $rk3,12($head)
1128         lw      $rk4,0($tail)
1129         lw      $rk5,4($tail)
1130         lw      $rk6,8($tail)
1131         lw      $rk7,12($tail)
1132         sw      $rk0,0($tail)
1133         sw      $rk1,4($tail)
1134         sw      $rk2,8($tail)
1135         sw      $rk3,12($tail)
1136         $PTR_ADD $head,16
1137         $PTR_SUB $tail,16
1138         sw      $rk4,-16($head)
1139         sw      $rk5,-12($head)
1140         sw      $rk6,-8($head)
1141         sw      $rk7,-4($head)
1142         bne     $head,$tail,.Lswap
1143
1144         lw      $tp1,16($key)           # modulo-scheduled
1145         lui     $x80808080,0x8080
1146         sub     $cnt,1
1147         or      $x80808080,0x8080
1148         sll     $cnt,2
1149         $PTR_ADD $key,16
1150         lui     $x1b1b1b1b,0x1b1b
1151         nor     $x7f7f7f7f,$zero,$x80808080
1152         or      $x1b1b1b1b,0x1b1b
1153 .align  4
1154 .Lmix:
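        # InvMixColumns on one round-key word per iteration: the three
        # blocks below double tp1 three times in GF(2^8) on all four bytes
        # at once (tp2=2*tp1, tp4=2*tp2, tp8=2*tp4; the 0x80808080/
        # 0x7f7f7f7f/0x1b1b1b1b masks do the conditional reduction modulo
        # x^8+x^4+x^3+x+1), and the 09/0b/0d/0e multiples are then
        # recombined with rotations into the result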
1155         and     $m,$tp1,$x80808080
1156         and     $tp2,$tp1,$x7f7f7f7f
1157         srl     $tp4,$m,7
1158         addu    $tp2,$tp2               # tp2<<1
1159         subu    $m,$tp4
1160         and     $m,$x1b1b1b1b
1161         xor     $tp2,$m
1162
1163         and     $m,$tp2,$x80808080
1164         and     $tp4,$tp2,$x7f7f7f7f
1165         srl     $tp8,$m,7
1166         addu    $tp4,$tp4               # tp4<<1
1167         subu    $m,$tp8
1168         and     $m,$x1b1b1b1b
1169         xor     $tp4,$m
1170
1171         and     $m,$tp4,$x80808080
1172         and     $tp8,$tp4,$x7f7f7f7f
1173         srl     $tp9,$m,7
1174         addu    $tp8,$tp8               # tp8<<1
1175         subu    $m,$tp9
1176         and     $m,$x1b1b1b1b
1177         xor     $tp8,$m
1178
1179         xor     $tp9,$tp8,$tp1
1180         xor     $tpe,$tp8,$tp4
1181         xor     $tpb,$tp9,$tp2
1182         xor     $tpd,$tp9,$tp4
1183
1184         _ror    $tp1,$tpd,16
1185          xor    $tpe,$tp2
1186         _ror    $tp2,$tpd,-16
1187         xor     $tpe,$tp1
1188         _ror    $tp1,$tp9,8
1189         xor     $tpe,$tp2
1190         _ror    $tp2,$tp9,-24
1191         xor     $tpe,$tp1
1192         _ror    $tp1,$tpb,24
1193         xor     $tpe,$tp2
1194         _ror    $tp2,$tpb,-8
1195         xor     $tpe,$tp1
1196         lw      $tp1,4($key)            # modulo-scheduled
1197         xor     $tpe,$tp2
1198         sub     $cnt,1
1199         sw      $tpe,0($key)
1200         $PTR_ADD $key,4
1201         bnez    $cnt,.Lmix
1202
1203         li      $t0,0
1204 .Ldkey_done:
1205         .set    noreorder
1206         move    $a0,$t0
1207         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1208         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1209 ___
1210 $code.=<<___ if ($flavour =~ /nubi/i);
1211         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1212         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1213         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1214         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1215         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1216 ___
1217 $code.=<<___;
1218         jr      $ra
1219         $PTR_ADD $sp,$FRAMESIZE
1220 .end    AES_set_decrypt_key
1221 ___
1222 }}}
1223
1224 ######################################################################
1225 # Tables are kept in endian-neutral manner
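# (each 32-bit Te0/Td0 entry is spelled out with .byte directives so the
#  assembled image is identical for either byte order; the 256-byte Te4/Td4
#  S-boxes follow at offset 1024 and, in the encrypt table, rcon at 1024+256)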
1226 $code.=<<___;
1227 .rdata
1228 .align  6
1229 AES_Te:
1230 .byte   0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84     # Te0
1231 .byte   0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
1232 .byte   0xff,0xf2,0xf2,0x0d,    0xd6,0x6b,0x6b,0xbd
1233 .byte   0xde,0x6f,0x6f,0xb1,    0x91,0xc5,0xc5,0x54
1234 .byte   0x60,0x30,0x30,0x50,    0x02,0x01,0x01,0x03
1235 .byte   0xce,0x67,0x67,0xa9,    0x56,0x2b,0x2b,0x7d
1236 .byte   0xe7,0xfe,0xfe,0x19,    0xb5,0xd7,0xd7,0x62
1237 .byte   0x4d,0xab,0xab,0xe6,    0xec,0x76,0x76,0x9a
1238 .byte   0x8f,0xca,0xca,0x45,    0x1f,0x82,0x82,0x9d
1239 .byte   0x89,0xc9,0xc9,0x40,    0xfa,0x7d,0x7d,0x87
1240 .byte   0xef,0xfa,0xfa,0x15,    0xb2,0x59,0x59,0xeb
1241 .byte   0x8e,0x47,0x47,0xc9,    0xfb,0xf0,0xf0,0x0b
1242 .byte   0x41,0xad,0xad,0xec,    0xb3,0xd4,0xd4,0x67
1243 .byte   0x5f,0xa2,0xa2,0xfd,    0x45,0xaf,0xaf,0xea
1244 .byte   0x23,0x9c,0x9c,0xbf,    0x53,0xa4,0xa4,0xf7
1245 .byte   0xe4,0x72,0x72,0x96,    0x9b,0xc0,0xc0,0x5b
1246 .byte   0x75,0xb7,0xb7,0xc2,    0xe1,0xfd,0xfd,0x1c
1247 .byte   0x3d,0x93,0x93,0xae,    0x4c,0x26,0x26,0x6a
1248 .byte   0x6c,0x36,0x36,0x5a,    0x7e,0x3f,0x3f,0x41
1249 .byte   0xf5,0xf7,0xf7,0x02,    0x83,0xcc,0xcc,0x4f
1250 .byte   0x68,0x34,0x34,0x5c,    0x51,0xa5,0xa5,0xf4
1251 .byte   0xd1,0xe5,0xe5,0x34,    0xf9,0xf1,0xf1,0x08
1252 .byte   0xe2,0x71,0x71,0x93,    0xab,0xd8,0xd8,0x73
1253 .byte   0x62,0x31,0x31,0x53,    0x2a,0x15,0x15,0x3f
1254 .byte   0x08,0x04,0x04,0x0c,    0x95,0xc7,0xc7,0x52
1255 .byte   0x46,0x23,0x23,0x65,    0x9d,0xc3,0xc3,0x5e
1256 .byte   0x30,0x18,0x18,0x28,    0x37,0x96,0x96,0xa1
1257 .byte   0x0a,0x05,0x05,0x0f,    0x2f,0x9a,0x9a,0xb5
1258 .byte   0x0e,0x07,0x07,0x09,    0x24,0x12,0x12,0x36
1259 .byte   0x1b,0x80,0x80,0x9b,    0xdf,0xe2,0xe2,0x3d
1260 .byte   0xcd,0xeb,0xeb,0x26,    0x4e,0x27,0x27,0x69
1261 .byte   0x7f,0xb2,0xb2,0xcd,    0xea,0x75,0x75,0x9f
1262 .byte   0x12,0x09,0x09,0x1b,    0x1d,0x83,0x83,0x9e
1263 .byte   0x58,0x2c,0x2c,0x74,    0x34,0x1a,0x1a,0x2e
1264 .byte   0x36,0x1b,0x1b,0x2d,    0xdc,0x6e,0x6e,0xb2
1265 .byte   0xb4,0x5a,0x5a,0xee,    0x5b,0xa0,0xa0,0xfb
1266 .byte   0xa4,0x52,0x52,0xf6,    0x76,0x3b,0x3b,0x4d
1267 .byte   0xb7,0xd6,0xd6,0x61,    0x7d,0xb3,0xb3,0xce
1268 .byte   0x52,0x29,0x29,0x7b,    0xdd,0xe3,0xe3,0x3e
1269 .byte   0x5e,0x2f,0x2f,0x71,    0x13,0x84,0x84,0x97
1270 .byte   0xa6,0x53,0x53,0xf5,    0xb9,0xd1,0xd1,0x68
1271 .byte   0x00,0x00,0x00,0x00,    0xc1,0xed,0xed,0x2c
1272 .byte   0x40,0x20,0x20,0x60,    0xe3,0xfc,0xfc,0x1f
1273 .byte   0x79,0xb1,0xb1,0xc8,    0xb6,0x5b,0x5b,0xed
1274 .byte   0xd4,0x6a,0x6a,0xbe,    0x8d,0xcb,0xcb,0x46
1275 .byte   0x67,0xbe,0xbe,0xd9,    0x72,0x39,0x39,0x4b
1276 .byte   0x94,0x4a,0x4a,0xde,    0x98,0x4c,0x4c,0xd4
1277 .byte   0xb0,0x58,0x58,0xe8,    0x85,0xcf,0xcf,0x4a
1278 .byte   0xbb,0xd0,0xd0,0x6b,    0xc5,0xef,0xef,0x2a
1279 .byte   0x4f,0xaa,0xaa,0xe5,    0xed,0xfb,0xfb,0x16
1280 .byte   0x86,0x43,0x43,0xc5,    0x9a,0x4d,0x4d,0xd7
1281 .byte   0x66,0x33,0x33,0x55,    0x11,0x85,0x85,0x94
1282 .byte   0x8a,0x45,0x45,0xcf,    0xe9,0xf9,0xf9,0x10
1283 .byte   0x04,0x02,0x02,0x06,    0xfe,0x7f,0x7f,0x81
1284 .byte   0xa0,0x50,0x50,0xf0,    0x78,0x3c,0x3c,0x44
1285 .byte   0x25,0x9f,0x9f,0xba,    0x4b,0xa8,0xa8,0xe3
1286 .byte   0xa2,0x51,0x51,0xf3,    0x5d,0xa3,0xa3,0xfe
1287 .byte   0x80,0x40,0x40,0xc0,    0x05,0x8f,0x8f,0x8a
1288 .byte   0x3f,0x92,0x92,0xad,    0x21,0x9d,0x9d,0xbc
1289 .byte   0x70,0x38,0x38,0x48,    0xf1,0xf5,0xf5,0x04
1290 .byte   0x63,0xbc,0xbc,0xdf,    0x77,0xb6,0xb6,0xc1
1291 .byte   0xaf,0xda,0xda,0x75,    0x42,0x21,0x21,0x63
1292 .byte   0x20,0x10,0x10,0x30,    0xe5,0xff,0xff,0x1a
1293 .byte   0xfd,0xf3,0xf3,0x0e,    0xbf,0xd2,0xd2,0x6d
1294 .byte   0x81,0xcd,0xcd,0x4c,    0x18,0x0c,0x0c,0x14
1295 .byte   0x26,0x13,0x13,0x35,    0xc3,0xec,0xec,0x2f
1296 .byte   0xbe,0x5f,0x5f,0xe1,    0x35,0x97,0x97,0xa2
1297 .byte   0x88,0x44,0x44,0xcc,    0x2e,0x17,0x17,0x39
1298 .byte   0x93,0xc4,0xc4,0x57,    0x55,0xa7,0xa7,0xf2
1299 .byte   0xfc,0x7e,0x7e,0x82,    0x7a,0x3d,0x3d,0x47
1300 .byte   0xc8,0x64,0x64,0xac,    0xba,0x5d,0x5d,0xe7
1301 .byte   0x32,0x19,0x19,0x2b,    0xe6,0x73,0x73,0x95
1302 .byte   0xc0,0x60,0x60,0xa0,    0x19,0x81,0x81,0x98
1303 .byte   0x9e,0x4f,0x4f,0xd1,    0xa3,0xdc,0xdc,0x7f
1304 .byte   0x44,0x22,0x22,0x66,    0x54,0x2a,0x2a,0x7e
1305 .byte   0x3b,0x90,0x90,0xab,    0x0b,0x88,0x88,0x83
1306 .byte   0x8c,0x46,0x46,0xca,    0xc7,0xee,0xee,0x29
1307 .byte   0x6b,0xb8,0xb8,0xd3,    0x28,0x14,0x14,0x3c
1308 .byte   0xa7,0xde,0xde,0x79,    0xbc,0x5e,0x5e,0xe2
1309 .byte   0x16,0x0b,0x0b,0x1d,    0xad,0xdb,0xdb,0x76
1310 .byte   0xdb,0xe0,0xe0,0x3b,    0x64,0x32,0x32,0x56
1311 .byte   0x74,0x3a,0x3a,0x4e,    0x14,0x0a,0x0a,0x1e
1312 .byte   0x92,0x49,0x49,0xdb,    0x0c,0x06,0x06,0x0a
1313 .byte   0x48,0x24,0x24,0x6c,    0xb8,0x5c,0x5c,0xe4
1314 .byte   0x9f,0xc2,0xc2,0x5d,    0xbd,0xd3,0xd3,0x6e
1315 .byte   0x43,0xac,0xac,0xef,    0xc4,0x62,0x62,0xa6
1316 .byte   0x39,0x91,0x91,0xa8,    0x31,0x95,0x95,0xa4
1317 .byte   0xd3,0xe4,0xe4,0x37,    0xf2,0x79,0x79,0x8b
1318 .byte   0xd5,0xe7,0xe7,0x32,    0x8b,0xc8,0xc8,0x43
1319 .byte   0x6e,0x37,0x37,0x59,    0xda,0x6d,0x6d,0xb7
1320 .byte   0x01,0x8d,0x8d,0x8c,    0xb1,0xd5,0xd5,0x64
1321 .byte   0x9c,0x4e,0x4e,0xd2,    0x49,0xa9,0xa9,0xe0
1322 .byte   0xd8,0x6c,0x6c,0xb4,    0xac,0x56,0x56,0xfa
1323 .byte   0xf3,0xf4,0xf4,0x07,    0xcf,0xea,0xea,0x25
1324 .byte   0xca,0x65,0x65,0xaf,    0xf4,0x7a,0x7a,0x8e
1325 .byte   0x47,0xae,0xae,0xe9,    0x10,0x08,0x08,0x18
1326 .byte   0x6f,0xba,0xba,0xd5,    0xf0,0x78,0x78,0x88
1327 .byte   0x4a,0x25,0x25,0x6f,    0x5c,0x2e,0x2e,0x72
1328 .byte   0x38,0x1c,0x1c,0x24,    0x57,0xa6,0xa6,0xf1
1329 .byte   0x73,0xb4,0xb4,0xc7,    0x97,0xc6,0xc6,0x51
1330 .byte   0xcb,0xe8,0xe8,0x23,    0xa1,0xdd,0xdd,0x7c
1331 .byte   0xe8,0x74,0x74,0x9c,    0x3e,0x1f,0x1f,0x21
1332 .byte   0x96,0x4b,0x4b,0xdd,    0x61,0xbd,0xbd,0xdc
1333 .byte   0x0d,0x8b,0x8b,0x86,    0x0f,0x8a,0x8a,0x85
1334 .byte   0xe0,0x70,0x70,0x90,    0x7c,0x3e,0x3e,0x42
1335 .byte   0x71,0xb5,0xb5,0xc4,    0xcc,0x66,0x66,0xaa
1336 .byte   0x90,0x48,0x48,0xd8,    0x06,0x03,0x03,0x05
1337 .byte   0xf7,0xf6,0xf6,0x01,    0x1c,0x0e,0x0e,0x12
1338 .byte   0xc2,0x61,0x61,0xa3,    0x6a,0x35,0x35,0x5f
1339 .byte   0xae,0x57,0x57,0xf9,    0x69,0xb9,0xb9,0xd0
1340 .byte   0x17,0x86,0x86,0x91,    0x99,0xc1,0xc1,0x58
1341 .byte   0x3a,0x1d,0x1d,0x27,    0x27,0x9e,0x9e,0xb9
1342 .byte   0xd9,0xe1,0xe1,0x38,    0xeb,0xf8,0xf8,0x13
1343 .byte   0x2b,0x98,0x98,0xb3,    0x22,0x11,0x11,0x33
1344 .byte   0xd2,0x69,0x69,0xbb,    0xa9,0xd9,0xd9,0x70
1345 .byte   0x07,0x8e,0x8e,0x89,    0x33,0x94,0x94,0xa7
1346 .byte   0x2d,0x9b,0x9b,0xb6,    0x3c,0x1e,0x1e,0x22
1347 .byte   0x15,0x87,0x87,0x92,    0xc9,0xe9,0xe9,0x20
1348 .byte   0x87,0xce,0xce,0x49,    0xaa,0x55,0x55,0xff
1349 .byte   0x50,0x28,0x28,0x78,    0xa5,0xdf,0xdf,0x7a
1350 .byte   0x03,0x8c,0x8c,0x8f,    0x59,0xa1,0xa1,0xf8
1351 .byte   0x09,0x89,0x89,0x80,    0x1a,0x0d,0x0d,0x17
1352 .byte   0x65,0xbf,0xbf,0xda,    0xd7,0xe6,0xe6,0x31
1353 .byte   0x84,0x42,0x42,0xc6,    0xd0,0x68,0x68,0xb8
1354 .byte   0x82,0x41,0x41,0xc3,    0x29,0x99,0x99,0xb0
1355 .byte   0x5a,0x2d,0x2d,0x77,    0x1e,0x0f,0x0f,0x11
1356 .byte   0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
1357 .byte   0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
1358
1359 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5  # Te4
1360 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
1361 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
1362 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
1363 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
1364 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
1365 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
1366 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
1367 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
1368 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
1369 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
1370 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
1371 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
1372 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
1373 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
1374 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
1375 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
1376 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
1377 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
1378 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
1379 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
1380 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
1381 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
1382 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
1383 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
1384 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
1385 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
1386 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
1387 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
1388 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
1389 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
1390 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
1391
1392 .byte   0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00     # rcon
1393 .byte   0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
1394 .byte   0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
1395 .byte   0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
1396 .byte   0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
1397
1398 .align  6
1399 AES_Td:
1400 .byte   0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53     # Td0
1401 .byte   0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
1402 .byte   0x3b,0xab,0x6b,0xcb,    0x1f,0x9d,0x45,0xf1
1403 .byte   0xac,0xfa,0x58,0xab,    0x4b,0xe3,0x03,0x93
1404 .byte   0x20,0x30,0xfa,0x55,    0xad,0x76,0x6d,0xf6
1405 .byte   0x88,0xcc,0x76,0x91,    0xf5,0x02,0x4c,0x25
1406 .byte   0x4f,0xe5,0xd7,0xfc,    0xc5,0x2a,0xcb,0xd7
1407 .byte   0x26,0x35,0x44,0x80,    0xb5,0x62,0xa3,0x8f
1408 .byte   0xde,0xb1,0x5a,0x49,    0x25,0xba,0x1b,0x67
1409 .byte   0x45,0xea,0x0e,0x98,    0x5d,0xfe,0xc0,0xe1
1410 .byte   0xc3,0x2f,0x75,0x02,    0x81,0x4c,0xf0,0x12
1411 .byte   0x8d,0x46,0x97,0xa3,    0x6b,0xd3,0xf9,0xc6
1412 .byte   0x03,0x8f,0x5f,0xe7,    0x15,0x92,0x9c,0x95
1413 .byte   0xbf,0x6d,0x7a,0xeb,    0x95,0x52,0x59,0xda
1414 .byte   0xd4,0xbe,0x83,0x2d,    0x58,0x74,0x21,0xd3
1415 .byte   0x49,0xe0,0x69,0x29,    0x8e,0xc9,0xc8,0x44
1416 .byte   0x75,0xc2,0x89,0x6a,    0xf4,0x8e,0x79,0x78
1417 .byte   0x99,0x58,0x3e,0x6b,    0x27,0xb9,0x71,0xdd
1418 .byte   0xbe,0xe1,0x4f,0xb6,    0xf0,0x88,0xad,0x17
1419 .byte   0xc9,0x20,0xac,0x66,    0x7d,0xce,0x3a,0xb4
1420 .byte   0x63,0xdf,0x4a,0x18,    0xe5,0x1a,0x31,0x82
1421 .byte   0x97,0x51,0x33,0x60,    0x62,0x53,0x7f,0x45
1422 .byte   0xb1,0x64,0x77,0xe0,    0xbb,0x6b,0xae,0x84
1423 .byte   0xfe,0x81,0xa0,0x1c,    0xf9,0x08,0x2b,0x94
1424 .byte   0x70,0x48,0x68,0x58,    0x8f,0x45,0xfd,0x19
1425 .byte   0x94,0xde,0x6c,0x87,    0x52,0x7b,0xf8,0xb7
1426 .byte   0xab,0x73,0xd3,0x23,    0x72,0x4b,0x02,0xe2
1427 .byte   0xe3,0x1f,0x8f,0x57,    0x66,0x55,0xab,0x2a
1428 .byte   0xb2,0xeb,0x28,0x07,    0x2f,0xb5,0xc2,0x03
1429 .byte   0x86,0xc5,0x7b,0x9a,    0xd3,0x37,0x08,0xa5
1430 .byte   0x30,0x28,0x87,0xf2,    0x23,0xbf,0xa5,0xb2
1431 .byte   0x02,0x03,0x6a,0xba,    0xed,0x16,0x82,0x5c
1432 .byte   0x8a,0xcf,0x1c,0x2b,    0xa7,0x79,0xb4,0x92
1433 .byte   0xf3,0x07,0xf2,0xf0,    0x4e,0x69,0xe2,0xa1
1434 .byte   0x65,0xda,0xf4,0xcd,    0x06,0x05,0xbe,0xd5
1435 .byte   0xd1,0x34,0x62,0x1f,    0xc4,0xa6,0xfe,0x8a
1436 .byte   0x34,0x2e,0x53,0x9d,    0xa2,0xf3,0x55,0xa0
1437 .byte   0x05,0x8a,0xe1,0x32,    0xa4,0xf6,0xeb,0x75
1438 .byte   0x0b,0x83,0xec,0x39,    0x40,0x60,0xef,0xaa
1439 .byte   0x5e,0x71,0x9f,0x06,    0xbd,0x6e,0x10,0x51
1440 .byte   0x3e,0x21,0x8a,0xf9,    0x96,0xdd,0x06,0x3d
1441 .byte   0xdd,0x3e,0x05,0xae,    0x4d,0xe6,0xbd,0x46
1442 .byte   0x91,0x54,0x8d,0xb5,    0x71,0xc4,0x5d,0x05
1443 .byte   0x04,0x06,0xd4,0x6f,    0x60,0x50,0x15,0xff
1444 .byte   0x19,0x98,0xfb,0x24,    0xd6,0xbd,0xe9,0x97
1445 .byte   0x89,0x40,0x43,0xcc,    0x67,0xd9,0x9e,0x77
1446 .byte   0xb0,0xe8,0x42,0xbd,    0x07,0x89,0x8b,0x88
1447 .byte   0xe7,0x19,0x5b,0x38,    0x79,0xc8,0xee,0xdb
1448 .byte   0xa1,0x7c,0x0a,0x47,    0x7c,0x42,0x0f,0xe9
1449 .byte   0xf8,0x84,0x1e,0xc9,    0x00,0x00,0x00,0x00
1450 .byte   0x09,0x80,0x86,0x83,    0x32,0x2b,0xed,0x48
1451 .byte   0x1e,0x11,0x70,0xac,    0x6c,0x5a,0x72,0x4e
1452 .byte   0xfd,0x0e,0xff,0xfb,    0x0f,0x85,0x38,0x56
1453 .byte   0x3d,0xae,0xd5,0x1e,    0x36,0x2d,0x39,0x27
1454 .byte   0x0a,0x0f,0xd9,0x64,    0x68,0x5c,0xa6,0x21
1455 .byte   0x9b,0x5b,0x54,0xd1,    0x24,0x36,0x2e,0x3a
1456 .byte   0x0c,0x0a,0x67,0xb1,    0x93,0x57,0xe7,0x0f
1457 .byte   0xb4,0xee,0x96,0xd2,    0x1b,0x9b,0x91,0x9e
1458 .byte   0x80,0xc0,0xc5,0x4f,    0x61,0xdc,0x20,0xa2
1459 .byte   0x5a,0x77,0x4b,0x69,    0x1c,0x12,0x1a,0x16
1460 .byte   0xe2,0x93,0xba,0x0a,    0xc0,0xa0,0x2a,0xe5
1461 .byte   0x3c,0x22,0xe0,0x43,    0x12,0x1b,0x17,0x1d
1462 .byte   0x0e,0x09,0x0d,0x0b,    0xf2,0x8b,0xc7,0xad
1463 .byte   0x2d,0xb6,0xa8,0xb9,    0x14,0x1e,0xa9,0xc8
1464 .byte   0x57,0xf1,0x19,0x85,    0xaf,0x75,0x07,0x4c
1465 .byte   0xee,0x99,0xdd,0xbb,    0xa3,0x7f,0x60,0xfd
1466 .byte   0xf7,0x01,0x26,0x9f,    0x5c,0x72,0xf5,0xbc
1467 .byte   0x44,0x66,0x3b,0xc5,    0x5b,0xfb,0x7e,0x34
1468 .byte   0x8b,0x43,0x29,0x76,    0xcb,0x23,0xc6,0xdc
1469 .byte   0xb6,0xed,0xfc,0x68,    0xb8,0xe4,0xf1,0x63
1470 .byte   0xd7,0x31,0xdc,0xca,    0x42,0x63,0x85,0x10
1471 .byte   0x13,0x97,0x22,0x40,    0x84,0xc6,0x11,0x20
1472 .byte   0x85,0x4a,0x24,0x7d,    0xd2,0xbb,0x3d,0xf8
1473 .byte   0xae,0xf9,0x32,0x11,    0xc7,0x29,0xa1,0x6d
1474 .byte   0x1d,0x9e,0x2f,0x4b,    0xdc,0xb2,0x30,0xf3
1475 .byte   0x0d,0x86,0x52,0xec,    0x77,0xc1,0xe3,0xd0
1476 .byte   0x2b,0xb3,0x16,0x6c,    0xa9,0x70,0xb9,0x99
1477 .byte   0x11,0x94,0x48,0xfa,    0x47,0xe9,0x64,0x22
1478 .byte   0xa8,0xfc,0x8c,0xc4,    0xa0,0xf0,0x3f,0x1a
1479 .byte   0x56,0x7d,0x2c,0xd8,    0x22,0x33,0x90,0xef
1480 .byte   0x87,0x49,0x4e,0xc7,    0xd9,0x38,0xd1,0xc1
1481 .byte   0x8c,0xca,0xa2,0xfe,    0x98,0xd4,0x0b,0x36
1482 .byte   0xa6,0xf5,0x81,0xcf,    0xa5,0x7a,0xde,0x28
1483 .byte   0xda,0xb7,0x8e,0x26,    0x3f,0xad,0xbf,0xa4
1484 .byte   0x2c,0x3a,0x9d,0xe4,    0x50,0x78,0x92,0x0d
1485 .byte   0x6a,0x5f,0xcc,0x9b,    0x54,0x7e,0x46,0x62
1486 .byte   0xf6,0x8d,0x13,0xc2,    0x90,0xd8,0xb8,0xe8
1487 .byte   0x2e,0x39,0xf7,0x5e,    0x82,0xc3,0xaf,0xf5
1488 .byte   0x9f,0x5d,0x80,0xbe,    0x69,0xd0,0x93,0x7c
1489 .byte   0x6f,0xd5,0x2d,0xa9,    0xcf,0x25,0x12,0xb3
1490 .byte   0xc8,0xac,0x99,0x3b,    0x10,0x18,0x7d,0xa7
1491 .byte   0xe8,0x9c,0x63,0x6e,    0xdb,0x3b,0xbb,0x7b
1492 .byte   0xcd,0x26,0x78,0x09,    0x6e,0x59,0x18,0xf4
1493 .byte   0xec,0x9a,0xb7,0x01,    0x83,0x4f,0x9a,0xa8
1494 .byte   0xe6,0x95,0x6e,0x65,    0xaa,0xff,0xe6,0x7e
1495 .byte   0x21,0xbc,0xcf,0x08,    0xef,0x15,0xe8,0xe6
1496 .byte   0xba,0xe7,0x9b,0xd9,    0x4a,0x6f,0x36,0xce
1497 .byte   0xea,0x9f,0x09,0xd4,    0x29,0xb0,0x7c,0xd6
1498 .byte   0x31,0xa4,0xb2,0xaf,    0x2a,0x3f,0x23,0x31
1499 .byte   0xc6,0xa5,0x94,0x30,    0x35,0xa2,0x66,0xc0
1500 .byte   0x74,0x4e,0xbc,0x37,    0xfc,0x82,0xca,0xa6
1501 .byte   0xe0,0x90,0xd0,0xb0,    0x33,0xa7,0xd8,0x15
1502 .byte   0xf1,0x04,0x98,0x4a,    0x41,0xec,0xda,0xf7
1503 .byte   0x7f,0xcd,0x50,0x0e,    0x17,0x91,0xf6,0x2f
1504 .byte   0x76,0x4d,0xd6,0x8d,    0x43,0xef,0xb0,0x4d
1505 .byte   0xcc,0xaa,0x4d,0x54,    0xe4,0x96,0x04,0xdf
1506 .byte   0x9e,0xd1,0xb5,0xe3,    0x4c,0x6a,0x88,0x1b
1507 .byte   0xc1,0x2c,0x1f,0xb8,    0x46,0x65,0x51,0x7f
1508 .byte   0x9d,0x5e,0xea,0x04,    0x01,0x8c,0x35,0x5d
1509 .byte   0xfa,0x87,0x74,0x73,    0xfb,0x0b,0x41,0x2e
1510 .byte   0xb3,0x67,0x1d,0x5a,    0x92,0xdb,0xd2,0x52
1511 .byte   0xe9,0x10,0x56,0x33,    0x6d,0xd6,0x47,0x13
1512 .byte   0x9a,0xd7,0x61,0x8c,    0x37,0xa1,0x0c,0x7a
1513 .byte   0x59,0xf8,0x14,0x8e,    0xeb,0x13,0x3c,0x89
1514 .byte   0xce,0xa9,0x27,0xee,    0xb7,0x61,0xc9,0x35
1515 .byte   0xe1,0x1c,0xe5,0xed,    0x7a,0x47,0xb1,0x3c
1516 .byte   0x9c,0xd2,0xdf,0x59,    0x55,0xf2,0x73,0x3f
1517 .byte   0x18,0x14,0xce,0x79,    0x73,0xc7,0x37,0xbf
1518 .byte   0x53,0xf7,0xcd,0xea,    0x5f,0xfd,0xaa,0x5b
1519 .byte   0xdf,0x3d,0x6f,0x14,    0x78,0x44,0xdb,0x86
1520 .byte   0xca,0xaf,0xf3,0x81,    0xb9,0x68,0xc4,0x3e
1521 .byte   0x38,0x24,0x34,0x2c,    0xc2,0xa3,0x40,0x5f
1522 .byte   0x16,0x1d,0xc3,0x72,    0xbc,0xe2,0x25,0x0c
1523 .byte   0x28,0x3c,0x49,0x8b,    0xff,0x0d,0x95,0x41
1524 .byte   0x39,0xa8,0x01,0x71,    0x08,0x0c,0xb3,0xde
1525 .byte   0xd8,0xb4,0xe4,0x9c,    0x64,0x56,0xc1,0x90
1526 .byte   0x7b,0xcb,0x84,0x61,    0xd5,0x32,0xb6,0x70
1527 .byte   0x48,0x6c,0x5c,0x74,    0xd0,0xb8,0x57,0x42
1528
1529 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38  # Td4
1530 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1531 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1532 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1533 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1534 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1535 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1536 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1537 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1538 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1539 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1540 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1541 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1542 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1543 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1544 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1545 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1546 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1547 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1548 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1549 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1550 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1551 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1552 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1553 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1554 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1555 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1556 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1557 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1558 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1559 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1560 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1561 ___
1562 \f
1563 foreach (split("\n",$code)) {
1564         s/\`([^\`]*)\`/eval $1/ge;
1565
1566         # made-up instructions _xtr, _ins, _ror and _bias cope
1567         # with byte order dependencies...
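        # for example, on a little-endian target the three-operand form
        # "_xtr rd,rs,16-2" is rewritten as "srl rd,rs,6" (i.e. 24-16-2),
        # whereas big-endian gets "srl rd,rs,14", so that the following
        # "and 0x3fc" extracts the same pre-scaled table index either way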
1568         if (/^\s+_/) {
1569             s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
1570
1571             s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
1572                 sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
1573                                         :               eval("24-$3"))/e or
1574             s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
1575                 sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
1576                                         :               eval("24-$3"))/e or
1577             s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
1578                 sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
1579                                         :               eval("$3*-1"))/e or
1580             s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
1581                 sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
1582                                         :               eval("($3-16)&31"))/e;
1583
1584             s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
1585                 sprintf("sll\t$1,$2,$3")/e                              or
1586             s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
1587                 sprintf("and\t$1,$2,0xff")/e                            or
1588             s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
1589         }
1590
1591         # convert lwl/lwr and swr/swl to little-endian order
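        # e.g. the "lwl ..,0+MSB(inp)" / "lwr ..,0+LSB(inp)" pair written
        # above in big-endian form (byte offsets 0 and 3) is flipped here
        # to offsets 3 and 0 respectively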
1592         if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
1593             s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
1594                 sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e        or
1595             s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
1596                 sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
1597         }
1598
1599         print $_,"\n";
1600 }
1601
1602 close STDOUT;