MIPS assembly pack updates from HEAD.
[openssl.git] / crypto / aes / asm / aes-mips.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # AES for MIPS
11
12 # October 2010
13 #
14 # Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
15 # spends ~68 cycles per byte processed with 128-bit key. This is ~16%
16 # faster than gcc-generated code, which is not very impressive. But
17 # recall that compressed S-box requires extra processing, namely
18 # additional rotations. Rotations are implemented with lwl/lwr pairs,
19 # which are normally used for loading unaligned data. Another cool
20 # thing about this module is its endian neutrality, which means that
21 # it processes data without ever changing byte order...
22
23 ######################################################################
24 # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
25 # widely used. Then there is a new contender: NUBI. It appears that if
26 # one picks the latter, it's possible to arrange code in ABI neutral
27 # manner. Therefore let's stick to NUBI register layout:
28 #
# NUBI register names, each bound to its "$<number>" assembler spelling.
($zero,$at,$t0,$t1,$t2) = map { '$' . $_ } (0..2, 24, 25);
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7) = map { '$' . $_ } (4..11);
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11) = map { '$' . $_ } (12..23);
($gp,$tp,$sp,$fp,$ra) = map { '$' . $_ } (3, 28..31);
33 #
34 # The return value is placed in $a0. Following coding rules facilitate
35 # interoperability:
36 #
37 # - never ever touch $tp, "thread pointer", former $gp;
38 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
39 #   old code];
40 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
41 #
42 # For reference here is register layout for N32/64 MIPS ABIs:
43 #
44 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
45 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
46 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
47 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
48 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
49 #
# The ABI flavour is the first command-line argument and defaults to "o32";
# supported flavours are o32,n32,64,nubi32,nubi64.
$flavour = shift || "o32";

# Pointer arithmetic, register save/load mnemonics and the size of one
# saved-register slot.  Any flavour matching 64 or n32 gets the 64-bit
# forms (dadd/dsub/dsll incidentally work even on n32); everything else
# gets the 32-bit ones.
($PTR_ADD,$PTR_SUB,$REG_S,$REG_L,$PTR_SLL,$SZREG) =
        ($flavour =~ /64|n32/i) ? ("dadd","dsub","sd","ld","dsll",8)
                                : ("add", "sub", "sw","lw","sll", 4);

# Register named in the .cpload/.cpsetup directives of the entry points.
if ($flavour =~ /nubi/i) {
        $pf = $t0;
} else {
        $pf = $t2;
}
68 #
69 # <appro@openssl.org>
70 #
71 ######################################################################
72
# Ask the target C preprocessor about byte order: if the literal token
# MIPSEL comes back unchanged, the target is treated as big-endian
# (presumably little-endian toolchains predefine MIPSEL as a macro, so the
# token gets rewritten -- confirm for your compiler).  The probe is only
# meaningful when CC is set.  Without that guard the empty command fails,
# produces no output and silently forces little-endian, which also makes
# the host-order fallback below unreachable.
$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0 if ($ENV{CC});

# Fallback when no compiler was named: use the byte order of the host
# running this script.
if (!defined($big_endian))
{    $big_endian=(unpack('L',pack('N',1))==1);   }

# The output file is the first remaining argument that looks like
# "name.ext"; arguments in front of it are consumed and ignored.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT once, only when a file name was actually found, and
# report a failure instead of carrying on with a broken handle.
if ($output) {
        open(STDOUT,'>',$output) or die "can't open $output: $!";
}

my ($MSB,$LSB)=(0,3);   # automatically converted to little-endian

# Common preamble of the generated assembly file.
$code.=<<___;
.text
#ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h>
#endif

#if !defined(__vxworks) || defined(__pic__)
.option pic2
#endif
.set    noat
___
97 \f
98 {{{
# Stack frame of the public entry points: sixteen register-sized slots.
my $FRAMESIZE=$SZREG*16;
# Value handed to the .mask directive; nubi flavours use the wider mask.
my $SAVED_REGS_MASK = 0xc0ff0000;
$SAVED_REGS_MASK = 0xc0fff008 if ($flavour =~ /nubi/i);

# Input, output, key schedule and table pointers sit in $a0-$a3; the four
# state words are kept in $a4-$a7.
my ($inp,$out,$key,$Tbl)=($a0,$a1,$a2,$a3);
my ($s0,$s1,$s2,$s3)=($a4,$a5,$a6,$a7);
# Table-index scratch registers.  They must be bound BEFORE the next
# statement, because they refer to the file-level $t0,$t1,$t2 that the
# next statement shadows.
my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
# Scope-local temporaries: registers 12 through 23.
my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map { '$' . $_ } (12..23);
# Running round-key pointer and round counter.
my ($key0,$cnt)=($gp,$fp);
106
107 # instruction ordering is "stolen" from the output of the MIPSpro assembler
108 # invoked with -mips3 -O3 arguments...
109 $code.=<<___;
110 .align  5
111 .ent    _mips_AES_encrypt
112 _mips_AES_encrypt:
113         .frame  $sp,0,$ra
114         .set    reorder
115         lw      $t0,0($key)
116         lw      $t1,4($key)
117         lw      $t2,8($key)
118         lw      $t3,12($key)
119         lw      $cnt,240($key)
120         $PTR_ADD $key0,$key,16
121
122         xor     $s0,$t0
123         xor     $s1,$t1
124         xor     $s2,$t2
125         xor     $s3,$t3
126
127         sub     $cnt,1
128         _xtr    $i0,$s1,16-2
129 .Loop_enc:
130         _xtr    $i1,$s2,16-2
131         _xtr    $i2,$s3,16-2
132         _xtr    $i3,$s0,16-2
133         and     $i0,0x3fc
134         and     $i1,0x3fc
135         and     $i2,0x3fc
136         and     $i3,0x3fc
137         $PTR_ADD $i0,$Tbl
138         $PTR_ADD $i1,$Tbl
139         $PTR_ADD $i2,$Tbl
140         $PTR_ADD $i3,$Tbl
141         lwl     $t0,3($i0)              # Te1[s1>>16]
142         lwl     $t1,3($i1)              # Te1[s2>>16]
143         lwl     $t2,3($i2)              # Te1[s3>>16]
144         lwl     $t3,3($i3)              # Te1[s0>>16]
145         lwr     $t0,2($i0)              # Te1[s1>>16]
146         lwr     $t1,2($i1)              # Te1[s2>>16]
147         lwr     $t2,2($i2)              # Te1[s3>>16]
148         lwr     $t3,2($i3)              # Te1[s0>>16]
149
150         _xtr    $i0,$s2,8-2
151         _xtr    $i1,$s3,8-2
152         _xtr    $i2,$s0,8-2
153         _xtr    $i3,$s1,8-2
154         and     $i0,0x3fc
155         and     $i1,0x3fc
156         and     $i2,0x3fc
157         and     $i3,0x3fc
158         $PTR_ADD $i0,$Tbl
159         $PTR_ADD $i1,$Tbl
160         $PTR_ADD $i2,$Tbl
161         $PTR_ADD $i3,$Tbl
162         lwl     $t4,2($i0)              # Te2[s2>>8]
163         lwl     $t5,2($i1)              # Te2[s3>>8]
164         lwl     $t6,2($i2)              # Te2[s0>>8]
165         lwl     $t7,2($i3)              # Te2[s1>>8]
166         lwr     $t4,1($i0)              # Te2[s2>>8]
167         lwr     $t5,1($i1)              # Te2[s3>>8]
168         lwr     $t6,1($i2)              # Te2[s0>>8]
169         lwr     $t7,1($i3)              # Te2[s1>>8]
170
171         _xtr    $i0,$s3,0-2
172         _xtr    $i1,$s0,0-2
173         _xtr    $i2,$s1,0-2
174         _xtr    $i3,$s2,0-2
175         and     $i0,0x3fc
176         and     $i1,0x3fc
177         and     $i2,0x3fc
178         and     $i3,0x3fc
179         $PTR_ADD $i0,$Tbl
180         $PTR_ADD $i1,$Tbl
181         $PTR_ADD $i2,$Tbl
182         $PTR_ADD $i3,$Tbl
183         lwl     $t8,1($i0)              # Te3[s3]
184         lwl     $t9,1($i1)              # Te3[s0]
185         lwl     $t10,1($i2)             # Te3[s1]
186         lwl     $t11,1($i3)             # Te3[s2]
187         lwr     $t8,0($i0)              # Te3[s3]
188         lwr     $t9,0($i1)              # Te3[s0]
189         lwr     $t10,0($i2)             # Te3[s1]
190         lwr     $t11,0($i3)             # Te3[s2]
191
192         _xtr    $i0,$s0,24-2
193         _xtr    $i1,$s1,24-2
194         _xtr    $i2,$s2,24-2
195         _xtr    $i3,$s3,24-2
196         and     $i0,0x3fc
197         and     $i1,0x3fc
198         and     $i2,0x3fc
199         and     $i3,0x3fc
200         $PTR_ADD $i0,$Tbl
201         $PTR_ADD $i1,$Tbl
202         $PTR_ADD $i2,$Tbl
203         $PTR_ADD $i3,$Tbl
204         xor     $t0,$t4
205         xor     $t1,$t5
206         xor     $t2,$t6
207         xor     $t3,$t7
208         lw      $t4,0($i0)              # Te0[s0>>24]
209         lw      $t5,0($i1)              # Te0[s1>>24]
210         lw      $t6,0($i2)              # Te0[s2>>24]
211         lw      $t7,0($i3)              # Te0[s3>>24]
212
213         lw      $s0,0($key0)
214         lw      $s1,4($key0)
215         lw      $s2,8($key0)
216         lw      $s3,12($key0)
217
218         xor     $t0,$t8
219         xor     $t1,$t9
220         xor     $t2,$t10
221         xor     $t3,$t11
222
223         xor     $t0,$t4
224         xor     $t1,$t5
225         xor     $t2,$t6
226         xor     $t3,$t7
227
228         sub     $cnt,1
229         $PTR_ADD $key0,16
230         xor     $s0,$t0
231         xor     $s1,$t1
232         xor     $s2,$t2
233         xor     $s3,$t3
234         .set    noreorder
235         bnez    $cnt,.Loop_enc
236         _xtr    $i0,$s1,16-2
237
238         .set    reorder
239         _xtr    $i1,$s2,16-2
240         _xtr    $i2,$s3,16-2
241         _xtr    $i3,$s0,16-2
242         and     $i0,0x3fc
243         and     $i1,0x3fc
244         and     $i2,0x3fc
245         and     $i3,0x3fc
246         $PTR_ADD $i0,$Tbl
247         $PTR_ADD $i1,$Tbl
248         $PTR_ADD $i2,$Tbl
249         $PTR_ADD $i3,$Tbl
250         lbu     $t0,2($i0)              # Te4[s1>>16]
251         lbu     $t1,2($i1)              # Te4[s2>>16]
252         lbu     $t2,2($i2)              # Te4[s3>>16]
253         lbu     $t3,2($i3)              # Te4[s0>>16]
254
255         _xtr    $i0,$s2,8-2
256         _xtr    $i1,$s3,8-2
257         _xtr    $i2,$s0,8-2
258         _xtr    $i3,$s1,8-2
259         and     $i0,0x3fc
260         and     $i1,0x3fc
261         and     $i2,0x3fc
262         and     $i3,0x3fc
263         $PTR_ADD $i0,$Tbl
264         $PTR_ADD $i1,$Tbl
265         $PTR_ADD $i2,$Tbl
266         $PTR_ADD $i3,$Tbl
267         lbu     $t4,2($i0)              # Te4[s2>>8]
268         lbu     $t5,2($i1)              # Te4[s3>>8]
269         lbu     $t6,2($i2)              # Te4[s0>>8]
270         lbu     $t7,2($i3)              # Te4[s1>>8]
271
272         _xtr    $i0,$s0,24-2
273         _xtr    $i1,$s1,24-2
274         _xtr    $i2,$s2,24-2
275         _xtr    $i3,$s3,24-2
276         and     $i0,0x3fc
277         and     $i1,0x3fc
278         and     $i2,0x3fc
279         and     $i3,0x3fc
280         $PTR_ADD $i0,$Tbl
281         $PTR_ADD $i1,$Tbl
282         $PTR_ADD $i2,$Tbl
283         $PTR_ADD $i3,$Tbl
284         lbu     $t8,2($i0)              # Te4[s0>>24]
285         lbu     $t9,2($i1)              # Te4[s1>>24]
286         lbu     $t10,2($i2)             # Te4[s2>>24]
287         lbu     $t11,2($i3)             # Te4[s3>>24]
288
289         _xtr    $i0,$s3,0-2
290         _xtr    $i1,$s0,0-2
291         _xtr    $i2,$s1,0-2
292         _xtr    $i3,$s2,0-2
293         and     $i0,0x3fc
294         and     $i1,0x3fc
295         and     $i2,0x3fc
296         and     $i3,0x3fc
297
298         _ins    $t0,16
299         _ins    $t1,16
300         _ins    $t2,16
301         _ins    $t3,16
302
303         _ins    $t4,8
304         _ins    $t5,8
305         _ins    $t6,8
306         _ins    $t7,8
307
308         xor     $t0,$t4
309         xor     $t1,$t5
310         xor     $t2,$t6
311         xor     $t3,$t7
312
313         $PTR_ADD $i0,$Tbl
314         $PTR_ADD $i1,$Tbl
315         $PTR_ADD $i2,$Tbl
316         $PTR_ADD $i3,$Tbl
317         lbu     $t4,2($i0)              # Te4[s3]
318         lbu     $t5,2($i1)              # Te4[s0]
319         lbu     $t6,2($i2)              # Te4[s1]
320         lbu     $t7,2($i3)              # Te4[s2]
321
322         _ins    $t8,24
323         _ins    $t9,24
324         _ins    $t10,24
325         _ins    $t11,24
326
327         lw      $s0,0($key0)
328         lw      $s1,4($key0)
329         lw      $s2,8($key0)
330         lw      $s3,12($key0)
331
332         xor     $t0,$t8
333         xor     $t1,$t9
334         xor     $t2,$t10
335         xor     $t3,$t11
336
337         _ins    $t4,0
338         _ins    $t5,0
339         _ins    $t6,0
340         _ins    $t7,0
341
342         xor     $t0,$t4
343         xor     $t1,$t5
344         xor     $t2,$t6
345         xor     $t3,$t7
346
347         xor     $s0,$t0
348         xor     $s1,$t1
349         xor     $s2,$t2
350         xor     $s3,$t3
351
352         jr      $ra
353 .end    _mips_AES_encrypt
354
355 .align  5
356 .globl  AES_encrypt
357 .ent    AES_encrypt
358 AES_encrypt:
359         .frame  $sp,$FRAMESIZE,$ra
360         .mask   $SAVED_REGS_MASK,-$SZREG
361         .set    noreorder
362 ___
363 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
364         .cpload $pf
365 ___
366 $code.=<<___;
367         $PTR_SUB $sp,$FRAMESIZE
368         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
369         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
370         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
371         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
372         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
373         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
374         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
375         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
376         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
377         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
378 ___
379 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
380         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
381         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
382         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
383         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
384         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
385 ___
386 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
387         .cplocal        $Tbl
388         .cpsetup        $pf,$zero,AES_encrypt
389 ___
390 $code.=<<___;
391         .set    reorder
392         la      $Tbl,AES_Te             # PIC-ified 'load address'
393
394         lwl     $s0,0+$MSB($inp)
395         lwl     $s1,4+$MSB($inp)
396         lwl     $s2,8+$MSB($inp)
397         lwl     $s3,12+$MSB($inp)
398         lwr     $s0,0+$LSB($inp)
399         lwr     $s1,4+$LSB($inp)
400         lwr     $s2,8+$LSB($inp)
401         lwr     $s3,12+$LSB($inp)
402
403         bal     _mips_AES_encrypt
404
405         swr     $s0,0+$LSB($out)
406         swr     $s1,4+$LSB($out)
407         swr     $s2,8+$LSB($out)
408         swr     $s3,12+$LSB($out)
409         swl     $s0,0+$MSB($out)
410         swl     $s1,4+$MSB($out)
411         swl     $s2,8+$MSB($out)
412         swl     $s3,12+$MSB($out)
413
414         .set    noreorder
415         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
416         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
417         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
418         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
419         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
420         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
421         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
422         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
423         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
424         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
425 ___
426 $code.=<<___ if ($flavour =~ /nubi/i);
427         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
428         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
429         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
430         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
431         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
432 ___
433 $code.=<<___;
434         jr      $ra
435         $PTR_ADD $sp,$FRAMESIZE
436 .end    AES_encrypt
437 ___
438 \f
439 $code.=<<___;
440 .align  5
441 .ent    _mips_AES_decrypt
442 _mips_AES_decrypt:
443         .frame  $sp,0,$ra
444         .set    reorder
445         lw      $t0,0($key)
446         lw      $t1,4($key)
447         lw      $t2,8($key)
448         lw      $t3,12($key)
449         lw      $cnt,240($key)
450         $PTR_ADD $key0,$key,16
451
452         xor     $s0,$t0
453         xor     $s1,$t1
454         xor     $s2,$t2
455         xor     $s3,$t3
456
457         sub     $cnt,1
458         _xtr    $i0,$s3,16-2
459 .Loop_dec:
460         _xtr    $i1,$s0,16-2
461         _xtr    $i2,$s1,16-2
462         _xtr    $i3,$s2,16-2
463         and     $i0,0x3fc
464         and     $i1,0x3fc
465         and     $i2,0x3fc
466         and     $i3,0x3fc
467         $PTR_ADD $i0,$Tbl
468         $PTR_ADD $i1,$Tbl
469         $PTR_ADD $i2,$Tbl
470         $PTR_ADD $i3,$Tbl
471         lwl     $t0,3($i0)              # Td1[s3>>16]
472         lwl     $t1,3($i1)              # Td1[s0>>16]
473         lwl     $t2,3($i2)              # Td1[s1>>16]
474         lwl     $t3,3($i3)              # Td1[s2>>16]
475         lwr     $t0,2($i0)              # Td1[s3>>16]
476         lwr     $t1,2($i1)              # Td1[s0>>16]
477         lwr     $t2,2($i2)              # Td1[s1>>16]
478         lwr     $t3,2($i3)              # Td1[s2>>16]
479
480         _xtr    $i0,$s2,8-2
481         _xtr    $i1,$s3,8-2
482         _xtr    $i2,$s0,8-2
483         _xtr    $i3,$s1,8-2
484         and     $i0,0x3fc
485         and     $i1,0x3fc
486         and     $i2,0x3fc
487         and     $i3,0x3fc
488         $PTR_ADD $i0,$Tbl
489         $PTR_ADD $i1,$Tbl
490         $PTR_ADD $i2,$Tbl
491         $PTR_ADD $i3,$Tbl
492         lwl     $t4,2($i0)              # Td2[s2>>8]
493         lwl     $t5,2($i1)              # Td2[s3>>8]
494         lwl     $t6,2($i2)              # Td2[s0>>8]
495         lwl     $t7,2($i3)              # Td2[s1>>8]
496         lwr     $t4,1($i0)              # Td2[s2>>8]
497         lwr     $t5,1($i1)              # Td2[s3>>8]
498         lwr     $t6,1($i2)              # Td2[s0>>8]
499         lwr     $t7,1($i3)              # Td2[s1>>8]
500
501         _xtr    $i0,$s1,0-2
502         _xtr    $i1,$s2,0-2
503         _xtr    $i2,$s3,0-2
504         _xtr    $i3,$s0,0-2
505         and     $i0,0x3fc
506         and     $i1,0x3fc
507         and     $i2,0x3fc
508         and     $i3,0x3fc
509         $PTR_ADD $i0,$Tbl
510         $PTR_ADD $i1,$Tbl
511         $PTR_ADD $i2,$Tbl
512         $PTR_ADD $i3,$Tbl
513         lwl     $t8,1($i0)              # Td3[s1]
514         lwl     $t9,1($i1)              # Td3[s2]
515         lwl     $t10,1($i2)             # Td3[s3]
516         lwl     $t11,1($i3)             # Td3[s0]
517         lwr     $t8,0($i0)              # Td3[s1]
518         lwr     $t9,0($i1)              # Td3[s2]
519         lwr     $t10,0($i2)             # Td3[s3]
520         lwr     $t11,0($i3)             # Td3[s0]
521
522         _xtr    $i0,$s0,24-2
523         _xtr    $i1,$s1,24-2
524         _xtr    $i2,$s2,24-2
525         _xtr    $i3,$s3,24-2
526         and     $i0,0x3fc
527         and     $i1,0x3fc
528         and     $i2,0x3fc
529         and     $i3,0x3fc
530         $PTR_ADD $i0,$Tbl
531         $PTR_ADD $i1,$Tbl
532         $PTR_ADD $i2,$Tbl
533         $PTR_ADD $i3,$Tbl
534
535         xor     $t0,$t4
536         xor     $t1,$t5
537         xor     $t2,$t6
538         xor     $t3,$t7
539
540
541         lw      $t4,0($i0)              # Td0[s0>>24]
542         lw      $t5,0($i1)              # Td0[s1>>24]
543         lw      $t6,0($i2)              # Td0[s2>>24]
544         lw      $t7,0($i3)              # Td0[s3>>24]
545
546         lw      $s0,0($key0)
547         lw      $s1,4($key0)
548         lw      $s2,8($key0)
549         lw      $s3,12($key0)
550
551         xor     $t0,$t8
552         xor     $t1,$t9
553         xor     $t2,$t10
554         xor     $t3,$t11
555
556         xor     $t0,$t4
557         xor     $t1,$t5
558         xor     $t2,$t6
559         xor     $t3,$t7
560
561         sub     $cnt,1
562         $PTR_ADD $key0,16
563         xor     $s0,$t0
564         xor     $s1,$t1
565         xor     $s2,$t2
566         xor     $s3,$t3
567         .set    noreorder
568         bnez    $cnt,.Loop_dec
569         _xtr    $i0,$s3,16-2
570
571         .set    reorder
572         lw      $t4,1024($Tbl)          # prefetch Td4
573         lw      $t5,1024+32($Tbl)
574         lw      $t6,1024+64($Tbl)
575         lw      $t7,1024+96($Tbl)
576         lw      $t8,1024+128($Tbl)
577         lw      $t9,1024+160($Tbl)
578         lw      $t10,1024+192($Tbl)
579         lw      $t11,1024+224($Tbl)
580
581         _xtr    $i0,$s3,16
582         _xtr    $i1,$s0,16
583         _xtr    $i2,$s1,16
584         _xtr    $i3,$s2,16
585         and     $i0,0xff
586         and     $i1,0xff
587         and     $i2,0xff
588         and     $i3,0xff
589         $PTR_ADD $i0,$Tbl
590         $PTR_ADD $i1,$Tbl
591         $PTR_ADD $i2,$Tbl
592         $PTR_ADD $i3,$Tbl
593         lbu     $t0,1024($i0)           # Td4[s3>>16]
594         lbu     $t1,1024($i1)           # Td4[s0>>16]
595         lbu     $t2,1024($i2)           # Td4[s1>>16]
596         lbu     $t3,1024($i3)           # Td4[s2>>16]
597
598         _xtr    $i0,$s2,8
599         _xtr    $i1,$s3,8
600         _xtr    $i2,$s0,8
601         _xtr    $i3,$s1,8
602         and     $i0,0xff
603         and     $i1,0xff
604         and     $i2,0xff
605         and     $i3,0xff
606         $PTR_ADD $i0,$Tbl
607         $PTR_ADD $i1,$Tbl
608         $PTR_ADD $i2,$Tbl
609         $PTR_ADD $i3,$Tbl
610         lbu     $t4,1024($i0)           # Td4[s2>>8]
611         lbu     $t5,1024($i1)           # Td4[s3>>8]
612         lbu     $t6,1024($i2)           # Td4[s0>>8]
613         lbu     $t7,1024($i3)           # Td4[s1>>8]
614
615         _xtr    $i0,$s0,24
616         _xtr    $i1,$s1,24
617         _xtr    $i2,$s2,24
618         _xtr    $i3,$s3,24
619         $PTR_ADD $i0,$Tbl
620         $PTR_ADD $i1,$Tbl
621         $PTR_ADD $i2,$Tbl
622         $PTR_ADD $i3,$Tbl
623         lbu     $t8,1024($i0)           # Td4[s0>>24]
624         lbu     $t9,1024($i1)           # Td4[s1>>24]
625         lbu     $t10,1024($i2)          # Td4[s2>>24]
626         lbu     $t11,1024($i3)          # Td4[s3>>24]
627
628         _xtr    $i0,$s1,0
629         _xtr    $i1,$s2,0
630         _xtr    $i2,$s3,0
631         _xtr    $i3,$s0,0
632
633         _ins    $t0,16
634         _ins    $t1,16
635         _ins    $t2,16
636         _ins    $t3,16
637
638         _ins    $t4,8
639         _ins    $t5,8
640         _ins    $t6,8
641         _ins    $t7,8
642
643         xor     $t0,$t4
644         xor     $t1,$t5
645         xor     $t2,$t6
646         xor     $t3,$t7
647
648         $PTR_ADD $i0,$Tbl
649         $PTR_ADD $i1,$Tbl
650         $PTR_ADD $i2,$Tbl
651         $PTR_ADD $i3,$Tbl
652         lbu     $t4,1024($i0)           # Td4[s1]
653         lbu     $t5,1024($i1)           # Td4[s2]
654         lbu     $t6,1024($i2)           # Td4[s3]
655         lbu     $t7,1024($i3)           # Td4[s0]
656
657         _ins    $t8,24
658         _ins    $t9,24
659         _ins    $t10,24
660         _ins    $t11,24
661
662         lw      $s0,0($key0)
663         lw      $s1,4($key0)
664         lw      $s2,8($key0)
665         lw      $s3,12($key0)
666
667         _ins    $t4,0
668         _ins    $t5,0
669         _ins    $t6,0
670         _ins    $t7,0
671
672
673         xor     $t0,$t8
674         xor     $t1,$t9
675         xor     $t2,$t10
676         xor     $t3,$t11
677
678         xor     $t0,$t4
679         xor     $t1,$t5
680         xor     $t2,$t6
681         xor     $t3,$t7
682
683         xor     $s0,$t0
684         xor     $s1,$t1
685         xor     $s2,$t2
686         xor     $s3,$t3
687
688         jr      $ra
689 .end    _mips_AES_decrypt
690
691 .align  5
692 .globl  AES_decrypt
693 .ent    AES_decrypt
694 AES_decrypt:
695         .frame  $sp,$FRAMESIZE,$ra
696         .mask   $SAVED_REGS_MASK,-$SZREG
697         .set    noreorder
698 ___
699 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
700         .cpload $pf
701 ___
702 $code.=<<___;
703         $PTR_SUB $sp,$FRAMESIZE
704         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
705         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
706         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
707         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
708         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
709         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
710         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
711         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
712         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
713         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
714 ___
715 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
716         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
717         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
718         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
719         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
720         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
721 ___
722 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
723         .cplocal        $Tbl
724         .cpsetup        $pf,$zero,AES_decrypt
725 ___
726 $code.=<<___;
727         .set    reorder
728         la      $Tbl,AES_Td             # PIC-ified 'load address'
729
730         lwl     $s0,0+$MSB($inp)
731         lwl     $s1,4+$MSB($inp)
732         lwl     $s2,8+$MSB($inp)
733         lwl     $s3,12+$MSB($inp)
734         lwr     $s0,0+$LSB($inp)
735         lwr     $s1,4+$LSB($inp)
736         lwr     $s2,8+$LSB($inp)
737         lwr     $s3,12+$LSB($inp)
738
739         bal     _mips_AES_decrypt
740
741         swr     $s0,0+$LSB($out)
742         swr     $s1,4+$LSB($out)
743         swr     $s2,8+$LSB($out)
744         swr     $s3,12+$LSB($out)
745         swl     $s0,0+$MSB($out)
746         swl     $s1,4+$MSB($out)
747         swl     $s2,8+$MSB($out)
748         swl     $s3,12+$MSB($out)
749
750         .set    noreorder
751         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
752         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
753         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
754         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
755         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
756         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
757         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
758         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
759         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
760         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
761 ___
762 $code.=<<___ if ($flavour =~ /nubi/i);
763         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
764         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
765         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
766         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
767         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
768 ___
769 $code.=<<___;
770         jr      $ra
771         $PTR_ADD $sp,$FRAMESIZE
772 .end    AES_decrypt
773 ___
774 }}}
775 \f
776 {{{
777 my $FRAMESIZE=8*$SZREG;
778 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
779
780 my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
781 my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
782 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
783 my ($rcon,$cnt)=($gp,$fp);
784
785 $code.=<<___;
786 .align  5
787 .ent    _mips_AES_set_encrypt_key
788 _mips_AES_set_encrypt_key:
789         .frame  $sp,0,$ra
790         .set    noreorder
791         beqz    $inp,.Lekey_done
792         li      $t0,-1
793         beqz    $key,.Lekey_done
794         $PTR_ADD $rcon,$Tbl,1024+256
795
796         .set    reorder
797         lwl     $rk0,0+$MSB($inp)       # load 128 bits
798         lwl     $rk1,4+$MSB($inp)
799         lwl     $rk2,8+$MSB($inp)
800         lwl     $rk3,12+$MSB($inp)
801         li      $at,128
802         lwr     $rk0,0+$LSB($inp)
803         lwr     $rk1,4+$LSB($inp)
804         lwr     $rk2,8+$LSB($inp)
805         lwr     $rk3,12+$LSB($inp)
806         .set    noreorder
807         beq     $bits,$at,.L128bits
808         li      $cnt,10
809
810         .set    reorder
811         lwl     $rk4,16+$MSB($inp)      # load 192 bits
812         lwl     $rk5,20+$MSB($inp)
813         li      $at,192
814         lwr     $rk4,16+$LSB($inp)
815         lwr     $rk5,20+$LSB($inp)
816         .set    noreorder
817         beq     $bits,$at,.L192bits
818         li      $cnt,8
819
820         .set    reorder
821         lwl     $rk6,24+$MSB($inp)      # load 256 bits
822         lwl     $rk7,28+$MSB($inp)
823         li      $at,256
824         lwr     $rk6,24+$LSB($inp)
825         lwr     $rk7,28+$LSB($inp)
826         .set    noreorder
827         beq     $bits,$at,.L256bits
828         li      $cnt,7
829
830         b       .Lekey_done
831         li      $t0,-2
832
833 .align  4
834 .L128bits:
835         .set    reorder
836         srl     $i0,$rk3,16
837         srl     $i1,$rk3,8
838         and     $i0,0xff
839         and     $i1,0xff
840         and     $i2,$rk3,0xff
841         srl     $i3,$rk3,24
842         $PTR_ADD $i0,$Tbl
843         $PTR_ADD $i1,$Tbl
844         $PTR_ADD $i2,$Tbl
845         $PTR_ADD $i3,$Tbl
846         lbu     $i0,1024($i0)
847         lbu     $i1,1024($i1)
848         lbu     $i2,1024($i2)
849         lbu     $i3,1024($i3)
850
851         sw      $rk0,0($key)
852         sw      $rk1,4($key)
853         sw      $rk2,8($key)
854         sw      $rk3,12($key)
855         sub     $cnt,1
856         $PTR_ADD $key,16
857
858         _bias   $i0,24
859         _bias   $i1,16
860         _bias   $i2,8
861         _bias   $i3,0
862
863         xor     $rk0,$i0
864         lw      $i0,0($rcon)
865         xor     $rk0,$i1
866         xor     $rk0,$i2
867         xor     $rk0,$i3
868         xor     $rk0,$i0
869
870         xor     $rk1,$rk0
871         xor     $rk2,$rk1
872         xor     $rk3,$rk2
873
874         .set    noreorder
875         bnez    $cnt,.L128bits
876         $PTR_ADD $rcon,4
877
878         sw      $rk0,0($key)
879         sw      $rk1,4($key)
880         sw      $rk2,8($key)
881         li      $cnt,10
882         sw      $rk3,12($key)
883         li      $t0,0
884         sw      $cnt,80($key)
885         b       .Lekey_done
886         $PTR_SUB $key,10*16
887
888 .align  4
889 .L192bits:
890         .set    reorder
891         srl     $i0,$rk5,16
892         srl     $i1,$rk5,8
893         and     $i0,0xff
894         and     $i1,0xff
895         and     $i2,$rk5,0xff
896         srl     $i3,$rk5,24
897         $PTR_ADD $i0,$Tbl
898         $PTR_ADD $i1,$Tbl
899         $PTR_ADD $i2,$Tbl
900         $PTR_ADD $i3,$Tbl
901         lbu     $i0,1024($i0)
902         lbu     $i1,1024($i1)
903         lbu     $i2,1024($i2)
904         lbu     $i3,1024($i3)
905
906         sw      $rk0,0($key)
907         sw      $rk1,4($key)
908         sw      $rk2,8($key)
909         sw      $rk3,12($key)
910         sw      $rk4,16($key)
911         sw      $rk5,20($key)
912         sub     $cnt,1
913         $PTR_ADD $key,24
914
915         _bias   $i0,24
916         _bias   $i1,16
917         _bias   $i2,8
918         _bias   $i3,0
919
920         xor     $rk0,$i0
921         lw      $i0,0($rcon)
922         xor     $rk0,$i1
923         xor     $rk0,$i2
924         xor     $rk0,$i3
925         xor     $rk0,$i0
926
927         xor     $rk1,$rk0
928         xor     $rk2,$rk1
929         xor     $rk3,$rk2
930         xor     $rk4,$rk3
931         xor     $rk5,$rk4
932
933         .set    noreorder
934         bnez    $cnt,.L192bits
935         $PTR_ADD $rcon,4
936
937         sw      $rk0,0($key)
938         sw      $rk1,4($key)
939         sw      $rk2,8($key)
940         li      $cnt,12
941         sw      $rk3,12($key)
942         li      $t0,0
943         sw      $cnt,48($key)
944         b       .Lekey_done
945         $PTR_SUB $key,12*16
946
947 .align  4
948 .L256bits:
949         .set    reorder
950         srl     $i0,$rk7,16
951         srl     $i1,$rk7,8
952         and     $i0,0xff
953         and     $i1,0xff
954         and     $i2,$rk7,0xff
955         srl     $i3,$rk7,24
956         $PTR_ADD $i0,$Tbl
957         $PTR_ADD $i1,$Tbl
958         $PTR_ADD $i2,$Tbl
959         $PTR_ADD $i3,$Tbl
960         lbu     $i0,1024($i0)
961         lbu     $i1,1024($i1)
962         lbu     $i2,1024($i2)
963         lbu     $i3,1024($i3)
964
965         sw      $rk0,0($key)
966         sw      $rk1,4($key)
967         sw      $rk2,8($key)
968         sw      $rk3,12($key)
969         sw      $rk4,16($key)
970         sw      $rk5,20($key)
971         sw      $rk6,24($key)
972         sw      $rk7,28($key)
973         sub     $cnt,1
974
975         _bias   $i0,24
976         _bias   $i1,16
977         _bias   $i2,8
978         _bias   $i3,0
979
980         xor     $rk0,$i0
981         lw      $i0,0($rcon)
982         xor     $rk0,$i1
983         xor     $rk0,$i2
984         xor     $rk0,$i3
985         xor     $rk0,$i0
986
987         xor     $rk1,$rk0
988         xor     $rk2,$rk1
989         xor     $rk3,$rk2
990         beqz    $cnt,.L256bits_done
991
992         srl     $i0,$rk3,24
993         srl     $i1,$rk3,16
994         srl     $i2,$rk3,8
995         and     $i3,$rk3,0xff
996         and     $i1,0xff
997         and     $i2,0xff
998         $PTR_ADD $i0,$Tbl
999         $PTR_ADD $i1,$Tbl
1000         $PTR_ADD $i2,$Tbl
1001         $PTR_ADD $i3,$Tbl
1002         lbu     $i0,1024($i0)
1003         lbu     $i1,1024($i1)
1004         lbu     $i2,1024($i2)
1005         lbu     $i3,1024($i3)
1006         sll     $i0,24
1007         sll     $i1,16
1008         sll     $i2,8
1009
1010         xor     $rk4,$i0
1011         xor     $rk4,$i1
1012         xor     $rk4,$i2
1013         xor     $rk4,$i3
1014
1015         xor     $rk5,$rk4
1016         xor     $rk6,$rk5
1017         xor     $rk7,$rk6
1018
1019         $PTR_ADD $key,32
1020         .set    noreorder
1021         b       .L256bits
1022         $PTR_ADD $rcon,4
1023
1024 .L256bits_done:
1025         sw      $rk0,32($key)
1026         sw      $rk1,36($key)
1027         sw      $rk2,40($key)
1028         li      $cnt,14
1029         sw      $rk3,44($key)
1030         li      $t0,0
1031         sw      $cnt,48($key)
1032         $PTR_SUB $key,12*16
1033
1034 .Lekey_done:
1035         jr      $ra
1036         nop
1037 .end    _mips_AES_set_encrypt_key
1038
1039 .globl  private_AES_set_encrypt_key
1040 .ent    private_AES_set_encrypt_key
1041 private_AES_set_encrypt_key:
1042         .frame  $sp,$FRAMESIZE,$ra
1043         .mask   $SAVED_REGS_MASK,-$SZREG
1044         .set    noreorder
1045 ___
# Remainder of the private_AES_set_encrypt_key wrapper (its label and .frame
# directives are emitted just above): set up PIC addressing, allocate the
# frame, save registers, call the _mips_AES_set_encrypt_key worker and start
# the epilogue.
#
# o32 only: .cpload is emitted before the frame is allocated.
1046 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1047         .cpload $pf
1048 ___
# Every flavour: allocate $FRAMESIZE bytes and save $ra and $fp in the two
# topmost slots.
1049 $code.=<<___;
1050         $PTR_SUB $sp,$FRAMESIZE
1051         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1052         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1053 ___
# NUBI only: additionally save $s3..$s0 and $gp in slots 3..7 from the top.
1054 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1055         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1056         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1057         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1058         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1059         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1060 ___
# Everything but o32: .cplocal/.cpsetup take the place of .cpload.
1061 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1062         .cplocal        $Tbl
1063         .cpsetup        $pf,$zero,private_AES_set_encrypt_key
1064 ___
# Load the address of AES_Te into $Tbl and call the worker.  Afterwards, with
# reordering switched off again, copy the status from $t0 to $a0 and reload
# $ra and $fp from the slots they were saved in.
1065 $code.=<<___;
1066         .set    reorder
1067         la      $Tbl,AES_Te             # PIC-ified 'load address'
1068
1069         bal     _mips_AES_set_encrypt_key
1070
1071         .set    noreorder
1072         move    $a0,$t0
1073         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1074         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1075 ___
# NUBI epilogue of private_AES_set_encrypt_key: reload $s3..$s0 and $gp.
# The offsets must mirror the prologue's $REG_S stores, which put these
# registers at $FRAMESIZE-3*$SZREG .. $FRAMESIZE-7*$SZREG.  Slots -11..-15 are
# never written by this function, so loading from them would hand the caller
# garbage in its callee-saved registers.
1076 $code.=<<___ if ($flavour =~ /nubi/i);
1077         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1078         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1079         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1080         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1081         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1082 ___
# Return to the caller; .set noreorder is still in effect, so the frame is
# released by the instruction that follows jr.
1083 $code.=<<___;
1084         jr      $ra
1085         $PTR_ADD $sp,$FRAMESIZE
1086 .end    private_AES_set_encrypt_key
1087 ___
1088 \f
# private_AES_set_decrypt_key: build the encryption key schedule with the
# _mips_AES_set_encrypt_key worker, reverse the order of its round keys
# (.Lswap), then rewrite every word of the inner round keys in place (.Lmix).
#
# $head/$tail take over the registers named by $inp and $bits.
1089 my ($head,$tail)=($inp,$bits);
# Working values of .Lmix; $tp9..$tpe occupy $s0-$s3, the registers the NUBI
# prologue below saves.
1090 my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
# Scratch mask in $at plus three packed byte constants.  $x80808080 shares $t0
# with the worker's status code; the code reloads $t0 with 0 after .Lmix.
1091 my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
# Entry label and frame description.
1092 $code.=<<___;
1093 .align  5
1094 .globl  private_AES_set_decrypt_key
1095 .ent    private_AES_set_decrypt_key
1096 private_AES_set_decrypt_key:
1097         .frame  $sp,$FRAMESIZE,$ra
1098         .mask   $SAVED_REGS_MASK,-$SZREG
1099         .set    noreorder
1100 ___
# o32 only: .cpload is emitted before the frame is allocated.
1101 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1102         .cpload $pf
1103 ___
# Every flavour: allocate $FRAMESIZE bytes and save $ra and $fp in the two
# topmost slots.
1104 $code.=<<___;
1105         $PTR_SUB $sp,$FRAMESIZE
1106         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1107         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1108 ___
# NUBI only: additionally save $s3..$s0 and $gp in slots 3..7 from the top.
1109 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1110         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1111         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1112         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1113         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1114         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1115 ___
# Everything but o32: .cplocal/.cpsetup take the place of .cpload.
1116 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1117         .cplocal        $Tbl
1118         .cpsetup        $pf,$zero,private_AES_set_decrypt_key
1119 ___
# Body, in emission order:
#   - load the address of AES_Te into $Tbl and call the worker; a negative
#     status in $t0 branches straight to .Ldkey_done;
#   - .Lswap: $head starts at $key and $tail at $key+16*$cnt.  Each pass
#     exchanges the two 16-byte round keys they point at, then moves $head up
#     and $tail down by 16 bytes; the loop ends when the pointers are equal;
#   - setup for .Lmix: $cnt becomes 4*($cnt-1), $key is advanced past the
#     first round key, and the 0x80808080 / 0x7f7f7f7f / 0x1b1b1b1b constants
#     are built;
#   - .Lmix: one 32-bit word per pass, $cnt passes in total.  tp2, tp4 and tp8
#     are obtained by doubling the previous value byte-wise: the top bit of
#     each byte is masked off before the add, and 0x1b is XORed into every
#     byte whose top bit was set.  They are combined into tp9/tpb/tpd/tpe, and
#     tpe XORed with the shifted copies of tpd, tp9 and tpb produced by the
#     _ror pairs is stored back over the input word.
#     NOTE(review): this looks like InvMixColumns applied to the round keys,
#     as the tp9/tpb/tpd/tpe names suggest -- confirm against the AES spec;
#   - $t0 is set to 0; at .Ldkey_done it is copied to $a0 and $ra/$fp are
#     reloaded.
1120 $code.=<<___;
1121         .set    reorder
1122         la      $Tbl,AES_Te             # PIC-ified 'load address'
1123
1124         bal     _mips_AES_set_encrypt_key
1125
1126         bltz    $t0,.Ldkey_done
1127
1128         sll     $at,$cnt,4
1129         $PTR_ADD $head,$key,0
1130         $PTR_ADD $tail,$key,$at
1131 .align  4
1132 .Lswap:
1133         lw      $rk0,0($head)
1134         lw      $rk1,4($head)
1135         lw      $rk2,8($head)
1136         lw      $rk3,12($head)
1137         lw      $rk4,0($tail)
1138         lw      $rk5,4($tail)
1139         lw      $rk6,8($tail)
1140         lw      $rk7,12($tail)
1141         sw      $rk0,0($tail)
1142         sw      $rk1,4($tail)
1143         sw      $rk2,8($tail)
1144         sw      $rk3,12($tail)
1145         $PTR_ADD $head,16
1146         $PTR_SUB $tail,16
1147         sw      $rk4,-16($head)
1148         sw      $rk5,-12($head)
1149         sw      $rk6,-8($head)
1150         sw      $rk7,-4($head)
1151         bne     $head,$tail,.Lswap
1152
1153         lw      $tp1,16($key)           # modulo-scheduled
1154         lui     $x80808080,0x8080
1155         sub     $cnt,1
1156         or      $x80808080,0x8080
1157         sll     $cnt,2
1158         $PTR_ADD $key,16
1159         lui     $x1b1b1b1b,0x1b1b
1160         nor     $x7f7f7f7f,$zero,$x80808080
1161         or      $x1b1b1b1b,0x1b1b
1162 .align  4
1163 .Lmix:
1164         and     $m,$tp1,$x80808080
1165         and     $tp2,$tp1,$x7f7f7f7f
1166         srl     $tp4,$m,7
1167         addu    $tp2,$tp2               # tp2<<1
1168         subu    $m,$tp4
1169         and     $m,$x1b1b1b1b
1170         xor     $tp2,$m
1171
1172         and     $m,$tp2,$x80808080
1173         and     $tp4,$tp2,$x7f7f7f7f
1174         srl     $tp8,$m,7
1175         addu    $tp4,$tp4               # tp4<<1
1176         subu    $m,$tp8
1177         and     $m,$x1b1b1b1b
1178         xor     $tp4,$m
1179
1180         and     $m,$tp4,$x80808080
1181         and     $tp8,$tp4,$x7f7f7f7f
1182         srl     $tp9,$m,7
1183         addu    $tp8,$tp8               # tp8<<1
1184         subu    $m,$tp9
1185         and     $m,$x1b1b1b1b
1186         xor     $tp8,$m
1187
1188         xor     $tp9,$tp8,$tp1
1189         xor     $tpe,$tp8,$tp4
1190         xor     $tpb,$tp9,$tp2
1191         xor     $tpd,$tp9,$tp4
1192
1193         _ror    $tp1,$tpd,16
1194          xor    $tpe,$tp2
1195         _ror    $tp2,$tpd,-16
1196         xor     $tpe,$tp1
1197         _ror    $tp1,$tp9,8
1198         xor     $tpe,$tp2
1199         _ror    $tp2,$tp9,-24
1200         xor     $tpe,$tp1
1201         _ror    $tp1,$tpb,24
1202         xor     $tpe,$tp2
1203         _ror    $tp2,$tpb,-8
1204         xor     $tpe,$tp1
1205         lw      $tp1,4($key)            # modulo-scheduled
1206         xor     $tpe,$tp2
1207         sub     $cnt,1
1208         sw      $tpe,0($key)
1209         $PTR_ADD $key,4
1210         bnez    $cnt,.Lmix
1211
1212         li      $t0,0
1213 .Ldkey_done:
1214         .set    noreorder
1215         move    $a0,$t0
1216         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1217         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1218 ___
# NUBI epilogue of private_AES_set_decrypt_key: reload $s3..$s0 and $gp.
# The offsets must mirror the prologue's $REG_S stores, which put these
# registers at $FRAMESIZE-3*$SZREG .. $FRAMESIZE-7*$SZREG.  Slots -11..-15 are
# never written by this function, so loading from them would hand the caller
# garbage in its callee-saved registers ($s0-$s3 are used as scratch by .Lmix).
1219 $code.=<<___ if ($flavour =~ /nubi/i);
1220         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1221         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1222         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1223         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1224         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1225 ___
# Return to the caller; .set noreorder is still in effect, so the frame is
# released by the instruction that follows jr.
1226 $code.=<<___;
1227         jr      $ra
1228         $PTR_ADD $sp,$FRAMESIZE
1229 .end    private_AES_set_decrypt_key
1230 ___
1231 }}}
1232
1233 ######################################################################
1234 # Tables are kept in an endian-neutral manner: entries are emitted byte by byte
1235 $code.=<<___;
1236 .rdata
1237 .align  6
1238 AES_Te:
1239 .byte   0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84     # Te0
1240 .byte   0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
1241 .byte   0xff,0xf2,0xf2,0x0d,    0xd6,0x6b,0x6b,0xbd
1242 .byte   0xde,0x6f,0x6f,0xb1,    0x91,0xc5,0xc5,0x54
1243 .byte   0x60,0x30,0x30,0x50,    0x02,0x01,0x01,0x03
1244 .byte   0xce,0x67,0x67,0xa9,    0x56,0x2b,0x2b,0x7d
1245 .byte   0xe7,0xfe,0xfe,0x19,    0xb5,0xd7,0xd7,0x62
1246 .byte   0x4d,0xab,0xab,0xe6,    0xec,0x76,0x76,0x9a
1247 .byte   0x8f,0xca,0xca,0x45,    0x1f,0x82,0x82,0x9d
1248 .byte   0x89,0xc9,0xc9,0x40,    0xfa,0x7d,0x7d,0x87
1249 .byte   0xef,0xfa,0xfa,0x15,    0xb2,0x59,0x59,0xeb
1250 .byte   0x8e,0x47,0x47,0xc9,    0xfb,0xf0,0xf0,0x0b
1251 .byte   0x41,0xad,0xad,0xec,    0xb3,0xd4,0xd4,0x67
1252 .byte   0x5f,0xa2,0xa2,0xfd,    0x45,0xaf,0xaf,0xea
1253 .byte   0x23,0x9c,0x9c,0xbf,    0x53,0xa4,0xa4,0xf7
1254 .byte   0xe4,0x72,0x72,0x96,    0x9b,0xc0,0xc0,0x5b
1255 .byte   0x75,0xb7,0xb7,0xc2,    0xe1,0xfd,0xfd,0x1c
1256 .byte   0x3d,0x93,0x93,0xae,    0x4c,0x26,0x26,0x6a
1257 .byte   0x6c,0x36,0x36,0x5a,    0x7e,0x3f,0x3f,0x41
1258 .byte   0xf5,0xf7,0xf7,0x02,    0x83,0xcc,0xcc,0x4f
1259 .byte   0x68,0x34,0x34,0x5c,    0x51,0xa5,0xa5,0xf4
1260 .byte   0xd1,0xe5,0xe5,0x34,    0xf9,0xf1,0xf1,0x08
1261 .byte   0xe2,0x71,0x71,0x93,    0xab,0xd8,0xd8,0x73
1262 .byte   0x62,0x31,0x31,0x53,    0x2a,0x15,0x15,0x3f
1263 .byte   0x08,0x04,0x04,0x0c,    0x95,0xc7,0xc7,0x52
1264 .byte   0x46,0x23,0x23,0x65,    0x9d,0xc3,0xc3,0x5e
1265 .byte   0x30,0x18,0x18,0x28,    0x37,0x96,0x96,0xa1
1266 .byte   0x0a,0x05,0x05,0x0f,    0x2f,0x9a,0x9a,0xb5
1267 .byte   0x0e,0x07,0x07,0x09,    0x24,0x12,0x12,0x36
1268 .byte   0x1b,0x80,0x80,0x9b,    0xdf,0xe2,0xe2,0x3d
1269 .byte   0xcd,0xeb,0xeb,0x26,    0x4e,0x27,0x27,0x69
1270 .byte   0x7f,0xb2,0xb2,0xcd,    0xea,0x75,0x75,0x9f
1271 .byte   0x12,0x09,0x09,0x1b,    0x1d,0x83,0x83,0x9e
1272 .byte   0x58,0x2c,0x2c,0x74,    0x34,0x1a,0x1a,0x2e
1273 .byte   0x36,0x1b,0x1b,0x2d,    0xdc,0x6e,0x6e,0xb2
1274 .byte   0xb4,0x5a,0x5a,0xee,    0x5b,0xa0,0xa0,0xfb
1275 .byte   0xa4,0x52,0x52,0xf6,    0x76,0x3b,0x3b,0x4d
1276 .byte   0xb7,0xd6,0xd6,0x61,    0x7d,0xb3,0xb3,0xce
1277 .byte   0x52,0x29,0x29,0x7b,    0xdd,0xe3,0xe3,0x3e
1278 .byte   0x5e,0x2f,0x2f,0x71,    0x13,0x84,0x84,0x97
1279 .byte   0xa6,0x53,0x53,0xf5,    0xb9,0xd1,0xd1,0x68
1280 .byte   0x00,0x00,0x00,0x00,    0xc1,0xed,0xed,0x2c
1281 .byte   0x40,0x20,0x20,0x60,    0xe3,0xfc,0xfc,0x1f
1282 .byte   0x79,0xb1,0xb1,0xc8,    0xb6,0x5b,0x5b,0xed
1283 .byte   0xd4,0x6a,0x6a,0xbe,    0x8d,0xcb,0xcb,0x46
1284 .byte   0x67,0xbe,0xbe,0xd9,    0x72,0x39,0x39,0x4b
1285 .byte   0x94,0x4a,0x4a,0xde,    0x98,0x4c,0x4c,0xd4
1286 .byte   0xb0,0x58,0x58,0xe8,    0x85,0xcf,0xcf,0x4a
1287 .byte   0xbb,0xd0,0xd0,0x6b,    0xc5,0xef,0xef,0x2a
1288 .byte   0x4f,0xaa,0xaa,0xe5,    0xed,0xfb,0xfb,0x16
1289 .byte   0x86,0x43,0x43,0xc5,    0x9a,0x4d,0x4d,0xd7
1290 .byte   0x66,0x33,0x33,0x55,    0x11,0x85,0x85,0x94
1291 .byte   0x8a,0x45,0x45,0xcf,    0xe9,0xf9,0xf9,0x10
1292 .byte   0x04,0x02,0x02,0x06,    0xfe,0x7f,0x7f,0x81
1293 .byte   0xa0,0x50,0x50,0xf0,    0x78,0x3c,0x3c,0x44
1294 .byte   0x25,0x9f,0x9f,0xba,    0x4b,0xa8,0xa8,0xe3
1295 .byte   0xa2,0x51,0x51,0xf3,    0x5d,0xa3,0xa3,0xfe
1296 .byte   0x80,0x40,0x40,0xc0,    0x05,0x8f,0x8f,0x8a
1297 .byte   0x3f,0x92,0x92,0xad,    0x21,0x9d,0x9d,0xbc
1298 .byte   0x70,0x38,0x38,0x48,    0xf1,0xf5,0xf5,0x04
1299 .byte   0x63,0xbc,0xbc,0xdf,    0x77,0xb6,0xb6,0xc1
1300 .byte   0xaf,0xda,0xda,0x75,    0x42,0x21,0x21,0x63
1301 .byte   0x20,0x10,0x10,0x30,    0xe5,0xff,0xff,0x1a
1302 .byte   0xfd,0xf3,0xf3,0x0e,    0xbf,0xd2,0xd2,0x6d
1303 .byte   0x81,0xcd,0xcd,0x4c,    0x18,0x0c,0x0c,0x14
1304 .byte   0x26,0x13,0x13,0x35,    0xc3,0xec,0xec,0x2f
1305 .byte   0xbe,0x5f,0x5f,0xe1,    0x35,0x97,0x97,0xa2
1306 .byte   0x88,0x44,0x44,0xcc,    0x2e,0x17,0x17,0x39
1307 .byte   0x93,0xc4,0xc4,0x57,    0x55,0xa7,0xa7,0xf2
1308 .byte   0xfc,0x7e,0x7e,0x82,    0x7a,0x3d,0x3d,0x47
1309 .byte   0xc8,0x64,0x64,0xac,    0xba,0x5d,0x5d,0xe7
1310 .byte   0x32,0x19,0x19,0x2b,    0xe6,0x73,0x73,0x95
1311 .byte   0xc0,0x60,0x60,0xa0,    0x19,0x81,0x81,0x98
1312 .byte   0x9e,0x4f,0x4f,0xd1,    0xa3,0xdc,0xdc,0x7f
1313 .byte   0x44,0x22,0x22,0x66,    0x54,0x2a,0x2a,0x7e
1314 .byte   0x3b,0x90,0x90,0xab,    0x0b,0x88,0x88,0x83
1315 .byte   0x8c,0x46,0x46,0xca,    0xc7,0xee,0xee,0x29
1316 .byte   0x6b,0xb8,0xb8,0xd3,    0x28,0x14,0x14,0x3c
1317 .byte   0xa7,0xde,0xde,0x79,    0xbc,0x5e,0x5e,0xe2
1318 .byte   0x16,0x0b,0x0b,0x1d,    0xad,0xdb,0xdb,0x76
1319 .byte   0xdb,0xe0,0xe0,0x3b,    0x64,0x32,0x32,0x56
1320 .byte   0x74,0x3a,0x3a,0x4e,    0x14,0x0a,0x0a,0x1e
1321 .byte   0x92,0x49,0x49,0xdb,    0x0c,0x06,0x06,0x0a
1322 .byte   0x48,0x24,0x24,0x6c,    0xb8,0x5c,0x5c,0xe4
1323 .byte   0x9f,0xc2,0xc2,0x5d,    0xbd,0xd3,0xd3,0x6e
1324 .byte   0x43,0xac,0xac,0xef,    0xc4,0x62,0x62,0xa6
1325 .byte   0x39,0x91,0x91,0xa8,    0x31,0x95,0x95,0xa4
1326 .byte   0xd3,0xe4,0xe4,0x37,    0xf2,0x79,0x79,0x8b
1327 .byte   0xd5,0xe7,0xe7,0x32,    0x8b,0xc8,0xc8,0x43
1328 .byte   0x6e,0x37,0x37,0x59,    0xda,0x6d,0x6d,0xb7
1329 .byte   0x01,0x8d,0x8d,0x8c,    0xb1,0xd5,0xd5,0x64
1330 .byte   0x9c,0x4e,0x4e,0xd2,    0x49,0xa9,0xa9,0xe0
1331 .byte   0xd8,0x6c,0x6c,0xb4,    0xac,0x56,0x56,0xfa
1332 .byte   0xf3,0xf4,0xf4,0x07,    0xcf,0xea,0xea,0x25
1333 .byte   0xca,0x65,0x65,0xaf,    0xf4,0x7a,0x7a,0x8e
1334 .byte   0x47,0xae,0xae,0xe9,    0x10,0x08,0x08,0x18
1335 .byte   0x6f,0xba,0xba,0xd5,    0xf0,0x78,0x78,0x88
1336 .byte   0x4a,0x25,0x25,0x6f,    0x5c,0x2e,0x2e,0x72
1337 .byte   0x38,0x1c,0x1c,0x24,    0x57,0xa6,0xa6,0xf1
1338 .byte   0x73,0xb4,0xb4,0xc7,    0x97,0xc6,0xc6,0x51
1339 .byte   0xcb,0xe8,0xe8,0x23,    0xa1,0xdd,0xdd,0x7c
1340 .byte   0xe8,0x74,0x74,0x9c,    0x3e,0x1f,0x1f,0x21
1341 .byte   0x96,0x4b,0x4b,0xdd,    0x61,0xbd,0xbd,0xdc
1342 .byte   0x0d,0x8b,0x8b,0x86,    0x0f,0x8a,0x8a,0x85
1343 .byte   0xe0,0x70,0x70,0x90,    0x7c,0x3e,0x3e,0x42
1344 .byte   0x71,0xb5,0xb5,0xc4,    0xcc,0x66,0x66,0xaa
1345 .byte   0x90,0x48,0x48,0xd8,    0x06,0x03,0x03,0x05
1346 .byte   0xf7,0xf6,0xf6,0x01,    0x1c,0x0e,0x0e,0x12
1347 .byte   0xc2,0x61,0x61,0xa3,    0x6a,0x35,0x35,0x5f
1348 .byte   0xae,0x57,0x57,0xf9,    0x69,0xb9,0xb9,0xd0
1349 .byte   0x17,0x86,0x86,0x91,    0x99,0xc1,0xc1,0x58
1350 .byte   0x3a,0x1d,0x1d,0x27,    0x27,0x9e,0x9e,0xb9
1351 .byte   0xd9,0xe1,0xe1,0x38,    0xeb,0xf8,0xf8,0x13
1352 .byte   0x2b,0x98,0x98,0xb3,    0x22,0x11,0x11,0x33
1353 .byte   0xd2,0x69,0x69,0xbb,    0xa9,0xd9,0xd9,0x70
1354 .byte   0x07,0x8e,0x8e,0x89,    0x33,0x94,0x94,0xa7
1355 .byte   0x2d,0x9b,0x9b,0xb6,    0x3c,0x1e,0x1e,0x22
1356 .byte   0x15,0x87,0x87,0x92,    0xc9,0xe9,0xe9,0x20
1357 .byte   0x87,0xce,0xce,0x49,    0xaa,0x55,0x55,0xff
1358 .byte   0x50,0x28,0x28,0x78,    0xa5,0xdf,0xdf,0x7a
1359 .byte   0x03,0x8c,0x8c,0x8f,    0x59,0xa1,0xa1,0xf8
1360 .byte   0x09,0x89,0x89,0x80,    0x1a,0x0d,0x0d,0x17
1361 .byte   0x65,0xbf,0xbf,0xda,    0xd7,0xe6,0xe6,0x31
1362 .byte   0x84,0x42,0x42,0xc6,    0xd0,0x68,0x68,0xb8
1363 .byte   0x82,0x41,0x41,0xc3,    0x29,0x99,0x99,0xb0
1364 .byte   0x5a,0x2d,0x2d,0x77,    0x1e,0x0f,0x0f,0x11
1365 .byte   0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
1366 .byte   0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
1367
1368 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5  # Te4
1369 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
1370 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
1371 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
1372 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
1373 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
1374 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
1375 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
1376 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
1377 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
1378 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
1379 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
1380 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
1381 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
1382 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
1383 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
1384 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
1385 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
1386 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
1387 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
1388 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
1389 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
1390 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
1391 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
1392 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
1393 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
1394 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
1395 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
1396 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
1397 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
1398 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
1399 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
1400
1401 .byte   0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00     # rcon
1402 .byte   0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
1403 .byte   0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
1404 .byte   0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
1405 .byte   0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
1406
1407 .align  6
1408 AES_Td:
1409 .byte   0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53     # Td0
1410 .byte   0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
1411 .byte   0x3b,0xab,0x6b,0xcb,    0x1f,0x9d,0x45,0xf1
1412 .byte   0xac,0xfa,0x58,0xab,    0x4b,0xe3,0x03,0x93
1413 .byte   0x20,0x30,0xfa,0x55,    0xad,0x76,0x6d,0xf6
1414 .byte   0x88,0xcc,0x76,0x91,    0xf5,0x02,0x4c,0x25
1415 .byte   0x4f,0xe5,0xd7,0xfc,    0xc5,0x2a,0xcb,0xd7
1416 .byte   0x26,0x35,0x44,0x80,    0xb5,0x62,0xa3,0x8f
1417 .byte   0xde,0xb1,0x5a,0x49,    0x25,0xba,0x1b,0x67
1418 .byte   0x45,0xea,0x0e,0x98,    0x5d,0xfe,0xc0,0xe1
1419 .byte   0xc3,0x2f,0x75,0x02,    0x81,0x4c,0xf0,0x12
1420 .byte   0x8d,0x46,0x97,0xa3,    0x6b,0xd3,0xf9,0xc6
1421 .byte   0x03,0x8f,0x5f,0xe7,    0x15,0x92,0x9c,0x95
1422 .byte   0xbf,0x6d,0x7a,0xeb,    0x95,0x52,0x59,0xda
1423 .byte   0xd4,0xbe,0x83,0x2d,    0x58,0x74,0x21,0xd3
1424 .byte   0x49,0xe0,0x69,0x29,    0x8e,0xc9,0xc8,0x44
1425 .byte   0x75,0xc2,0x89,0x6a,    0xf4,0x8e,0x79,0x78
1426 .byte   0x99,0x58,0x3e,0x6b,    0x27,0xb9,0x71,0xdd
1427 .byte   0xbe,0xe1,0x4f,0xb6,    0xf0,0x88,0xad,0x17
1428 .byte   0xc9,0x20,0xac,0x66,    0x7d,0xce,0x3a,0xb4
1429 .byte   0x63,0xdf,0x4a,0x18,    0xe5,0x1a,0x31,0x82
1430 .byte   0x97,0x51,0x33,0x60,    0x62,0x53,0x7f,0x45
1431 .byte   0xb1,0x64,0x77,0xe0,    0xbb,0x6b,0xae,0x84
1432 .byte   0xfe,0x81,0xa0,0x1c,    0xf9,0x08,0x2b,0x94
1433 .byte   0x70,0x48,0x68,0x58,    0x8f,0x45,0xfd,0x19
1434 .byte   0x94,0xde,0x6c,0x87,    0x52,0x7b,0xf8,0xb7
1435 .byte   0xab,0x73,0xd3,0x23,    0x72,0x4b,0x02,0xe2
1436 .byte   0xe3,0x1f,0x8f,0x57,    0x66,0x55,0xab,0x2a
1437 .byte   0xb2,0xeb,0x28,0x07,    0x2f,0xb5,0xc2,0x03
1438 .byte   0x86,0xc5,0x7b,0x9a,    0xd3,0x37,0x08,0xa5
1439 .byte   0x30,0x28,0x87,0xf2,    0x23,0xbf,0xa5,0xb2
1440 .byte   0x02,0x03,0x6a,0xba,    0xed,0x16,0x82,0x5c
1441 .byte   0x8a,0xcf,0x1c,0x2b,    0xa7,0x79,0xb4,0x92
1442 .byte   0xf3,0x07,0xf2,0xf0,    0x4e,0x69,0xe2,0xa1
1443 .byte   0x65,0xda,0xf4,0xcd,    0x06,0x05,0xbe,0xd5
1444 .byte   0xd1,0x34,0x62,0x1f,    0xc4,0xa6,0xfe,0x8a
1445 .byte   0x34,0x2e,0x53,0x9d,    0xa2,0xf3,0x55,0xa0
1446 .byte   0x05,0x8a,0xe1,0x32,    0xa4,0xf6,0xeb,0x75
1447 .byte   0x0b,0x83,0xec,0x39,    0x40,0x60,0xef,0xaa
1448 .byte   0x5e,0x71,0x9f,0x06,    0xbd,0x6e,0x10,0x51
1449 .byte   0x3e,0x21,0x8a,0xf9,    0x96,0xdd,0x06,0x3d
1450 .byte   0xdd,0x3e,0x05,0xae,    0x4d,0xe6,0xbd,0x46
1451 .byte   0x91,0x54,0x8d,0xb5,    0x71,0xc4,0x5d,0x05
1452 .byte   0x04,0x06,0xd4,0x6f,    0x60,0x50,0x15,0xff
1453 .byte   0x19,0x98,0xfb,0x24,    0xd6,0xbd,0xe9,0x97
1454 .byte   0x89,0x40,0x43,0xcc,    0x67,0xd9,0x9e,0x77
1455 .byte   0xb0,0xe8,0x42,0xbd,    0x07,0x89,0x8b,0x88
1456 .byte   0xe7,0x19,0x5b,0x38,    0x79,0xc8,0xee,0xdb
1457 .byte   0xa1,0x7c,0x0a,0x47,    0x7c,0x42,0x0f,0xe9
1458 .byte   0xf8,0x84,0x1e,0xc9,    0x00,0x00,0x00,0x00
1459 .byte   0x09,0x80,0x86,0x83,    0x32,0x2b,0xed,0x48
1460 .byte   0x1e,0x11,0x70,0xac,    0x6c,0x5a,0x72,0x4e
1461 .byte   0xfd,0x0e,0xff,0xfb,    0x0f,0x85,0x38,0x56
1462 .byte   0x3d,0xae,0xd5,0x1e,    0x36,0x2d,0x39,0x27
1463 .byte   0x0a,0x0f,0xd9,0x64,    0x68,0x5c,0xa6,0x21
1464 .byte   0x9b,0x5b,0x54,0xd1,    0x24,0x36,0x2e,0x3a
1465 .byte   0x0c,0x0a,0x67,0xb1,    0x93,0x57,0xe7,0x0f
1466 .byte   0xb4,0xee,0x96,0xd2,    0x1b,0x9b,0x91,0x9e
1467 .byte   0x80,0xc0,0xc5,0x4f,    0x61,0xdc,0x20,0xa2
1468 .byte   0x5a,0x77,0x4b,0x69,    0x1c,0x12,0x1a,0x16
1469 .byte   0xe2,0x93,0xba,0x0a,    0xc0,0xa0,0x2a,0xe5
1470 .byte   0x3c,0x22,0xe0,0x43,    0x12,0x1b,0x17,0x1d
1471 .byte   0x0e,0x09,0x0d,0x0b,    0xf2,0x8b,0xc7,0xad
1472 .byte   0x2d,0xb6,0xa8,0xb9,    0x14,0x1e,0xa9,0xc8
1473 .byte   0x57,0xf1,0x19,0x85,    0xaf,0x75,0x07,0x4c
1474 .byte   0xee,0x99,0xdd,0xbb,    0xa3,0x7f,0x60,0xfd
1475 .byte   0xf7,0x01,0x26,0x9f,    0x5c,0x72,0xf5,0xbc
1476 .byte   0x44,0x66,0x3b,0xc5,    0x5b,0xfb,0x7e,0x34
1477 .byte   0x8b,0x43,0x29,0x76,    0xcb,0x23,0xc6,0xdc
1478 .byte   0xb6,0xed,0xfc,0x68,    0xb8,0xe4,0xf1,0x63
1479 .byte   0xd7,0x31,0xdc,0xca,    0x42,0x63,0x85,0x10
1480 .byte   0x13,0x97,0x22,0x40,    0x84,0xc6,0x11,0x20
1481 .byte   0x85,0x4a,0x24,0x7d,    0xd2,0xbb,0x3d,0xf8
1482 .byte   0xae,0xf9,0x32,0x11,    0xc7,0x29,0xa1,0x6d
1483 .byte   0x1d,0x9e,0x2f,0x4b,    0xdc,0xb2,0x30,0xf3
1484 .byte   0x0d,0x86,0x52,0xec,    0x77,0xc1,0xe3,0xd0
1485 .byte   0x2b,0xb3,0x16,0x6c,    0xa9,0x70,0xb9,0x99
1486 .byte   0x11,0x94,0x48,0xfa,    0x47,0xe9,0x64,0x22
1487 .byte   0xa8,0xfc,0x8c,0xc4,    0xa0,0xf0,0x3f,0x1a
1488 .byte   0x56,0x7d,0x2c,0xd8,    0x22,0x33,0x90,0xef
1489 .byte   0x87,0x49,0x4e,0xc7,    0xd9,0x38,0xd1,0xc1
1490 .byte   0x8c,0xca,0xa2,0xfe,    0x98,0xd4,0x0b,0x36
1491 .byte   0xa6,0xf5,0x81,0xcf,    0xa5,0x7a,0xde,0x28
1492 .byte   0xda,0xb7,0x8e,0x26,    0x3f,0xad,0xbf,0xa4
1493 .byte   0x2c,0x3a,0x9d,0xe4,    0x50,0x78,0x92,0x0d
1494 .byte   0x6a,0x5f,0xcc,0x9b,    0x54,0x7e,0x46,0x62
1495 .byte   0xf6,0x8d,0x13,0xc2,    0x90,0xd8,0xb8,0xe8
1496 .byte   0x2e,0x39,0xf7,0x5e,    0x82,0xc3,0xaf,0xf5
1497 .byte   0x9f,0x5d,0x80,0xbe,    0x69,0xd0,0x93,0x7c
1498 .byte   0x6f,0xd5,0x2d,0xa9,    0xcf,0x25,0x12,0xb3
1499 .byte   0xc8,0xac,0x99,0x3b,    0x10,0x18,0x7d,0xa7
1500 .byte   0xe8,0x9c,0x63,0x6e,    0xdb,0x3b,0xbb,0x7b
1501 .byte   0xcd,0x26,0x78,0x09,    0x6e,0x59,0x18,0xf4
1502 .byte   0xec,0x9a,0xb7,0x01,    0x83,0x4f,0x9a,0xa8
1503 .byte   0xe6,0x95,0x6e,0x65,    0xaa,0xff,0xe6,0x7e
1504 .byte   0x21,0xbc,0xcf,0x08,    0xef,0x15,0xe8,0xe6
1505 .byte   0xba,0xe7,0x9b,0xd9,    0x4a,0x6f,0x36,0xce
1506 .byte   0xea,0x9f,0x09,0xd4,    0x29,0xb0,0x7c,0xd6
1507 .byte   0x31,0xa4,0xb2,0xaf,    0x2a,0x3f,0x23,0x31
1508 .byte   0xc6,0xa5,0x94,0x30,    0x35,0xa2,0x66,0xc0
1509 .byte   0x74,0x4e,0xbc,0x37,    0xfc,0x82,0xca,0xa6
1510 .byte   0xe0,0x90,0xd0,0xb0,    0x33,0xa7,0xd8,0x15
1511 .byte   0xf1,0x04,0x98,0x4a,    0x41,0xec,0xda,0xf7
1512 .byte   0x7f,0xcd,0x50,0x0e,    0x17,0x91,0xf6,0x2f
1513 .byte   0x76,0x4d,0xd6,0x8d,    0x43,0xef,0xb0,0x4d
1514 .byte   0xcc,0xaa,0x4d,0x54,    0xe4,0x96,0x04,0xdf
1515 .byte   0x9e,0xd1,0xb5,0xe3,    0x4c,0x6a,0x88,0x1b
1516 .byte   0xc1,0x2c,0x1f,0xb8,    0x46,0x65,0x51,0x7f
1517 .byte   0x9d,0x5e,0xea,0x04,    0x01,0x8c,0x35,0x5d
1518 .byte   0xfa,0x87,0x74,0x73,    0xfb,0x0b,0x41,0x2e
1519 .byte   0xb3,0x67,0x1d,0x5a,    0x92,0xdb,0xd2,0x52
1520 .byte   0xe9,0x10,0x56,0x33,    0x6d,0xd6,0x47,0x13
1521 .byte   0x9a,0xd7,0x61,0x8c,    0x37,0xa1,0x0c,0x7a
1522 .byte   0x59,0xf8,0x14,0x8e,    0xeb,0x13,0x3c,0x89
1523 .byte   0xce,0xa9,0x27,0xee,    0xb7,0x61,0xc9,0x35
1524 .byte   0xe1,0x1c,0xe5,0xed,    0x7a,0x47,0xb1,0x3c
1525 .byte   0x9c,0xd2,0xdf,0x59,    0x55,0xf2,0x73,0x3f
1526 .byte   0x18,0x14,0xce,0x79,    0x73,0xc7,0x37,0xbf
1527 .byte   0x53,0xf7,0xcd,0xea,    0x5f,0xfd,0xaa,0x5b
1528 .byte   0xdf,0x3d,0x6f,0x14,    0x78,0x44,0xdb,0x86
1529 .byte   0xca,0xaf,0xf3,0x81,    0xb9,0x68,0xc4,0x3e
1530 .byte   0x38,0x24,0x34,0x2c,    0xc2,0xa3,0x40,0x5f
1531 .byte   0x16,0x1d,0xc3,0x72,    0xbc,0xe2,0x25,0x0c
1532 .byte   0x28,0x3c,0x49,0x8b,    0xff,0x0d,0x95,0x41
1533 .byte   0x39,0xa8,0x01,0x71,    0x08,0x0c,0xb3,0xde
1534 .byte   0xd8,0xb4,0xe4,0x9c,    0x64,0x56,0xc1,0x90
1535 .byte   0x7b,0xcb,0x84,0x61,    0xd5,0x32,0xb6,0x70
1536 .byte   0x48,0x6c,0x5c,0x74,    0xd0,0xb8,0x57,0x42
1537
1538 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38  # Td4
1539 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1540 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1541 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1542 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1543 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1544 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1545 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1546 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1547 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1548 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1549 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1550 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1551 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1552 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1553 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1554 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1555 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1556 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1557 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1558 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1559 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1560 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1561 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1562 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1563 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1564 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1565 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1566 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1567 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1568 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1569 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1570 ___
1571 \f
# Post-process the accumulated assembly one line at a time and print it:
#   1. replace every `...` span with the result of evaluating it as Perl;
#   2. expand the made-up _xtr/_ins/_ror/_bias instructions into real shifts
#      whose amount depends on $big_endian;
#   3. on little-endian targets, renumber the byte offset of lwl/lwr/swl/swr.
# STDOUT is then closed, and a failed close (e.g. a write error surfacing on
# flush) aborts the script instead of passing silently.
1572 foreach (split("\n",$code)) {
1573         s/\`([^\`]*)\`/eval $1/ge;
1574
1575         # made-up _instructions, _xtr, _ins, _ror and _bias, cope
1576         # with byte order dependencies...
1577         if (/^\s+_/) {
                 # two-operand shorthand "_op $r,x" becomes "_op $r,$r,x"
1578             s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
1579
                 # first alternative that matches wins; shift amounts are
                 # used as written on big-endian and remapped otherwise
1580             s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
1581                 sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
1582                                         :               eval("24-$3"))/e or
1583             s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
1584                 sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
1585                                         :               eval("24-$3"))/e or
1586             s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
1587                 sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
1588                                         :               eval("$3*-1"))/e or
1589             s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
1590                 sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
1591                                         :               eval("($3-16)&31"))/e;
1592
                 # tidy up: negative srl becomes sll, srl by 0 becomes a
                 # mask with 0xff, sll by 0 is commented out
1593             s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
1594                 sprintf("sll\t$1,$2,$3")/e                              or
1595             s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
1596                 sprintf("and\t$1,$2,0xff")/e                            or
1597             s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
1598         }
1599
1600         # convert lwl/lwr and swr/swl to little-endian order
             # The number right before "($reg)" is split into its word-aligned
             # part, which is kept, and its byte index 0..3, which is moved
             # down by one for lwl/swl and up by one for lwr/swr, modulo 4.
             # The prefix is matched non-greedily so that the whole number is
             # captured, and the "& 3" is parenthesised because & binds
             # looser than + and -; otherwise only offsets whose last number
             # is 0..3 would be converted correctly.
1601         if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
1602             s/([sl]wl.*?)([0-9]+)\((\$[0-9]+)\)/
1603                 sprintf("$1%d($3)",$2-$2%4+(($2%4-1)&3))/e              or
1604             s/([sl]wr.*?)([0-9]+)\((\$[0-9]+)\)/
1605                 sprintf("$1%d($3)",$2-$2%4+(($2%4+1)&3))/e;
1606         }
1607
1608         print $_,"\n";
1609 }
1610
1611 close STDOUT or die "error closing STDOUT: $!";
1610
1611 close STDOUT;