Following the license change, modify the boilerplates in crypto/aes/
[openssl.git] / crypto / aes / asm / aes-mips.pl
1 #! /usr/bin/env perl
2 # Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the Apache License 2.0 (the "License").  You may not use
5 # this file except in compliance with the License.  You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16
17 # AES for MIPS
18
19 # October 2010
20 #
21 # Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
22 # spends ~68 cycles per byte processed with 128-bit key. This is ~16%
23 # faster than gcc-generated code, which is not very impressive. But
24 # recall that compressed S-box requires extra processing, namely
25 # additional rotations. Rotations are implemented with lwl/lwr pairs,
26 # which are normally used for loading unaligned data. Another cool
27 # thing about this module is its endian neutrality, which means that
28 # it processes data without ever changing byte order...
29
30 # September 2012
31 #
32 # Add MIPS32R2 (~10% fewer instructions) and SmartMIPS ASE (further
33 # ~25% fewer instructions) code. Note that there is no run-time switch;
34 # instead, the code path is chosen at preprocessing time: pass -mips32r2
35 # and/or -msmartmips.
36
37 ######################################################################
38 # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
39 # widely used. Then there is a new contender: NUBI. It appears that if
40 # one picks the latter, it's possible to arrange code in an ABI-neutral
41 # manner. Therefore let's stick to the NUBI register layout:
42 #
# Symbolic register names for the NUBI layout.  Each variable holds the
# assembler spelling "$N" of the register number it is paired with below.
($zero, $at, $t0, $t1, $t2) =
    map(sprintf('$%d', $_), 0 .. 2, 24, 25);
($a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7) =
    map(sprintf('$%d', $_), 4 .. 11);
($s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7, $s8, $s9, $s10, $s11) =
    map(sprintf('$%d', $_), 12 .. 23);
($gp, $tp, $sp, $fp, $ra) =
    map(sprintf('$%d', $_), 3, 28 .. 31);
47 #
48 # The return value is placed in $a0. Following coding rules facilitate
49 # interoperability:
50 #
51 # - never ever touch $tp, "thread pointer", former $gp;
52 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
53 #   old code];
54 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
55 #
56 # For reference here is register layout for N32/64 MIPS ABIs:
57 #
58 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
59 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
60 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
61 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
62 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
63 #
# ABI flavour: the first command-line argument, "o32" when it is absent.
# Supported flavours are o32,n32,64,nubi32,nubi64.
$flavour = shift || "o32";

# Mnemonics that depend on pointer/register width, plus the register size
# in bytes.  n32 deliberately shares the 64-bit column: daddu, dsubu and
# dsll incidentally work even on n32.
($PTR_LA, $PTR_ADD, $PTR_SUB, $PTR_INS, $REG_S, $REG_L, $PTR_SLL, $SZREG) =
    ($flavour =~ /64|n32/i)
        ? ("dla", "daddu", "dsubu", "dins", "sd", "ld", "dsll", 8)
        : ("la",  "addu",  "subu",  "ins",  "sw", "lw", "sll",  4);

# Register handed to the .cpload/.cpsetup PIC directives: $t0 for the nubi
# flavours, $t2 for everything else.
if ($flavour =~ /nubi/i) {
        $pf = $t0;
} else {
        $pf = $t2;
}
86 #
87 # <appro@openssl.org>
88 #
89 ######################################################################
90
# Target endianness.  When a compiler is named in the CC environment
# variable, its preprocessor is run over the bare token MIPSEB:
# $big_endian becomes 0 if the token survives in the output and 1 if it
# does not (presumably because a big-endian toolchain predefines MIPSEB
# as a macro and therefore replaces it).
$big_endian=(`echo MIPSEB | $ENV{CC} -E -`=~/MIPSEB/)?0:1 if ($ENV{CC});

# Without CC, fall back to the byte order of the machine running this
# script: pack('N',1) is big-endian, unpack('L',...) is native order.
if (!defined($big_endian))
{    $big_endian=(unpack('L',pack('N',1))==1);   }

# The output file is the first remaining argument that looks like
# "name.ext"; arguments in front of it are consumed and ignored.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT exactly once, and only when a file was actually named.
# Previously STDOUT was reopened twice with an unchecked two-argument
# open: that truncated an extra file when several file-like arguments
# were given, attempted open(">") when none was given, and silently
# carried on if the file could not be created.
if ($output) {
        open(STDOUT,'>',$output) or die "can't open $output: $!";
}
101
# Byte offsets, within a 32-bit word, used by the lwl/lwr/swl/swr
# templates further down; automatically converted to little-endian.
my $MSB = 0;
my $LSB = 3;

# File prologue of the generated assembly: architecture header, .text
# section, the conditional ".option pic2" and ".set noat".
$code .= join("\n",
        '#include "mips_arch.h"',
        '',
        '.text',
        '#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))',
        '.option pic2',
        '#endif',
        '.set    noat',
        '');
113 \f
114 {{{
# Lexical register aliases used by the assembly templates that follow.
#
# Statement order matters here.  The index registers $i0..$i3 are bound to
# the file-level values of $at,$t0,$t1,$t2 first; only the next line then
# re-declares $t0..$t11 as lexicals naming registers $12..$23.  Likewise the
# lexical $s0..$s3 (the four state words) take the values of $a4..$a7 and
# hide the file-level $s0..$s3 from here on.
#
# $FRAMESIZE is 16 register-sized stack slots.  $SAVED_REGS_MASK is the
# value given to the .mask directive of AES_encrypt; the nubi variant has
# extra bits set, matching the extra registers the nubi prologue saves.
# $key0 and $cnt are used by the round loop as the running key pointer and
# the remaining-rounds counter.
115 my $FRAMESIZE=16*$SZREG;
116 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";
117
118 my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
119 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
120 my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
121 my ($key0,$cnt)=($gp,$fp);
122
# Template for the internal routine _mips_AES_encrypt, followed by the
# label and .frame/.mask directives that open the public AES_encrypt.
#
# As the template reads: the four state words arrive in $s0..$s3, $key
# points at the key schedule and $Tbl at the lookup table.  The routine
# XORs the state with the first four key words, loads a counter from
# offset 240 of $key, decrements it and runs .Loop_enc until the counter
# reaches zero.  A final pass then uses byte loads (lbu) instead of word
# loads, XORs in the last four key words and returns via jr $ra with the
# result in $s0..$s3.
#
# The C preprocessor selects one of three loop bodies:
#   __mips_smartmips                          ext + lwxs indexed loads, rotr
#   _MIPS_ARCH_MIPS32R2 / _MIPS_ARCH_MIPS64R2 lw + rotr, with an ins-based
#                                             index whose placement depends
#                                             on _MIPSEL
#   otherwise                                 lwl/lwr pairs at byte offsets
#                                             3/2, 2/1 and 1/0 in place of
#                                             the rotations
#
# _xtr, _ins and _ins2 are not defined in this excerpt.
# NOTE(review): presumably they are pseudo-instructions rewritten by a
# post-processing step later in the file - confirm.
123 # instruction ordering is "stolen" from output from MIPSpro assembler
124 # invoked with -mips3 -O3 arguments...
125 $code.=<<___;
126 .align  5
127 .ent    _mips_AES_encrypt
128 _mips_AES_encrypt:
129         .frame  $sp,0,$ra
130         .set    reorder
131         lw      $t0,0($key)
132         lw      $t1,4($key)
133         lw      $t2,8($key)
134         lw      $t3,12($key)
135         lw      $cnt,240($key)
136         $PTR_ADD $key0,$key,16
137
138         xor     $s0,$t0
139         xor     $s1,$t1
140         xor     $s2,$t2
141         xor     $s3,$t3
142
143         subu    $cnt,1
144 #if defined(__mips_smartmips)
145         ext     $i0,$s1,16,8
146 .Loop_enc:
147         ext     $i1,$s2,16,8
148         ext     $i2,$s3,16,8
149         ext     $i3,$s0,16,8
150         lwxs    $t0,$i0($Tbl)           # Te1[s1>>16]
151         ext     $i0,$s2,8,8
152         lwxs    $t1,$i1($Tbl)           # Te1[s2>>16]
153         ext     $i1,$s3,8,8
154         lwxs    $t2,$i2($Tbl)           # Te1[s3>>16]
155         ext     $i2,$s0,8,8
156         lwxs    $t3,$i3($Tbl)           # Te1[s0>>16]
157         ext     $i3,$s1,8,8
158
159         lwxs    $t4,$i0($Tbl)           # Te2[s2>>8]
160         ext     $i0,$s3,0,8
161         lwxs    $t5,$i1($Tbl)           # Te2[s3>>8]
162         ext     $i1,$s0,0,8
163         lwxs    $t6,$i2($Tbl)           # Te2[s0>>8]
164         ext     $i2,$s1,0,8
165         lwxs    $t7,$i3($Tbl)           # Te2[s1>>8]
166         ext     $i3,$s2,0,8
167
168         lwxs    $t8,$i0($Tbl)           # Te3[s3]
169         ext     $i0,$s0,24,8
170         lwxs    $t9,$i1($Tbl)           # Te3[s0]
171         ext     $i1,$s1,24,8
172         lwxs    $t10,$i2($Tbl)          # Te3[s1]
173         ext     $i2,$s2,24,8
174         lwxs    $t11,$i3($Tbl)          # Te3[s2]
175         ext     $i3,$s3,24,8
176
177         rotr    $t0,$t0,8
178         rotr    $t1,$t1,8
179         rotr    $t2,$t2,8
180         rotr    $t3,$t3,8
181
182         rotr    $t4,$t4,16
183         rotr    $t5,$t5,16
184         rotr    $t6,$t6,16
185         rotr    $t7,$t7,16
186
187         xor     $t0,$t4
188         lwxs    $t4,$i0($Tbl)           # Te0[s0>>24]
189         xor     $t1,$t5
190         lwxs    $t5,$i1($Tbl)           # Te0[s1>>24]
191         xor     $t2,$t6
192         lwxs    $t6,$i2($Tbl)           # Te0[s2>>24]
193         xor     $t3,$t7
194         lwxs    $t7,$i3($Tbl)           # Te0[s3>>24]
195
196         rotr    $t8,$t8,24
197         lw      $s0,0($key0)
198         rotr    $t9,$t9,24
199         lw      $s1,4($key0)
200         rotr    $t10,$t10,24
201         lw      $s2,8($key0)
202         rotr    $t11,$t11,24
203         lw      $s3,12($key0)
204
205         xor     $t0,$t8
206         xor     $t1,$t9
207         xor     $t2,$t10
208         xor     $t3,$t11
209
210         xor     $t0,$t4
211         xor     $t1,$t5
212         xor     $t2,$t6
213         xor     $t3,$t7
214
215         subu    $cnt,1
216         $PTR_ADD $key0,16
217         xor     $s0,$t0
218         xor     $s1,$t1
219         xor     $s2,$t2
220         xor     $s3,$t3
221         .set    noreorder
222         bnez    $cnt,.Loop_enc
223         ext     $i0,$s1,16,8
224
225         _xtr    $i0,$s1,16-2
226 #else
227         _xtr    $i0,$s1,16-2
228 .Loop_enc:
229         _xtr    $i1,$s2,16-2
230         _xtr    $i2,$s3,16-2
231         _xtr    $i3,$s0,16-2
232         and     $i0,0x3fc
233         and     $i1,0x3fc
234         and     $i2,0x3fc
235         and     $i3,0x3fc
236         $PTR_ADD $i0,$Tbl
237         $PTR_ADD $i1,$Tbl
238         $PTR_ADD $i2,$Tbl
239         $PTR_ADD $i3,$Tbl
240 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
241         lw      $t0,0($i0)              # Te1[s1>>16]
242         _xtr    $i0,$s2,8-2
243         lw      $t1,0($i1)              # Te1[s2>>16]
244         _xtr    $i1,$s3,8-2
245         lw      $t2,0($i2)              # Te1[s3>>16]
246         _xtr    $i2,$s0,8-2
247         lw      $t3,0($i3)              # Te1[s0>>16]
248         _xtr    $i3,$s1,8-2
249 #else
250         lwl     $t0,3($i0)              # Te1[s1>>16]
251         lwl     $t1,3($i1)              # Te1[s2>>16]
252         lwl     $t2,3($i2)              # Te1[s3>>16]
253         lwl     $t3,3($i3)              # Te1[s0>>16]
254         lwr     $t0,2($i0)              # Te1[s1>>16]
255         _xtr    $i0,$s2,8-2
256         lwr     $t1,2($i1)              # Te1[s2>>16]
257         _xtr    $i1,$s3,8-2
258         lwr     $t2,2($i2)              # Te1[s3>>16]
259         _xtr    $i2,$s0,8-2
260         lwr     $t3,2($i3)              # Te1[s0>>16]
261         _xtr    $i3,$s1,8-2
262 #endif
263         and     $i0,0x3fc
264         and     $i1,0x3fc
265         and     $i2,0x3fc
266         and     $i3,0x3fc
267         $PTR_ADD $i0,$Tbl
268         $PTR_ADD $i1,$Tbl
269         $PTR_ADD $i2,$Tbl
270         $PTR_ADD $i3,$Tbl
271 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
272         rotr    $t0,$t0,8
273         rotr    $t1,$t1,8
274         rotr    $t2,$t2,8
275         rotr    $t3,$t3,8
276 # if defined(_MIPSEL)
277         lw      $t4,0($i0)              # Te2[s2>>8]
278         _xtr    $i0,$s3,0-2
279         lw      $t5,0($i1)              # Te2[s3>>8]
280         _xtr    $i1,$s0,0-2
281         lw      $t6,0($i2)              # Te2[s0>>8]
282         _xtr    $i2,$s1,0-2
283         lw      $t7,0($i3)              # Te2[s1>>8]
284         _xtr    $i3,$s2,0-2
285
286         and     $i0,0x3fc
287         and     $i1,0x3fc
288         and     $i2,0x3fc
289         and     $i3,0x3fc
290         $PTR_ADD $i0,$Tbl
291         $PTR_ADD $i1,$Tbl
292         $PTR_ADD $i2,$Tbl
293         $PTR_ADD $i3,$Tbl
294         lw      $t8,0($i0)              # Te3[s3]
295         $PTR_INS $i0,$s0,2,8
296         lw      $t9,0($i1)              # Te3[s0]
297         $PTR_INS $i1,$s1,2,8
298         lw      $t10,0($i2)             # Te3[s1]
299         $PTR_INS $i2,$s2,2,8
300         lw      $t11,0($i3)             # Te3[s2]
301         $PTR_INS $i3,$s3,2,8
302 # else
303         lw      $t4,0($i0)              # Te2[s2>>8]
304         $PTR_INS $i0,$s3,2,8
305         lw      $t5,0($i1)              # Te2[s3>>8]
306         $PTR_INS $i1,$s0,2,8
307         lw      $t6,0($i2)              # Te2[s0>>8]
308         $PTR_INS $i2,$s1,2,8
309         lw      $t7,0($i3)              # Te2[s1>>8]
310         $PTR_INS $i3,$s2,2,8
311
312         lw      $t8,0($i0)              # Te3[s3]
313         _xtr    $i0,$s0,24-2
314         lw      $t9,0($i1)              # Te3[s0]
315         _xtr    $i1,$s1,24-2
316         lw      $t10,0($i2)             # Te3[s1]
317         _xtr    $i2,$s2,24-2
318         lw      $t11,0($i3)             # Te3[s2]
319         _xtr    $i3,$s3,24-2
320
321         and     $i0,0x3fc
322         and     $i1,0x3fc
323         and     $i2,0x3fc
324         and     $i3,0x3fc
325         $PTR_ADD $i0,$Tbl
326         $PTR_ADD $i1,$Tbl
327         $PTR_ADD $i2,$Tbl
328         $PTR_ADD $i3,$Tbl
329 # endif
330         rotr    $t4,$t4,16
331         rotr    $t5,$t5,16
332         rotr    $t6,$t6,16
333         rotr    $t7,$t7,16
334
335         rotr    $t8,$t8,24
336         rotr    $t9,$t9,24
337         rotr    $t10,$t10,24
338         rotr    $t11,$t11,24
339 #else
340         lwl     $t4,2($i0)              # Te2[s2>>8]
341         lwl     $t5,2($i1)              # Te2[s3>>8]
342         lwl     $t6,2($i2)              # Te2[s0>>8]
343         lwl     $t7,2($i3)              # Te2[s1>>8]
344         lwr     $t4,1($i0)              # Te2[s2>>8]
345         _xtr    $i0,$s3,0-2
346         lwr     $t5,1($i1)              # Te2[s3>>8]
347         _xtr    $i1,$s0,0-2
348         lwr     $t6,1($i2)              # Te2[s0>>8]
349         _xtr    $i2,$s1,0-2
350         lwr     $t7,1($i3)              # Te2[s1>>8]
351         _xtr    $i3,$s2,0-2
352
353         and     $i0,0x3fc
354         and     $i1,0x3fc
355         and     $i2,0x3fc
356         and     $i3,0x3fc
357         $PTR_ADD $i0,$Tbl
358         $PTR_ADD $i1,$Tbl
359         $PTR_ADD $i2,$Tbl
360         $PTR_ADD $i3,$Tbl
361         lwl     $t8,1($i0)              # Te3[s3]
362         lwl     $t9,1($i1)              # Te3[s0]
363         lwl     $t10,1($i2)             # Te3[s1]
364         lwl     $t11,1($i3)             # Te3[s2]
365         lwr     $t8,0($i0)              # Te3[s3]
366         _xtr    $i0,$s0,24-2
367         lwr     $t9,0($i1)              # Te3[s0]
368         _xtr    $i1,$s1,24-2
369         lwr     $t10,0($i2)             # Te3[s1]
370         _xtr    $i2,$s2,24-2
371         lwr     $t11,0($i3)             # Te3[s2]
372         _xtr    $i3,$s3,24-2
373
374         and     $i0,0x3fc
375         and     $i1,0x3fc
376         and     $i2,0x3fc
377         and     $i3,0x3fc
378         $PTR_ADD $i0,$Tbl
379         $PTR_ADD $i1,$Tbl
380         $PTR_ADD $i2,$Tbl
381         $PTR_ADD $i3,$Tbl
382 #endif
383         xor     $t0,$t4
384         lw      $t4,0($i0)              # Te0[s0>>24]
385         xor     $t1,$t5
386         lw      $t5,0($i1)              # Te0[s1>>24]
387         xor     $t2,$t6
388         lw      $t6,0($i2)              # Te0[s2>>24]
389         xor     $t3,$t7
390         lw      $t7,0($i3)              # Te0[s3>>24]
391
392         xor     $t0,$t8
393         lw      $s0,0($key0)
394         xor     $t1,$t9
395         lw      $s1,4($key0)
396         xor     $t2,$t10
397         lw      $s2,8($key0)
398         xor     $t3,$t11
399         lw      $s3,12($key0)
400
401         xor     $t0,$t4
402         xor     $t1,$t5
403         xor     $t2,$t6
404         xor     $t3,$t7
405
406         subu    $cnt,1
407         $PTR_ADD $key0,16
408         xor     $s0,$t0
409         xor     $s1,$t1
410         xor     $s2,$t2
411         xor     $s3,$t3
412         .set    noreorder
413         bnez    $cnt,.Loop_enc
414         _xtr    $i0,$s1,16-2
415 #endif
416
417         .set    reorder
418         _xtr    $i1,$s2,16-2
419         _xtr    $i2,$s3,16-2
420         _xtr    $i3,$s0,16-2
421         and     $i0,0x3fc
422         and     $i1,0x3fc
423         and     $i2,0x3fc
424         and     $i3,0x3fc
425         $PTR_ADD $i0,$Tbl
426         $PTR_ADD $i1,$Tbl
427         $PTR_ADD $i2,$Tbl
428         $PTR_ADD $i3,$Tbl
429         lbu     $t0,2($i0)              # Te4[s1>>16]
430         _xtr    $i0,$s2,8-2
431         lbu     $t1,2($i1)              # Te4[s2>>16]
432         _xtr    $i1,$s3,8-2
433         lbu     $t2,2($i2)              # Te4[s3>>16]
434         _xtr    $i2,$s0,8-2
435         lbu     $t3,2($i3)              # Te4[s0>>16]
436         _xtr    $i3,$s1,8-2
437
438         and     $i0,0x3fc
439         and     $i1,0x3fc
440         and     $i2,0x3fc
441         and     $i3,0x3fc
442         $PTR_ADD $i0,$Tbl
443         $PTR_ADD $i1,$Tbl
444         $PTR_ADD $i2,$Tbl
445         $PTR_ADD $i3,$Tbl
446 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
447 # if defined(_MIPSEL)
448         lbu     $t4,2($i0)              # Te4[s2>>8]
449         $PTR_INS $i0,$s0,2,8
450         lbu     $t5,2($i1)              # Te4[s3>>8]
451         $PTR_INS $i1,$s1,2,8
452         lbu     $t6,2($i2)              # Te4[s0>>8]
453         $PTR_INS $i2,$s2,2,8
454         lbu     $t7,2($i3)              # Te4[s1>>8]
455         $PTR_INS $i3,$s3,2,8
456
457         lbu     $t8,2($i0)              # Te4[s0>>24]
458         _xtr    $i0,$s3,0-2
459         lbu     $t9,2($i1)              # Te4[s1>>24]
460         _xtr    $i1,$s0,0-2
461         lbu     $t10,2($i2)             # Te4[s2>>24]
462         _xtr    $i2,$s1,0-2
463         lbu     $t11,2($i3)             # Te4[s3>>24]
464         _xtr    $i3,$s2,0-2
465
466         and     $i0,0x3fc
467         and     $i1,0x3fc
468         and     $i2,0x3fc
469         and     $i3,0x3fc
470         $PTR_ADD $i0,$Tbl
471         $PTR_ADD $i1,$Tbl
472         $PTR_ADD $i2,$Tbl
473         $PTR_ADD $i3,$Tbl
474 # else
475         lbu     $t4,2($i0)              # Te4[s2>>8]
476         _xtr    $i0,$s0,24-2
477         lbu     $t5,2($i1)              # Te4[s3>>8]
478         _xtr    $i1,$s1,24-2
479         lbu     $t6,2($i2)              # Te4[s0>>8]
480         _xtr    $i2,$s2,24-2
481         lbu     $t7,2($i3)              # Te4[s1>>8]
482         _xtr    $i3,$s3,24-2
483
484         and     $i0,0x3fc
485         and     $i1,0x3fc
486         and     $i2,0x3fc
487         and     $i3,0x3fc
488         $PTR_ADD $i0,$Tbl
489         $PTR_ADD $i1,$Tbl
490         $PTR_ADD $i2,$Tbl
491         $PTR_ADD $i3,$Tbl
492         lbu     $t8,2($i0)              # Te4[s0>>24]
493         $PTR_INS $i0,$s3,2,8
494         lbu     $t9,2($i1)              # Te4[s1>>24]
495         $PTR_INS $i1,$s0,2,8
496         lbu     $t10,2($i2)             # Te4[s2>>24]
497         $PTR_INS $i2,$s1,2,8
498         lbu     $t11,2($i3)             # Te4[s3>>24]
499         $PTR_INS $i3,$s2,2,8
500 # endif
501         _ins    $t0,16
502         _ins    $t1,16
503         _ins    $t2,16
504         _ins    $t3,16
505
506         _ins2   $t0,$t4,8
507         lbu     $t4,2($i0)              # Te4[s3]
508         _ins2   $t1,$t5,8
509         lbu     $t5,2($i1)              # Te4[s0]
510         _ins2   $t2,$t6,8
511         lbu     $t6,2($i2)              # Te4[s1]
512         _ins2   $t3,$t7,8
513         lbu     $t7,2($i3)              # Te4[s2]
514
515         _ins2   $t0,$t8,24
516         lw      $s0,0($key0)
517         _ins2   $t1,$t9,24
518         lw      $s1,4($key0)
519         _ins2   $t2,$t10,24
520         lw      $s2,8($key0)
521         _ins2   $t3,$t11,24
522         lw      $s3,12($key0)
523
524         _ins2   $t0,$t4,0
525         _ins2   $t1,$t5,0
526         _ins2   $t2,$t6,0
527         _ins2   $t3,$t7,0
528 #else
529         lbu     $t4,2($i0)              # Te4[s2>>8]
530         _xtr    $i0,$s0,24-2
531         lbu     $t5,2($i1)              # Te4[s3>>8]
532         _xtr    $i1,$s1,24-2
533         lbu     $t6,2($i2)              # Te4[s0>>8]
534         _xtr    $i2,$s2,24-2
535         lbu     $t7,2($i3)              # Te4[s1>>8]
536         _xtr    $i3,$s3,24-2
537
538         and     $i0,0x3fc
539         and     $i1,0x3fc
540         and     $i2,0x3fc
541         and     $i3,0x3fc
542         $PTR_ADD $i0,$Tbl
543         $PTR_ADD $i1,$Tbl
544         $PTR_ADD $i2,$Tbl
545         $PTR_ADD $i3,$Tbl
546         lbu     $t8,2($i0)              # Te4[s0>>24]
547         _xtr    $i0,$s3,0-2
548         lbu     $t9,2($i1)              # Te4[s1>>24]
549         _xtr    $i1,$s0,0-2
550         lbu     $t10,2($i2)             # Te4[s2>>24]
551         _xtr    $i2,$s1,0-2
552         lbu     $t11,2($i3)             # Te4[s3>>24]
553         _xtr    $i3,$s2,0-2
554
555         and     $i0,0x3fc
556         and     $i1,0x3fc
557         and     $i2,0x3fc
558         and     $i3,0x3fc
559         $PTR_ADD $i0,$Tbl
560         $PTR_ADD $i1,$Tbl
561         $PTR_ADD $i2,$Tbl
562         $PTR_ADD $i3,$Tbl
563
564         _ins    $t0,16
565         _ins    $t1,16
566         _ins    $t2,16
567         _ins    $t3,16
568
569         _ins    $t4,8
570         _ins    $t5,8
571         _ins    $t6,8
572         _ins    $t7,8
573
574         xor     $t0,$t4
575         lbu     $t4,2($i0)              # Te4[s3]
576         xor     $t1,$t5
577         lbu     $t5,2($i1)              # Te4[s0]
578         xor     $t2,$t6
579         lbu     $t6,2($i2)              # Te4[s1]
580         xor     $t3,$t7
581         lbu     $t7,2($i3)              # Te4[s2]
582
583         _ins    $t8,24
584         lw      $s0,0($key0)
585         _ins    $t9,24
586         lw      $s1,4($key0)
587         _ins    $t10,24
588         lw      $s2,8($key0)
589         _ins    $t11,24
590         lw      $s3,12($key0)
591
592         xor     $t0,$t8
593         xor     $t1,$t9
594         xor     $t2,$t10
595         xor     $t3,$t11
596
597         _ins    $t4,0
598         _ins    $t5,0
599         _ins    $t6,0
600         _ins    $t7,0
601
602         xor     $t0,$t4
603         xor     $t1,$t5
604         xor     $t2,$t6
605         xor     $t3,$t7
606 #endif
607         xor     $s0,$t0
608         xor     $s1,$t1
609         xor     $s2,$t2
610         xor     $s3,$t3
611
612         jr      $ra
613 .end    _mips_AES_encrypt
614
615 .align  5
616 .globl  AES_encrypt
617 .ent    AES_encrypt
618 AES_encrypt:
619         .frame  $sp,$FRAMESIZE,$ra
620         .mask   $SAVED_REGS_MASK,-$SZREG
621         .set    noreorder
622 ___
# Body of the public AES_encrypt entry point ($inp, $out and $key are the
# first three argument registers), assembled from flavour-dependent
# pieces:
#   - o32 flavours emit ".cpload $pf" first; all other flavours emit
#     .cplocal/.cpsetup after the register saves instead;
#   - the prologue allocates $FRAMESIZE bytes and saves $ra, $fp and
#     $s4..$s11; nubi flavours additionally save $12..$15 and $gp;
#   - $Tbl is pointed at AES_Te, the 16 input bytes are loaded from $inp
#     into $s0..$s3 (plain lw on MIPS R6, lwl/lwr pairs using the
#     $MSB/$LSB offsets otherwise), _mips_AES_encrypt is called with bal,
#     and the result is stored to $out (sw on R6, swr/swl otherwise);
#   - the epilogue reloads the saved registers and releases the frame in
#     the instruction following "jr $ra" under .set noreorder.
623 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
624         .cpload $pf
625 ___
626 $code.=<<___;
627         $PTR_SUB $sp,$FRAMESIZE
628         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
629         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
630         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
631         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
632         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
633         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
634         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
635         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
636         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
637         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
638 ___
639 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
640         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
641         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
642         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
643         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
644         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
645 ___
646 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
647         .cplocal        $Tbl
648         .cpsetup        $pf,$zero,AES_encrypt
649 ___
650 $code.=<<___;
651         .set    reorder
652         $PTR_LA $Tbl,AES_Te             # PIC-ified 'load address'
653
654 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
655         lw      $s0,0($inp)
656         lw      $s1,4($inp)
657         lw      $s2,8($inp)
658         lw      $s3,12($inp)
659 #else
660         lwl     $s0,0+$MSB($inp)
661         lwl     $s1,4+$MSB($inp)
662         lwl     $s2,8+$MSB($inp)
663         lwl     $s3,12+$MSB($inp)
664         lwr     $s0,0+$LSB($inp)
665         lwr     $s1,4+$LSB($inp)
666         lwr     $s2,8+$LSB($inp)
667         lwr     $s3,12+$LSB($inp)
668 #endif
669
670         bal     _mips_AES_encrypt
671
672 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
673         sw      $s0,0($out)
674         sw      $s1,4($out)
675         sw      $s2,8($out)
676         sw      $s3,12($out)
677 #else
678         swr     $s0,0+$LSB($out)
679         swr     $s1,4+$LSB($out)
680         swr     $s2,8+$LSB($out)
681         swr     $s3,12+$LSB($out)
682         swl     $s0,0+$MSB($out)
683         swl     $s1,4+$MSB($out)
684         swl     $s2,8+$MSB($out)
685         swl     $s3,12+$MSB($out)
686 #endif
687
688         .set    noreorder
689         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
690         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
691         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
692         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
693         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
694         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
695         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
696         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
697         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
698         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
699 ___
700 $code.=<<___ if ($flavour =~ /nubi/i);
701         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
702         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
703         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
704         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
705         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
706 ___
707 $code.=<<___;
708         jr      $ra
709         $PTR_ADD $sp,$FRAMESIZE
710 .end    AES_encrypt
711 ___
712 \f
713 $code.=<<___;
714 .align  5
715 .ent    _mips_AES_decrypt
716 _mips_AES_decrypt:
717         .frame  $sp,0,$ra
718         .set    reorder
719         lw      $t0,0($key)
720         lw      $t1,4($key)
721         lw      $t2,8($key)
722         lw      $t3,12($key)
723         lw      $cnt,240($key)
724         $PTR_ADD $key0,$key,16
725
726         xor     $s0,$t0
727         xor     $s1,$t1
728         xor     $s2,$t2
729         xor     $s3,$t3
730
731         subu    $cnt,1
732 #if defined(__mips_smartmips)
733         ext     $i0,$s3,16,8
734 .Loop_dec:
735         ext     $i1,$s0,16,8
736         ext     $i2,$s1,16,8
737         ext     $i3,$s2,16,8
738         lwxs    $t0,$i0($Tbl)           # Td1[s3>>16]
739         ext     $i0,$s2,8,8
740         lwxs    $t1,$i1($Tbl)           # Td1[s0>>16]
741         ext     $i1,$s3,8,8
742         lwxs    $t2,$i2($Tbl)           # Td1[s1>>16]
743         ext     $i2,$s0,8,8
744         lwxs    $t3,$i3($Tbl)           # Td1[s2>>16]
745         ext     $i3,$s1,8,8
746
747         lwxs    $t4,$i0($Tbl)           # Td2[s2>>8]
748         ext     $i0,$s1,0,8
749         lwxs    $t5,$i1($Tbl)           # Td2[s3>>8]
750         ext     $i1,$s2,0,8
751         lwxs    $t6,$i2($Tbl)           # Td2[s0>>8]
752         ext     $i2,$s3,0,8
753         lwxs    $t7,$i3($Tbl)           # Td2[s1>>8]
754         ext     $i3,$s0,0,8
755
756         lwxs    $t8,$i0($Tbl)           # Td3[s1]
757         ext     $i0,$s0,24,8
758         lwxs    $t9,$i1($Tbl)           # Td3[s2]
759         ext     $i1,$s1,24,8
760         lwxs    $t10,$i2($Tbl)          # Td3[s3]
761         ext     $i2,$s2,24,8
762         lwxs    $t11,$i3($Tbl)          # Td3[s0]
763         ext     $i3,$s3,24,8
764
765         rotr    $t0,$t0,8
766         rotr    $t1,$t1,8
767         rotr    $t2,$t2,8
768         rotr    $t3,$t3,8
769
770         rotr    $t4,$t4,16
771         rotr    $t5,$t5,16
772         rotr    $t6,$t6,16
773         rotr    $t7,$t7,16
774
775         xor     $t0,$t4
776         lwxs    $t4,$i0($Tbl)           # Td0[s0>>24]
777         xor     $t1,$t5
778         lwxs    $t5,$i1($Tbl)           # Td0[s1>>24]
779         xor     $t2,$t6
780         lwxs    $t6,$i2($Tbl)           # Td0[s2>>24]
781         xor     $t3,$t7
782         lwxs    $t7,$i3($Tbl)           # Td0[s3>>24]
783
784         rotr    $t8,$t8,24
785         lw      $s0,0($key0)
786         rotr    $t9,$t9,24
787         lw      $s1,4($key0)
788         rotr    $t10,$t10,24
789         lw      $s2,8($key0)
790         rotr    $t11,$t11,24
791         lw      $s3,12($key0)
792
793         xor     $t0,$t8
794         xor     $t1,$t9
795         xor     $t2,$t10
796         xor     $t3,$t11
797
798         xor     $t0,$t4
799         xor     $t1,$t5
800         xor     $t2,$t6
801         xor     $t3,$t7
802
803         subu    $cnt,1
804         $PTR_ADD $key0,16
805         xor     $s0,$t0
806         xor     $s1,$t1
807         xor     $s2,$t2
808         xor     $s3,$t3
809         .set    noreorder
810         bnez    $cnt,.Loop_dec
811         ext     $i0,$s3,16,8
812
813         _xtr    $i0,$s3,16-2
814 #else
815         _xtr    $i0,$s3,16-2
816 .Loop_dec:
817         _xtr    $i1,$s0,16-2
818         _xtr    $i2,$s1,16-2
819         _xtr    $i3,$s2,16-2
820         and     $i0,0x3fc
821         and     $i1,0x3fc
822         and     $i2,0x3fc
823         and     $i3,0x3fc
824         $PTR_ADD $i0,$Tbl
825         $PTR_ADD $i1,$Tbl
826         $PTR_ADD $i2,$Tbl
827         $PTR_ADD $i3,$Tbl
828 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
829         lw      $t0,0($i0)              # Td1[s3>>16]
830         _xtr    $i0,$s2,8-2
831         lw      $t1,0($i1)              # Td1[s0>>16]
832         _xtr    $i1,$s3,8-2
833         lw      $t2,0($i2)              # Td1[s1>>16]
834         _xtr    $i2,$s0,8-2
835         lw      $t3,0($i3)              # Td1[s2>>16]
836         _xtr    $i3,$s1,8-2
837 #else
838         lwl     $t0,3($i0)              # Td1[s3>>16]
839         lwl     $t1,3($i1)              # Td1[s0>>16]
840         lwl     $t2,3($i2)              # Td1[s1>>16]
841         lwl     $t3,3($i3)              # Td1[s2>>16]
842         lwr     $t0,2($i0)              # Td1[s3>>16]
843         _xtr    $i0,$s2,8-2
844         lwr     $t1,2($i1)              # Td1[s0>>16]
845         _xtr    $i1,$s3,8-2
846         lwr     $t2,2($i2)              # Td1[s1>>16]
847         _xtr    $i2,$s0,8-2
848         lwr     $t3,2($i3)              # Td1[s2>>16]
849         _xtr    $i3,$s1,8-2
850 #endif
851
852         and     $i0,0x3fc
853         and     $i1,0x3fc
854         and     $i2,0x3fc
855         and     $i3,0x3fc
856         $PTR_ADD $i0,$Tbl
857         $PTR_ADD $i1,$Tbl
858         $PTR_ADD $i2,$Tbl
859         $PTR_ADD $i3,$Tbl
860 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
861         rotr    $t0,$t0,8
862         rotr    $t1,$t1,8
863         rotr    $t2,$t2,8
864         rotr    $t3,$t3,8
865 # if defined(_MIPSEL)
866         lw      $t4,0($i0)              # Td2[s2>>8]
867         _xtr    $i0,$s1,0-2
868         lw      $t5,0($i1)              # Td2[s3>>8]
869         _xtr    $i1,$s2,0-2
870         lw      $t6,0($i2)              # Td2[s0>>8]
871         _xtr    $i2,$s3,0-2
872         lw      $t7,0($i3)              # Td2[s1>>8]
873         _xtr    $i3,$s0,0-2
874
875         and     $i0,0x3fc
876         and     $i1,0x3fc
877         and     $i2,0x3fc
878         and     $i3,0x3fc
879         $PTR_ADD $i0,$Tbl
880         $PTR_ADD $i1,$Tbl
881         $PTR_ADD $i2,$Tbl
882         $PTR_ADD $i3,$Tbl
883         lw      $t8,0($i0)              # Td3[s1]
884         $PTR_INS $i0,$s0,2,8
885         lw      $t9,0($i1)              # Td3[s2]
886         $PTR_INS $i1,$s1,2,8
887         lw      $t10,0($i2)             # Td3[s3]
888         $PTR_INS $i2,$s2,2,8
889         lw      $t11,0($i3)             # Td3[s0]
890         $PTR_INS $i3,$s3,2,8
891 #else
892         lw      $t4,0($i0)              # Td2[s2>>8]
893         $PTR_INS $i0,$s1,2,8
894         lw      $t5,0($i1)              # Td2[s3>>8]
895         $PTR_INS $i1,$s2,2,8
896         lw      $t6,0($i2)              # Td2[s0>>8]
897         $PTR_INS $i2,$s3,2,8
898         lw      $t7,0($i3)              # Td2[s1>>8]
899         $PTR_INS $i3,$s0,2,8
900
901         lw      $t8,0($i0)              # Td3[s1]
902         _xtr    $i0,$s0,24-2
903         lw      $t9,0($i1)              # Td3[s2]
904         _xtr    $i1,$s1,24-2
905         lw      $t10,0($i2)             # Td3[s3]
906         _xtr    $i2,$s2,24-2
907         lw      $t11,0($i3)             # Td3[s0]
908         _xtr    $i3,$s3,24-2
909
910         and     $i0,0x3fc
911         and     $i1,0x3fc
912         and     $i2,0x3fc
913         and     $i3,0x3fc
914         $PTR_ADD $i0,$Tbl
915         $PTR_ADD $i1,$Tbl
916         $PTR_ADD $i2,$Tbl
917         $PTR_ADD $i3,$Tbl
918 #endif
919         rotr    $t4,$t4,16
920         rotr    $t5,$t5,16
921         rotr    $t6,$t6,16
922         rotr    $t7,$t7,16
923
924         rotr    $t8,$t8,24
925         rotr    $t9,$t9,24
926         rotr    $t10,$t10,24
927         rotr    $t11,$t11,24
928 #else
929         lwl     $t4,2($i0)              # Td2[s2>>8]
930         lwl     $t5,2($i1)              # Td2[s3>>8]
931         lwl     $t6,2($i2)              # Td2[s0>>8]
932         lwl     $t7,2($i3)              # Td2[s1>>8]
933         lwr     $t4,1($i0)              # Td2[s2>>8]
934         _xtr    $i0,$s1,0-2
935         lwr     $t5,1($i1)              # Td2[s3>>8]
936         _xtr    $i1,$s2,0-2
937         lwr     $t6,1($i2)              # Td2[s0>>8]
938         _xtr    $i2,$s3,0-2
939         lwr     $t7,1($i3)              # Td2[s1>>8]
940         _xtr    $i3,$s0,0-2
941
942         and     $i0,0x3fc
943         and     $i1,0x3fc
944         and     $i2,0x3fc
945         and     $i3,0x3fc
946         $PTR_ADD $i0,$Tbl
947         $PTR_ADD $i1,$Tbl
948         $PTR_ADD $i2,$Tbl
949         $PTR_ADD $i3,$Tbl
950         lwl     $t8,1($i0)              # Td3[s1]
951         lwl     $t9,1($i1)              # Td3[s2]
952         lwl     $t10,1($i2)             # Td3[s3]
953         lwl     $t11,1($i3)             # Td3[s0]
954         lwr     $t8,0($i0)              # Td3[s1]
955         _xtr    $i0,$s0,24-2
956         lwr     $t9,0($i1)              # Td3[s2]
957         _xtr    $i1,$s1,24-2
958         lwr     $t10,0($i2)             # Td3[s3]
959         _xtr    $i2,$s2,24-2
960         lwr     $t11,0($i3)             # Td3[s0]
961         _xtr    $i3,$s3,24-2
962
963         and     $i0,0x3fc
964         and     $i1,0x3fc
965         and     $i2,0x3fc
966         and     $i3,0x3fc
967         $PTR_ADD $i0,$Tbl
968         $PTR_ADD $i1,$Tbl
969         $PTR_ADD $i2,$Tbl
970         $PTR_ADD $i3,$Tbl
971 #endif
972
973         xor     $t0,$t4
974         lw      $t4,0($i0)              # Td0[s0>>24]
975         xor     $t1,$t5
976         lw      $t5,0($i1)              # Td0[s1>>24]
977         xor     $t2,$t6
978         lw      $t6,0($i2)              # Td0[s2>>24]
979         xor     $t3,$t7
980         lw      $t7,0($i3)              # Td0[s3>>24]
981
982         xor     $t0,$t8
983         lw      $s0,0($key0)
984         xor     $t1,$t9
985         lw      $s1,4($key0)
986         xor     $t2,$t10
987         lw      $s2,8($key0)
988         xor     $t3,$t11
989         lw      $s3,12($key0)
990
991         xor     $t0,$t4
992         xor     $t1,$t5
993         xor     $t2,$t6
994         xor     $t3,$t7
995
996         subu    $cnt,1
997         $PTR_ADD $key0,16
998         xor     $s0,$t0
999         xor     $s1,$t1
1000         xor     $s2,$t2
1001         xor     $s3,$t3
1002         .set    noreorder
1003         bnez    $cnt,.Loop_dec
1004         _xtr    $i0,$s3,16-2
1005 #endif
1006
1007         .set    reorder
1008         lw      $t4,1024($Tbl)          # prefetch Td4
1009         _xtr    $i0,$s3,16
1010         lw      $t5,1024+32($Tbl)
1011         _xtr    $i1,$s0,16
1012         lw      $t6,1024+64($Tbl)
1013         _xtr    $i2,$s1,16
1014         lw      $t7,1024+96($Tbl)
1015         _xtr    $i3,$s2,16
1016         lw      $t8,1024+128($Tbl)
1017         and     $i0,0xff
1018         lw      $t9,1024+160($Tbl)
1019         and     $i1,0xff
1020         lw      $t10,1024+192($Tbl)
1021         and     $i2,0xff
1022         lw      $t11,1024+224($Tbl)
1023         and     $i3,0xff
1024
1025         $PTR_ADD $i0,$Tbl
1026         $PTR_ADD $i1,$Tbl
1027         $PTR_ADD $i2,$Tbl
1028         $PTR_ADD $i3,$Tbl
1029         lbu     $t0,1024($i0)           # Td4[s3>>16]
1030         _xtr    $i0,$s2,8
1031         lbu     $t1,1024($i1)           # Td4[s0>>16]
1032         _xtr    $i1,$s3,8
1033         lbu     $t2,1024($i2)           # Td4[s1>>16]
1034         _xtr    $i2,$s0,8
1035         lbu     $t3,1024($i3)           # Td4[s2>>16]
1036         _xtr    $i3,$s1,8
1037
1038         and     $i0,0xff
1039         and     $i1,0xff
1040         and     $i2,0xff
1041         and     $i3,0xff
1042         $PTR_ADD $i0,$Tbl
1043         $PTR_ADD $i1,$Tbl
1044         $PTR_ADD $i2,$Tbl
1045         $PTR_ADD $i3,$Tbl
1046 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1047 # if defined(_MIPSEL)
1048         lbu     $t4,1024($i0)           # Td4[s2>>8]
1049         $PTR_INS $i0,$s0,0,8
1050         lbu     $t5,1024($i1)           # Td4[s3>>8]
1051         $PTR_INS $i1,$s1,0,8
1052         lbu     $t6,1024($i2)           # Td4[s0>>8]
1053         $PTR_INS $i2,$s2,0,8
1054         lbu     $t7,1024($i3)           # Td4[s1>>8]
1055         $PTR_INS $i3,$s3,0,8
1056
1057         lbu     $t8,1024($i0)           # Td4[s0>>24]
1058         _xtr    $i0,$s1,0
1059         lbu     $t9,1024($i1)           # Td4[s1>>24]
1060         _xtr    $i1,$s2,0
1061         lbu     $t10,1024($i2)          # Td4[s2>>24]
1062         _xtr    $i2,$s3,0
1063         lbu     $t11,1024($i3)          # Td4[s3>>24]
1064         _xtr    $i3,$s0,0
1065
1066         $PTR_ADD $i0,$Tbl
1067         $PTR_ADD $i1,$Tbl
1068         $PTR_ADD $i2,$Tbl
1069         $PTR_ADD $i3,$Tbl
1070 # else
1071         lbu     $t4,1024($i0)           # Td4[s2>>8]
1072         _xtr    $i0,$s0,24
1073         lbu     $t5,1024($i1)           # Td4[s3>>8]
1074         _xtr    $i1,$s1,24
1075         lbu     $t6,1024($i2)           # Td4[s0>>8]
1076         _xtr    $i2,$s2,24
1077         lbu     $t7,1024($i3)           # Td4[s1>>8]
1078         _xtr    $i3,$s3,24
1079
1080         $PTR_ADD $i0,$Tbl
1081         $PTR_ADD $i1,$Tbl
1082         $PTR_ADD $i2,$Tbl
1083         $PTR_ADD $i3,$Tbl
1084         lbu     $t8,1024($i0)           # Td4[s0>>24]
1085         $PTR_INS $i0,$s1,0,8
1086         lbu     $t9,1024($i1)           # Td4[s1>>24]
1087         $PTR_INS $i1,$s2,0,8
1088         lbu     $t10,1024($i2)          # Td4[s2>>24]
1089         $PTR_INS $i2,$s3,0,8
1090         lbu     $t11,1024($i3)          # Td4[s3>>24]
1091         $PTR_INS $i3,$s0,0,8
1092 # endif
1093         _ins    $t0,16
1094         _ins    $t1,16
1095         _ins    $t2,16
1096         _ins    $t3,16
1097
1098         _ins2   $t0,$t4,8
1099         lbu     $t4,1024($i0)           # Td4[s1]
1100         _ins2   $t1,$t5,8
1101         lbu     $t5,1024($i1)           # Td4[s2]
1102         _ins2   $t2,$t6,8
1103         lbu     $t6,1024($i2)           # Td4[s3]
1104         _ins2   $t3,$t7,8
1105         lbu     $t7,1024($i3)           # Td4[s0]
1106
1107         _ins2   $t0,$t8,24
1108         lw      $s0,0($key0)
1109         _ins2   $t1,$t9,24
1110         lw      $s1,4($key0)
1111         _ins2   $t2,$t10,24
1112         lw      $s2,8($key0)
1113         _ins2   $t3,$t11,24
1114         lw      $s3,12($key0)
1115
1116         _ins2   $t0,$t4,0
1117         _ins2   $t1,$t5,0
1118         _ins2   $t2,$t6,0
1119         _ins2   $t3,$t7,0
1120 #else
1121         lbu     $t4,1024($i0)           # Td4[s2>>8]
1122         _xtr    $i0,$s0,24
1123         lbu     $t5,1024($i1)           # Td4[s3>>8]
1124         _xtr    $i1,$s1,24
1125         lbu     $t6,1024($i2)           # Td4[s0>>8]
1126         _xtr    $i2,$s2,24
1127         lbu     $t7,1024($i3)           # Td4[s1>>8]
1128         _xtr    $i3,$s3,24
1129
1130         $PTR_ADD $i0,$Tbl
1131         $PTR_ADD $i1,$Tbl
1132         $PTR_ADD $i2,$Tbl
1133         $PTR_ADD $i3,$Tbl
1134         lbu     $t8,1024($i0)           # Td4[s0>>24]
1135         _xtr    $i0,$s1,0
1136         lbu     $t9,1024($i1)           # Td4[s1>>24]
1137         _xtr    $i1,$s2,0
1138         lbu     $t10,1024($i2)          # Td4[s2>>24]
1139         _xtr    $i2,$s3,0
1140         lbu     $t11,1024($i3)          # Td4[s3>>24]
1141         _xtr    $i3,$s0,0
1142
1143         $PTR_ADD $i0,$Tbl
1144         $PTR_ADD $i1,$Tbl
1145         $PTR_ADD $i2,$Tbl
1146         $PTR_ADD $i3,$Tbl
1147
1148         _ins    $t0,16
1149         _ins    $t1,16
1150         _ins    $t2,16
1151         _ins    $t3,16
1152
1153         _ins    $t4,8
1154         _ins    $t5,8
1155         _ins    $t6,8
1156         _ins    $t7,8
1157
1158         xor     $t0,$t4
1159         lbu     $t4,1024($i0)           # Td4[s1]
1160         xor     $t1,$t5
1161         lbu     $t5,1024($i1)           # Td4[s2]
1162         xor     $t2,$t6
1163         lbu     $t6,1024($i2)           # Td4[s3]
1164         xor     $t3,$t7
1165         lbu     $t7,1024($i3)           # Td4[s0]
1166
1167         _ins    $t8,24
1168         lw      $s0,0($key0)
1169         _ins    $t9,24
1170         lw      $s1,4($key0)
1171         _ins    $t10,24
1172         lw      $s2,8($key0)
1173         _ins    $t11,24
1174         lw      $s3,12($key0)
1175
1176         xor     $t0,$t8
1177         xor     $t1,$t9
1178         xor     $t2,$t10
1179         xor     $t3,$t11
1180
1181         _ins    $t4,0
1182         _ins    $t5,0
1183         _ins    $t6,0
1184         _ins    $t7,0
1185
1186         xor     $t0,$t4
1187         xor     $t1,$t5
1188         xor     $t2,$t6
1189         xor     $t3,$t7
1190 #endif
1191
1192         xor     $s0,$t0
1193         xor     $s1,$t1
1194         xor     $s2,$t2
1195         xor     $s3,$t3
1196
1197         jr      $ra
1198 .end    _mips_AES_decrypt
1199
1200 .align  5
1201 .globl  AES_decrypt
1202 .ent    AES_decrypt
1203 AES_decrypt:
1204         .frame  $sp,$FRAMESIZE,$ra
1205         .mask   $SAVED_REGS_MASK,-$SZREG
1206         .set    noreorder
1207 ___
# AES_decrypt: public entry point wrapping _mips_AES_decrypt.
#
# The emitted code allocates a $FRAMESIZE-byte frame, saves $ra, $fp and
# $s4..$s11 (plus registers 12-15 and $gp for nubi flavours), points $Tbl
# at AES_Td, loads the 16-byte input block from $inp into $s0..$s3, calls
# _mips_AES_decrypt, stores $s0..$s3 to $out, restores the saved registers
# and returns.
#
# Block load/store comes in two variants selected by the C preprocessor:
# R6 targets use plain lw/sw, everything else uses lwl/lwr and swr/swl
# pairs (the usual way to access possibly unaligned data).
# NOTE(review): presumably R6 is special-cased because it lacks lwl/lwr;
# confirm against the architecture manual.
1208 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1209         .cpload $pf
1210 ___
1211 $code.=<<___;                           # prologue: allocate frame, save $ra, $fp, $s4..$s11
1212         $PTR_SUB $sp,$FRAMESIZE
1213         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1214         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1215         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
1216         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
1217         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
1218         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
1219         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
1220         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
1221         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
1222         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
1223 ___
1224 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1225         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
1226         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
1227         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
1228         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
1229         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
1230 ___
1231 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1232         .cplocal        $Tbl
1233         .cpsetup        $pf,$zero,AES_decrypt
1234 ___
1235 $code.=<<___;                           # body: load block, decrypt, store block, restore registers
1236         .set    reorder
1237         $PTR_LA $Tbl,AES_Td             # PIC-ified 'load address'
1238
1239 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1240         lw      $s0,0($inp)
1241         lw      $s1,4($inp)
1242         lw      $s2,8($inp)
1243         lw      $s3,12($inp)
1244 #else
1245         lwl     $s0,0+$MSB($inp)
1246         lwl     $s1,4+$MSB($inp)
1247         lwl     $s2,8+$MSB($inp)
1248         lwl     $s3,12+$MSB($inp)
1249         lwr     $s0,0+$LSB($inp)
1250         lwr     $s1,4+$LSB($inp)
1251         lwr     $s2,8+$LSB($inp)
1252         lwr     $s3,12+$LSB($inp)
1253 #endif
1254
1255         bal     _mips_AES_decrypt
1256
1257 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1258         sw      $s0,0($out)
1259         sw      $s1,4($out)
1260         sw      $s2,8($out)
1261         sw      $s3,12($out)
1262 #else
1263         swr     $s0,0+$LSB($out)
1264         swr     $s1,4+$LSB($out)
1265         swr     $s2,8+$LSB($out)
1266         swr     $s3,12+$LSB($out)
1267         swl     $s0,0+$MSB($out)
1268         swl     $s1,4+$MSB($out)
1269         swl     $s2,8+$MSB($out)
1270         swl     $s3,12+$MSB($out)
1271 #endif
1272
1273         .set    noreorder
1274         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1275         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1276         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
1277         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
1278         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
1279         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
1280         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
1281         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
1282         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
1283         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
1284 ___
1285 $code.=<<___ if ($flavour =~ /nubi/i);  # nubi-only restores; same slots as the nubi prologue
1286         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
1287         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
1288         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
1289         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
1290         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
1291 ___
1292 $code.=<<___;                           # return; the frame release follows jr under .set noreorder (delay slot)
1293         jr      $ra
1294         $PTR_ADD $sp,$FRAMESIZE
1295 .end    AES_decrypt
1296 ___
1297 }}}
1298 \f
1299 {{{
1300 my $FRAMESIZE=8*$SZREG;         # frame of the key-setup wrappers: eight register-sized slots
1301 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc000f008" : "0xc0000000";      # .mask operand; nubi flavours save more registers
1302
1303 my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);    # user key bytes, key length in bits, key schedule, lookup table base
1304 my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);        # round-key words being expanded
1305 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);        # table-lookup scratch; $i1 shares $t0 with the status code
1306 my ($rcon,$cnt)=($gp,$fp);      # round-constant pointer and loop/round counter
1307
# _mips_AES_set_encrypt_key: internal key-expansion routine (no stack frame).
#
# Inputs:  $inp  - user key bytes
#          $bits - key length; 128, 192 and 256 are accepted
#          $key  - key schedule to fill in
#          $Tbl  - byte lookup table; both callers in this file pass AES_Te4.
#                  Round constants are read as words starting at $Tbl+256.
#
# Result:  status in $t0: -1 if $inp or $key is null, -2 for any other
#          $bits value, 0 on success.  On success $cnt holds the round
#          count (10, 12 or 14), the same value is stored at byte offset
#          240 of the schedule, and $key is rewound to the schedule start.
#
# Structure: the user key is loaded into $rk0..$rk7 (lw on R6, lwl/lwr
# pairs otherwise), then one of the .L128bits/.L192bits/.L256bits loops
# runs $cnt times (10, 8 or 7).  Each pass stores the current words,
# looks up four bytes of the last word in $Tbl, merges them with a round
# constant into $rk0 and chains the xor through the remaining words.
# The 256-bit loop performs a second, constant-free lookup on $rk3 to
# update $rk4..$rk7 and leaves through .L256bits_done on its final pass.
#
# $i1 is the same register as $t0, so the status is (re)written only
# after the loops have finished.  Instructions directly after a branch
# in .set noreorder regions occupy the branch delay slot.
1308 $code.=<<___;
1309 .align  5
1310 .ent    _mips_AES_set_encrypt_key
1311 _mips_AES_set_encrypt_key:
1312         .frame  $sp,0,$ra
1313         .set    noreorder
1314         beqz    $inp,.Lekey_done
1315         li      $t0,-1
1316         beqz    $key,.Lekey_done
1317         $PTR_ADD $rcon,$Tbl,256
1318
1319         .set    reorder
1320 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1321         lw      $rk0,0($inp)            # load 128 bits
1322         lw      $rk1,4($inp)
1323         lw      $rk2,8($inp)
1324         lw      $rk3,12($inp)
1325 #else
1326         lwl     $rk0,0+$MSB($inp)       # load 128 bits
1327         lwl     $rk1,4+$MSB($inp)
1328         lwl     $rk2,8+$MSB($inp)
1329         lwl     $rk3,12+$MSB($inp)
1330         lwr     $rk0,0+$LSB($inp)
1331         lwr     $rk1,4+$LSB($inp)
1332         lwr     $rk2,8+$LSB($inp)
1333         lwr     $rk3,12+$LSB($inp)
1334 #endif
1335         li      $at,128
1336         .set    noreorder
1337         beq     $bits,$at,.L128bits
1338         li      $cnt,10
1339
1340         .set    reorder
1341 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1342         lw      $rk4,16($inp)           # load 192 bits
1343         lw      $rk5,20($inp)
1344 #else
1345         lwl     $rk4,16+$MSB($inp)      # load 192 bits
1346         lwl     $rk5,20+$MSB($inp)
1347         lwr     $rk4,16+$LSB($inp)
1348         lwr     $rk5,20+$LSB($inp)
1349 #endif
1350         li      $at,192
1351         .set    noreorder
1352         beq     $bits,$at,.L192bits
1353         li      $cnt,8
1354
1355         .set    reorder
1356 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1357         lw      $rk6,24($inp)           # load 256 bits
1358         lw      $rk7,28($inp)
1359 #else
1360         lwl     $rk6,24+$MSB($inp)      # load 256 bits
1361         lwl     $rk7,28+$MSB($inp)
1362         lwr     $rk6,24+$LSB($inp)
1363         lwr     $rk7,28+$LSB($inp)
1364 #endif
1365         li      $at,256
1366         .set    noreorder
1367         beq     $bits,$at,.L256bits
1368         li      $cnt,7
1369
1370         b       .Lekey_done
1371         li      $t0,-2
1372
1373 .align  4
1374 .L128bits:
1375         .set    reorder
1376         srl     $i0,$rk3,16
1377         srl     $i1,$rk3,8
1378         and     $i0,0xff
1379         and     $i1,0xff
1380         and     $i2,$rk3,0xff
1381         srl     $i3,$rk3,24
1382         $PTR_ADD $i0,$Tbl
1383         $PTR_ADD $i1,$Tbl
1384         $PTR_ADD $i2,$Tbl
1385         $PTR_ADD $i3,$Tbl
1386         lbu     $i0,0($i0)
1387         lbu     $i1,0($i1)
1388         lbu     $i2,0($i2)
1389         lbu     $i3,0($i3)
1390
1391         sw      $rk0,0($key)
1392         sw      $rk1,4($key)
1393         sw      $rk2,8($key)
1394         sw      $rk3,12($key)
1395         subu    $cnt,1
1396         $PTR_ADD $key,16
1397
1398         _bias   $i0,24
1399         _bias   $i1,16
1400         _bias   $i2,8
1401         _bias   $i3,0
1402
1403         xor     $rk0,$i0
1404         lw      $i0,0($rcon)
1405         xor     $rk0,$i1
1406         xor     $rk0,$i2
1407         xor     $rk0,$i3
1408         xor     $rk0,$i0
1409
1410         xor     $rk1,$rk0
1411         xor     $rk2,$rk1
1412         xor     $rk3,$rk2
1413
1414         .set    noreorder
1415         bnez    $cnt,.L128bits
1416         $PTR_ADD $rcon,4
1417
1418         sw      $rk0,0($key)
1419         sw      $rk1,4($key)
1420         sw      $rk2,8($key)
1421         li      $cnt,10
1422         sw      $rk3,12($key)
1423         li      $t0,0
1424         sw      $cnt,80($key)
1425         b       .Lekey_done
1426         $PTR_SUB $key,10*16
1427
1428 .align  4
1429 .L192bits:
1430         .set    reorder
1431         srl     $i0,$rk5,16
1432         srl     $i1,$rk5,8
1433         and     $i0,0xff
1434         and     $i1,0xff
1435         and     $i2,$rk5,0xff
1436         srl     $i3,$rk5,24
1437         $PTR_ADD $i0,$Tbl
1438         $PTR_ADD $i1,$Tbl
1439         $PTR_ADD $i2,$Tbl
1440         $PTR_ADD $i3,$Tbl
1441         lbu     $i0,0($i0)
1442         lbu     $i1,0($i1)
1443         lbu     $i2,0($i2)
1444         lbu     $i3,0($i3)
1445
1446         sw      $rk0,0($key)
1447         sw      $rk1,4($key)
1448         sw      $rk2,8($key)
1449         sw      $rk3,12($key)
1450         sw      $rk4,16($key)
1451         sw      $rk5,20($key)
1452         subu    $cnt,1
1453         $PTR_ADD $key,24
1454
1455         _bias   $i0,24
1456         _bias   $i1,16
1457         _bias   $i2,8
1458         _bias   $i3,0
1459
1460         xor     $rk0,$i0
1461         lw      $i0,0($rcon)
1462         xor     $rk0,$i1
1463         xor     $rk0,$i2
1464         xor     $rk0,$i3
1465         xor     $rk0,$i0
1466
1467         xor     $rk1,$rk0
1468         xor     $rk2,$rk1
1469         xor     $rk3,$rk2
1470         xor     $rk4,$rk3
1471         xor     $rk5,$rk4
1472
1473         .set    noreorder
1474         bnez    $cnt,.L192bits
1475         $PTR_ADD $rcon,4
1476
1477         sw      $rk0,0($key)
1478         sw      $rk1,4($key)
1479         sw      $rk2,8($key)
1480         li      $cnt,12
1481         sw      $rk3,12($key)
1482         li      $t0,0
1483         sw      $cnt,48($key)
1484         b       .Lekey_done
1485         $PTR_SUB $key,12*16
1486
1487 .align  4
1488 .L256bits:
1489         .set    reorder
1490         srl     $i0,$rk7,16
1491         srl     $i1,$rk7,8
1492         and     $i0,0xff
1493         and     $i1,0xff
1494         and     $i2,$rk7,0xff
1495         srl     $i3,$rk7,24
1496         $PTR_ADD $i0,$Tbl
1497         $PTR_ADD $i1,$Tbl
1498         $PTR_ADD $i2,$Tbl
1499         $PTR_ADD $i3,$Tbl
1500         lbu     $i0,0($i0)
1501         lbu     $i1,0($i1)
1502         lbu     $i2,0($i2)
1503         lbu     $i3,0($i3)
1504
1505         sw      $rk0,0($key)
1506         sw      $rk1,4($key)
1507         sw      $rk2,8($key)
1508         sw      $rk3,12($key)
1509         sw      $rk4,16($key)
1510         sw      $rk5,20($key)
1511         sw      $rk6,24($key)
1512         sw      $rk7,28($key)
1513         subu    $cnt,1
1514
1515         _bias   $i0,24
1516         _bias   $i1,16
1517         _bias   $i2,8
1518         _bias   $i3,0
1519
1520         xor     $rk0,$i0
1521         lw      $i0,0($rcon)
1522         xor     $rk0,$i1
1523         xor     $rk0,$i2
1524         xor     $rk0,$i3
1525         xor     $rk0,$i0
1526
1527         xor     $rk1,$rk0
1528         xor     $rk2,$rk1
1529         xor     $rk3,$rk2
1530         beqz    $cnt,.L256bits_done
1531
1532         srl     $i0,$rk3,24
1533         srl     $i1,$rk3,16
1534         srl     $i2,$rk3,8
1535         and     $i3,$rk3,0xff
1536         and     $i1,0xff
1537         and     $i2,0xff
1538         $PTR_ADD $i0,$Tbl
1539         $PTR_ADD $i1,$Tbl
1540         $PTR_ADD $i2,$Tbl
1541         $PTR_ADD $i3,$Tbl
1542         lbu     $i0,0($i0)
1543         lbu     $i1,0($i1)
1544         lbu     $i2,0($i2)
1545         lbu     $i3,0($i3)
1546         sll     $i0,24
1547         sll     $i1,16
1548         sll     $i2,8
1549
1550         xor     $rk4,$i0
1551         xor     $rk4,$i1
1552         xor     $rk4,$i2
1553         xor     $rk4,$i3
1554
1555         xor     $rk5,$rk4
1556         xor     $rk6,$rk5
1557         xor     $rk7,$rk6
1558
1559         $PTR_ADD $key,32
1560         .set    noreorder
1561         b       .L256bits
1562         $PTR_ADD $rcon,4
1563
1564 .L256bits_done:
1565         sw      $rk0,32($key)
1566         sw      $rk1,36($key)
1567         sw      $rk2,40($key)
1568         li      $cnt,14
1569         sw      $rk3,44($key)
1570         li      $t0,0
1571         sw      $cnt,48($key)
1572         $PTR_SUB $key,12*16
1573
1574 .Lekey_done:
1575         jr      $ra
1576         nop
1577 .end    _mips_AES_set_encrypt_key
1578
1579 .globl  AES_set_encrypt_key
1580 .ent    AES_set_encrypt_key
1581 AES_set_encrypt_key:
1582         .frame  $sp,$FRAMESIZE,$ra
1583         .mask   $SAVED_REGS_MASK,-$SZREG
1584         .set    noreorder
1585 ___
# AES_set_encrypt_key: public wrapper around _mips_AES_set_encrypt_key.
#
# The emitted code allocates a $FRAMESIZE-byte frame, saves $ra and $fp
# (plus $s0..$s3 and $gp for nubi flavours), points $Tbl at AES_Te4,
# calls the internal routine and copies its status from $t0 into $a0
# before restoring registers and returning.
#
# The nubi restores must read exactly the slots the nubi prologue wrote
# (-3 .. -7).  The frame is only eight slots deep, so any larger index
# addresses memory below the stack pointer that was never saved to.
1586 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1587         .cpload $pf
1588 ___
1589 $code.=<<___;                           # prologue: allocate frame, save $ra and $fp
1590         $PTR_SUB $sp,$FRAMESIZE
1591         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1592         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1593 ___
1594 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1595         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1596         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1597         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1598         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1599         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1600 ___
1601 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1602         .cplocal        $Tbl
1603         .cpsetup        $pf,$zero,AES_set_encrypt_key
1604 ___
1605 $code.=<<___;                           # body: expand the key, move status into $a0, restore $ra/$fp
1606         .set    reorder
1607         $PTR_LA $Tbl,AES_Te4            # PIC-ified 'load address'
1608
1609         bal     _mips_AES_set_encrypt_key
1610
1611         .set    noreorder
1612         move    $a0,$t0
1613         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1614         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1615 ___
1616 $code.=<<___ if ($flavour =~ /nubi/i);  # nubi-only restores; slots mirror the nubi prologue
1617         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1618         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1619         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1620         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1621         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1622 ___
1623 $code.=<<___;                           # return; the frame release follows jr under .set noreorder (delay slot)
1624         jr      $ra
1625         $PTR_ADD $sp,$FRAMESIZE
1626 .end    AES_set_encrypt_key
1627 ___
1628 \f
# AES_set_decrypt_key: public entry point building the decryption schedule.
#
# The emitted code first runs _mips_AES_set_encrypt_key (with $Tbl set to
# AES_Te4); a negative status in $t0 skips straight to the epilogue and is
# returned unchanged.  Otherwise:
#   .Lswap - exchanges 16-byte round keys from both ends of the schedule
#            ($head walks up, $tail walks down) until the pointers meet;
#   .Lmix  - rewrites ($cnt-1)*4 words starting at the second round key.
#            For each word it derives tp2/tp4/tp8 by byte-wise doubling
#            with the 0x80808080/0x7f7f7f7f/0x1b1b1b1b masks and folds
#            rotated combinations into $tpe (the InvMixColumns transform).
#            The next input word is loaded before the current result is
#            stored ("modulo-scheduled").
# Status 0 is returned on success; the status is copied from $t0 to $a0.
#
# $head/$tail reuse the $inp/$bits registers and the $tp* names reuse the
# $rk* registers, all of which are free once key expansion has finished.
#
# The nubi restores must read exactly the slots the nubi prologue wrote
# (-3 .. -7).  The frame is only eight slots deep, so any larger index
# addresses memory below the stack pointer that was never saved to.
1629 my ($head,$tail)=($inp,$bits);
1630 my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
1631 my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
1632 $code.=<<___;
1633 .align  5
1634 .globl  AES_set_decrypt_key
1635 .ent    AES_set_decrypt_key
1636 AES_set_decrypt_key:
1637         .frame  $sp,$FRAMESIZE,$ra
1638         .mask   $SAVED_REGS_MASK,-$SZREG
1639         .set    noreorder
1640 ___
1641 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1642         .cpload $pf
1643 ___
1644 $code.=<<___;                           # prologue: allocate frame, save $ra and $fp
1645         $PTR_SUB $sp,$FRAMESIZE
1646         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1647         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1648 ___
1649 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
1650         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1651         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1652         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1653         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1654         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1655 ___
1656 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1657         .cplocal        $Tbl
1658         .cpsetup        $pf,$zero,AES_set_decrypt_key
1659 ___
1660 $code.=<<___;                           # body: expand key, reverse round-key order, transform inner round keys
1661         .set    reorder
1662         $PTR_LA $Tbl,AES_Te4            # PIC-ified 'load address'
1663
1664         bal     _mips_AES_set_encrypt_key
1665
1666         bltz    $t0,.Ldkey_done
1667
1668         sll     $at,$cnt,4
1669         $PTR_ADD $head,$key,0
1670         $PTR_ADD $tail,$key,$at
1671 .align  4
1672 .Lswap:
1673         lw      $rk0,0($head)
1674         lw      $rk1,4($head)
1675         lw      $rk2,8($head)
1676         lw      $rk3,12($head)
1677         lw      $rk4,0($tail)
1678         lw      $rk5,4($tail)
1679         lw      $rk6,8($tail)
1680         lw      $rk7,12($tail)
1681         sw      $rk0,0($tail)
1682         sw      $rk1,4($tail)
1683         sw      $rk2,8($tail)
1684         sw      $rk3,12($tail)
1685         $PTR_ADD $head,16
1686         $PTR_SUB $tail,16
1687         sw      $rk4,-16($head)
1688         sw      $rk5,-12($head)
1689         sw      $rk6,-8($head)
1690         sw      $rk7,-4($head)
1691         bne     $head,$tail,.Lswap
1692
1693         lw      $tp1,16($key)           # modulo-scheduled
1694         lui     $x80808080,0x8080
1695         subu    $cnt,1
1696         or      $x80808080,0x8080
1697         sll     $cnt,2
1698         $PTR_ADD $key,16
1699         lui     $x1b1b1b1b,0x1b1b
1700         nor     $x7f7f7f7f,$zero,$x80808080
1701         or      $x1b1b1b1b,0x1b1b
1702 .align  4
1703 .Lmix:
1704         and     $m,$tp1,$x80808080
1705         and     $tp2,$tp1,$x7f7f7f7f
1706         srl     $tp4,$m,7
1707         addu    $tp2,$tp2               # tp2<<1
1708         subu    $m,$tp4
1709         and     $m,$x1b1b1b1b
1710         xor     $tp2,$m
1711
1712         and     $m,$tp2,$x80808080
1713         and     $tp4,$tp2,$x7f7f7f7f
1714         srl     $tp8,$m,7
1715         addu    $tp4,$tp4               # tp4<<1
1716         subu    $m,$tp8
1717         and     $m,$x1b1b1b1b
1718         xor     $tp4,$m
1719
1720         and     $m,$tp4,$x80808080
1721         and     $tp8,$tp4,$x7f7f7f7f
1722         srl     $tp9,$m,7
1723         addu    $tp8,$tp8               # tp8<<1
1724         subu    $m,$tp9
1725         and     $m,$x1b1b1b1b
1726         xor     $tp8,$m
1727
1728         xor     $tp9,$tp8,$tp1
1729         xor     $tpe,$tp8,$tp4
1730         xor     $tpb,$tp9,$tp2
1731         xor     $tpd,$tp9,$tp4
1732
1733 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1734         rotr    $tp1,$tpd,16
1735          xor    $tpe,$tp2
1736         rotr    $tp2,$tp9,8
1737         xor     $tpe,$tp1
1738         rotr    $tp4,$tpb,24
1739         xor     $tpe,$tp2
1740         lw      $tp1,4($key)            # modulo-scheduled
1741         xor     $tpe,$tp4
1742 #else
1743         _ror    $tp1,$tpd,16
1744          xor    $tpe,$tp2
1745         _ror    $tp2,$tpd,-16
1746         xor     $tpe,$tp1
1747         _ror    $tp1,$tp9,8
1748         xor     $tpe,$tp2
1749         _ror    $tp2,$tp9,-24
1750         xor     $tpe,$tp1
1751         _ror    $tp1,$tpb,24
1752         xor     $tpe,$tp2
1753         _ror    $tp2,$tpb,-8
1754         xor     $tpe,$tp1
1755         lw      $tp1,4($key)            # modulo-scheduled
1756         xor     $tpe,$tp2
1757 #endif
1758         subu    $cnt,1
1759         sw      $tpe,0($key)
1760         $PTR_ADD $key,4
1761         bnez    $cnt,.Lmix
1762
1763         li      $t0,0
1764 .Ldkey_done:
1765         .set    noreorder
1766         move    $a0,$t0
1767         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1768         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1769 ___
1770 $code.=<<___ if ($flavour =~ /nubi/i);  # nubi-only restores; slots mirror the nubi prologue
1771         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1772         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1773         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1774         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1775         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1776 ___
1777 $code.=<<___;                           # return; the frame release follows jr under .set noreorder (delay slot)
1778         jr      $ra
1779         $PTR_ADD $sp,$FRAMESIZE
1780 .end    AES_set_decrypt_key
1781 ___
1782 }}}
1783
1784 ######################################################################
1785 # Tables are kept in endian-neutral manner
1786 $code.=<<___;
1787 .rdata
1788 .align  10
1789 AES_Te:
1790 .byte   0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84     # Te0
1791 .byte   0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
1792 .byte   0xff,0xf2,0xf2,0x0d,    0xd6,0x6b,0x6b,0xbd
1793 .byte   0xde,0x6f,0x6f,0xb1,    0x91,0xc5,0xc5,0x54
1794 .byte   0x60,0x30,0x30,0x50,    0x02,0x01,0x01,0x03
1795 .byte   0xce,0x67,0x67,0xa9,    0x56,0x2b,0x2b,0x7d
1796 .byte   0xe7,0xfe,0xfe,0x19,    0xb5,0xd7,0xd7,0x62
1797 .byte   0x4d,0xab,0xab,0xe6,    0xec,0x76,0x76,0x9a
1798 .byte   0x8f,0xca,0xca,0x45,    0x1f,0x82,0x82,0x9d
1799 .byte   0x89,0xc9,0xc9,0x40,    0xfa,0x7d,0x7d,0x87
1800 .byte   0xef,0xfa,0xfa,0x15,    0xb2,0x59,0x59,0xeb
1801 .byte   0x8e,0x47,0x47,0xc9,    0xfb,0xf0,0xf0,0x0b
1802 .byte   0x41,0xad,0xad,0xec,    0xb3,0xd4,0xd4,0x67
1803 .byte   0x5f,0xa2,0xa2,0xfd,    0x45,0xaf,0xaf,0xea
1804 .byte   0x23,0x9c,0x9c,0xbf,    0x53,0xa4,0xa4,0xf7
1805 .byte   0xe4,0x72,0x72,0x96,    0x9b,0xc0,0xc0,0x5b
1806 .byte   0x75,0xb7,0xb7,0xc2,    0xe1,0xfd,0xfd,0x1c
1807 .byte   0x3d,0x93,0x93,0xae,    0x4c,0x26,0x26,0x6a
1808 .byte   0x6c,0x36,0x36,0x5a,    0x7e,0x3f,0x3f,0x41
1809 .byte   0xf5,0xf7,0xf7,0x02,    0x83,0xcc,0xcc,0x4f
1810 .byte   0x68,0x34,0x34,0x5c,    0x51,0xa5,0xa5,0xf4
1811 .byte   0xd1,0xe5,0xe5,0x34,    0xf9,0xf1,0xf1,0x08
1812 .byte   0xe2,0x71,0x71,0x93,    0xab,0xd8,0xd8,0x73
1813 .byte   0x62,0x31,0x31,0x53,    0x2a,0x15,0x15,0x3f
1814 .byte   0x08,0x04,0x04,0x0c,    0x95,0xc7,0xc7,0x52
1815 .byte   0x46,0x23,0x23,0x65,    0x9d,0xc3,0xc3,0x5e
1816 .byte   0x30,0x18,0x18,0x28,    0x37,0x96,0x96,0xa1
1817 .byte   0x0a,0x05,0x05,0x0f,    0x2f,0x9a,0x9a,0xb5
1818 .byte   0x0e,0x07,0x07,0x09,    0x24,0x12,0x12,0x36
1819 .byte   0x1b,0x80,0x80,0x9b,    0xdf,0xe2,0xe2,0x3d
1820 .byte   0xcd,0xeb,0xeb,0x26,    0x4e,0x27,0x27,0x69
1821 .byte   0x7f,0xb2,0xb2,0xcd,    0xea,0x75,0x75,0x9f
1822 .byte   0x12,0x09,0x09,0x1b,    0x1d,0x83,0x83,0x9e
1823 .byte   0x58,0x2c,0x2c,0x74,    0x34,0x1a,0x1a,0x2e
1824 .byte   0x36,0x1b,0x1b,0x2d,    0xdc,0x6e,0x6e,0xb2
1825 .byte   0xb4,0x5a,0x5a,0xee,    0x5b,0xa0,0xa0,0xfb
1826 .byte   0xa4,0x52,0x52,0xf6,    0x76,0x3b,0x3b,0x4d
1827 .byte   0xb7,0xd6,0xd6,0x61,    0x7d,0xb3,0xb3,0xce
1828 .byte   0x52,0x29,0x29,0x7b,    0xdd,0xe3,0xe3,0x3e
1829 .byte   0x5e,0x2f,0x2f,0x71,    0x13,0x84,0x84,0x97
1830 .byte   0xa6,0x53,0x53,0xf5,    0xb9,0xd1,0xd1,0x68
1831 .byte   0x00,0x00,0x00,0x00,    0xc1,0xed,0xed,0x2c
1832 .byte   0x40,0x20,0x20,0x60,    0xe3,0xfc,0xfc,0x1f
1833 .byte   0x79,0xb1,0xb1,0xc8,    0xb6,0x5b,0x5b,0xed
1834 .byte   0xd4,0x6a,0x6a,0xbe,    0x8d,0xcb,0xcb,0x46
1835 .byte   0x67,0xbe,0xbe,0xd9,    0x72,0x39,0x39,0x4b
1836 .byte   0x94,0x4a,0x4a,0xde,    0x98,0x4c,0x4c,0xd4
1837 .byte   0xb0,0x58,0x58,0xe8,    0x85,0xcf,0xcf,0x4a
1838 .byte   0xbb,0xd0,0xd0,0x6b,    0xc5,0xef,0xef,0x2a
1839 .byte   0x4f,0xaa,0xaa,0xe5,    0xed,0xfb,0xfb,0x16
1840 .byte   0x86,0x43,0x43,0xc5,    0x9a,0x4d,0x4d,0xd7
1841 .byte   0x66,0x33,0x33,0x55,    0x11,0x85,0x85,0x94
1842 .byte   0x8a,0x45,0x45,0xcf,    0xe9,0xf9,0xf9,0x10
1843 .byte   0x04,0x02,0x02,0x06,    0xfe,0x7f,0x7f,0x81
1844 .byte   0xa0,0x50,0x50,0xf0,    0x78,0x3c,0x3c,0x44
1845 .byte   0x25,0x9f,0x9f,0xba,    0x4b,0xa8,0xa8,0xe3
1846 .byte   0xa2,0x51,0x51,0xf3,    0x5d,0xa3,0xa3,0xfe
1847 .byte   0x80,0x40,0x40,0xc0,    0x05,0x8f,0x8f,0x8a
1848 .byte   0x3f,0x92,0x92,0xad,    0x21,0x9d,0x9d,0xbc
1849 .byte   0x70,0x38,0x38,0x48,    0xf1,0xf5,0xf5,0x04
1850 .byte   0x63,0xbc,0xbc,0xdf,    0x77,0xb6,0xb6,0xc1
1851 .byte   0xaf,0xda,0xda,0x75,    0x42,0x21,0x21,0x63
1852 .byte   0x20,0x10,0x10,0x30,    0xe5,0xff,0xff,0x1a
1853 .byte   0xfd,0xf3,0xf3,0x0e,    0xbf,0xd2,0xd2,0x6d
1854 .byte   0x81,0xcd,0xcd,0x4c,    0x18,0x0c,0x0c,0x14
1855 .byte   0x26,0x13,0x13,0x35,    0xc3,0xec,0xec,0x2f
1856 .byte   0xbe,0x5f,0x5f,0xe1,    0x35,0x97,0x97,0xa2
1857 .byte   0x88,0x44,0x44,0xcc,    0x2e,0x17,0x17,0x39
1858 .byte   0x93,0xc4,0xc4,0x57,    0x55,0xa7,0xa7,0xf2
1859 .byte   0xfc,0x7e,0x7e,0x82,    0x7a,0x3d,0x3d,0x47
1860 .byte   0xc8,0x64,0x64,0xac,    0xba,0x5d,0x5d,0xe7
1861 .byte   0x32,0x19,0x19,0x2b,    0xe6,0x73,0x73,0x95
1862 .byte   0xc0,0x60,0x60,0xa0,    0x19,0x81,0x81,0x98
1863 .byte   0x9e,0x4f,0x4f,0xd1,    0xa3,0xdc,0xdc,0x7f
1864 .byte   0x44,0x22,0x22,0x66,    0x54,0x2a,0x2a,0x7e
1865 .byte   0x3b,0x90,0x90,0xab,    0x0b,0x88,0x88,0x83
1866 .byte   0x8c,0x46,0x46,0xca,    0xc7,0xee,0xee,0x29
1867 .byte   0x6b,0xb8,0xb8,0xd3,    0x28,0x14,0x14,0x3c
1868 .byte   0xa7,0xde,0xde,0x79,    0xbc,0x5e,0x5e,0xe2
1869 .byte   0x16,0x0b,0x0b,0x1d,    0xad,0xdb,0xdb,0x76
1870 .byte   0xdb,0xe0,0xe0,0x3b,    0x64,0x32,0x32,0x56
1871 .byte   0x74,0x3a,0x3a,0x4e,    0x14,0x0a,0x0a,0x1e
1872 .byte   0x92,0x49,0x49,0xdb,    0x0c,0x06,0x06,0x0a
1873 .byte   0x48,0x24,0x24,0x6c,    0xb8,0x5c,0x5c,0xe4
1874 .byte   0x9f,0xc2,0xc2,0x5d,    0xbd,0xd3,0xd3,0x6e
1875 .byte   0x43,0xac,0xac,0xef,    0xc4,0x62,0x62,0xa6
1876 .byte   0x39,0x91,0x91,0xa8,    0x31,0x95,0x95,0xa4
1877 .byte   0xd3,0xe4,0xe4,0x37,    0xf2,0x79,0x79,0x8b
1878 .byte   0xd5,0xe7,0xe7,0x32,    0x8b,0xc8,0xc8,0x43
1879 .byte   0x6e,0x37,0x37,0x59,    0xda,0x6d,0x6d,0xb7
1880 .byte   0x01,0x8d,0x8d,0x8c,    0xb1,0xd5,0xd5,0x64
1881 .byte   0x9c,0x4e,0x4e,0xd2,    0x49,0xa9,0xa9,0xe0
1882 .byte   0xd8,0x6c,0x6c,0xb4,    0xac,0x56,0x56,0xfa
1883 .byte   0xf3,0xf4,0xf4,0x07,    0xcf,0xea,0xea,0x25
1884 .byte   0xca,0x65,0x65,0xaf,    0xf4,0x7a,0x7a,0x8e
1885 .byte   0x47,0xae,0xae,0xe9,    0x10,0x08,0x08,0x18
1886 .byte   0x6f,0xba,0xba,0xd5,    0xf0,0x78,0x78,0x88
1887 .byte   0x4a,0x25,0x25,0x6f,    0x5c,0x2e,0x2e,0x72
1888 .byte   0x38,0x1c,0x1c,0x24,    0x57,0xa6,0xa6,0xf1
1889 .byte   0x73,0xb4,0xb4,0xc7,    0x97,0xc6,0xc6,0x51
1890 .byte   0xcb,0xe8,0xe8,0x23,    0xa1,0xdd,0xdd,0x7c
1891 .byte   0xe8,0x74,0x74,0x9c,    0x3e,0x1f,0x1f,0x21
1892 .byte   0x96,0x4b,0x4b,0xdd,    0x61,0xbd,0xbd,0xdc
1893 .byte   0x0d,0x8b,0x8b,0x86,    0x0f,0x8a,0x8a,0x85
1894 .byte   0xe0,0x70,0x70,0x90,    0x7c,0x3e,0x3e,0x42
1895 .byte   0x71,0xb5,0xb5,0xc4,    0xcc,0x66,0x66,0xaa
1896 .byte   0x90,0x48,0x48,0xd8,    0x06,0x03,0x03,0x05
1897 .byte   0xf7,0xf6,0xf6,0x01,    0x1c,0x0e,0x0e,0x12
1898 .byte   0xc2,0x61,0x61,0xa3,    0x6a,0x35,0x35,0x5f
1899 .byte   0xae,0x57,0x57,0xf9,    0x69,0xb9,0xb9,0xd0
1900 .byte   0x17,0x86,0x86,0x91,    0x99,0xc1,0xc1,0x58
1901 .byte   0x3a,0x1d,0x1d,0x27,    0x27,0x9e,0x9e,0xb9
1902 .byte   0xd9,0xe1,0xe1,0x38,    0xeb,0xf8,0xf8,0x13
1903 .byte   0x2b,0x98,0x98,0xb3,    0x22,0x11,0x11,0x33
1904 .byte   0xd2,0x69,0x69,0xbb,    0xa9,0xd9,0xd9,0x70
1905 .byte   0x07,0x8e,0x8e,0x89,    0x33,0x94,0x94,0xa7
1906 .byte   0x2d,0x9b,0x9b,0xb6,    0x3c,0x1e,0x1e,0x22
1907 .byte   0x15,0x87,0x87,0x92,    0xc9,0xe9,0xe9,0x20
1908 .byte   0x87,0xce,0xce,0x49,    0xaa,0x55,0x55,0xff
1909 .byte   0x50,0x28,0x28,0x78,    0xa5,0xdf,0xdf,0x7a
1910 .byte   0x03,0x8c,0x8c,0x8f,    0x59,0xa1,0xa1,0xf8
1911 .byte   0x09,0x89,0x89,0x80,    0x1a,0x0d,0x0d,0x17
1912 .byte   0x65,0xbf,0xbf,0xda,    0xd7,0xe6,0xe6,0x31
1913 .byte   0x84,0x42,0x42,0xc6,    0xd0,0x68,0x68,0xb8
1914 .byte   0x82,0x41,0x41,0xc3,    0x29,0x99,0x99,0xb0
1915 .byte   0x5a,0x2d,0x2d,0x77,    0x1e,0x0f,0x0f,0x11
1916 .byte   0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
1917 .byte   0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
1918
1919 AES_Td:
1920 .byte   0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53     # Td0
1921 .byte   0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
1922 .byte   0x3b,0xab,0x6b,0xcb,    0x1f,0x9d,0x45,0xf1
1923 .byte   0xac,0xfa,0x58,0xab,    0x4b,0xe3,0x03,0x93
1924 .byte   0x20,0x30,0xfa,0x55,    0xad,0x76,0x6d,0xf6
1925 .byte   0x88,0xcc,0x76,0x91,    0xf5,0x02,0x4c,0x25
1926 .byte   0x4f,0xe5,0xd7,0xfc,    0xc5,0x2a,0xcb,0xd7
1927 .byte   0x26,0x35,0x44,0x80,    0xb5,0x62,0xa3,0x8f
1928 .byte   0xde,0xb1,0x5a,0x49,    0x25,0xba,0x1b,0x67
1929 .byte   0x45,0xea,0x0e,0x98,    0x5d,0xfe,0xc0,0xe1
1930 .byte   0xc3,0x2f,0x75,0x02,    0x81,0x4c,0xf0,0x12
1931 .byte   0x8d,0x46,0x97,0xa3,    0x6b,0xd3,0xf9,0xc6
1932 .byte   0x03,0x8f,0x5f,0xe7,    0x15,0x92,0x9c,0x95
1933 .byte   0xbf,0x6d,0x7a,0xeb,    0x95,0x52,0x59,0xda
1934 .byte   0xd4,0xbe,0x83,0x2d,    0x58,0x74,0x21,0xd3
1935 .byte   0x49,0xe0,0x69,0x29,    0x8e,0xc9,0xc8,0x44
1936 .byte   0x75,0xc2,0x89,0x6a,    0xf4,0x8e,0x79,0x78
1937 .byte   0x99,0x58,0x3e,0x6b,    0x27,0xb9,0x71,0xdd
1938 .byte   0xbe,0xe1,0x4f,0xb6,    0xf0,0x88,0xad,0x17
1939 .byte   0xc9,0x20,0xac,0x66,    0x7d,0xce,0x3a,0xb4
1940 .byte   0x63,0xdf,0x4a,0x18,    0xe5,0x1a,0x31,0x82
1941 .byte   0x97,0x51,0x33,0x60,    0x62,0x53,0x7f,0x45
1942 .byte   0xb1,0x64,0x77,0xe0,    0xbb,0x6b,0xae,0x84
1943 .byte   0xfe,0x81,0xa0,0x1c,    0xf9,0x08,0x2b,0x94
1944 .byte   0x70,0x48,0x68,0x58,    0x8f,0x45,0xfd,0x19
1945 .byte   0x94,0xde,0x6c,0x87,    0x52,0x7b,0xf8,0xb7
1946 .byte   0xab,0x73,0xd3,0x23,    0x72,0x4b,0x02,0xe2
1947 .byte   0xe3,0x1f,0x8f,0x57,    0x66,0x55,0xab,0x2a
1948 .byte   0xb2,0xeb,0x28,0x07,    0x2f,0xb5,0xc2,0x03
1949 .byte   0x86,0xc5,0x7b,0x9a,    0xd3,0x37,0x08,0xa5
1950 .byte   0x30,0x28,0x87,0xf2,    0x23,0xbf,0xa5,0xb2
1951 .byte   0x02,0x03,0x6a,0xba,    0xed,0x16,0x82,0x5c
1952 .byte   0x8a,0xcf,0x1c,0x2b,    0xa7,0x79,0xb4,0x92
1953 .byte   0xf3,0x07,0xf2,0xf0,    0x4e,0x69,0xe2,0xa1
1954 .byte   0x65,0xda,0xf4,0xcd,    0x06,0x05,0xbe,0xd5
1955 .byte   0xd1,0x34,0x62,0x1f,    0xc4,0xa6,0xfe,0x8a
1956 .byte   0x34,0x2e,0x53,0x9d,    0xa2,0xf3,0x55,0xa0
1957 .byte   0x05,0x8a,0xe1,0x32,    0xa4,0xf6,0xeb,0x75
1958 .byte   0x0b,0x83,0xec,0x39,    0x40,0x60,0xef,0xaa
1959 .byte   0x5e,0x71,0x9f,0x06,    0xbd,0x6e,0x10,0x51
1960 .byte   0x3e,0x21,0x8a,0xf9,    0x96,0xdd,0x06,0x3d
1961 .byte   0xdd,0x3e,0x05,0xae,    0x4d,0xe6,0xbd,0x46
1962 .byte   0x91,0x54,0x8d,0xb5,    0x71,0xc4,0x5d,0x05
1963 .byte   0x04,0x06,0xd4,0x6f,    0x60,0x50,0x15,0xff
1964 .byte   0x19,0x98,0xfb,0x24,    0xd6,0xbd,0xe9,0x97
1965 .byte   0x89,0x40,0x43,0xcc,    0x67,0xd9,0x9e,0x77
1966 .byte   0xb0,0xe8,0x42,0xbd,    0x07,0x89,0x8b,0x88
1967 .byte   0xe7,0x19,0x5b,0x38,    0x79,0xc8,0xee,0xdb
1968 .byte   0xa1,0x7c,0x0a,0x47,    0x7c,0x42,0x0f,0xe9
1969 .byte   0xf8,0x84,0x1e,0xc9,    0x00,0x00,0x00,0x00
1970 .byte   0x09,0x80,0x86,0x83,    0x32,0x2b,0xed,0x48
1971 .byte   0x1e,0x11,0x70,0xac,    0x6c,0x5a,0x72,0x4e
1972 .byte   0xfd,0x0e,0xff,0xfb,    0x0f,0x85,0x38,0x56
1973 .byte   0x3d,0xae,0xd5,0x1e,    0x36,0x2d,0x39,0x27
1974 .byte   0x0a,0x0f,0xd9,0x64,    0x68,0x5c,0xa6,0x21
1975 .byte   0x9b,0x5b,0x54,0xd1,    0x24,0x36,0x2e,0x3a
1976 .byte   0x0c,0x0a,0x67,0xb1,    0x93,0x57,0xe7,0x0f
1977 .byte   0xb4,0xee,0x96,0xd2,    0x1b,0x9b,0x91,0x9e
1978 .byte   0x80,0xc0,0xc5,0x4f,    0x61,0xdc,0x20,0xa2
1979 .byte   0x5a,0x77,0x4b,0x69,    0x1c,0x12,0x1a,0x16
1980 .byte   0xe2,0x93,0xba,0x0a,    0xc0,0xa0,0x2a,0xe5
1981 .byte   0x3c,0x22,0xe0,0x43,    0x12,0x1b,0x17,0x1d
1982 .byte   0x0e,0x09,0x0d,0x0b,    0xf2,0x8b,0xc7,0xad
1983 .byte   0x2d,0xb6,0xa8,0xb9,    0x14,0x1e,0xa9,0xc8
1984 .byte   0x57,0xf1,0x19,0x85,    0xaf,0x75,0x07,0x4c
1985 .byte   0xee,0x99,0xdd,0xbb,    0xa3,0x7f,0x60,0xfd
1986 .byte   0xf7,0x01,0x26,0x9f,    0x5c,0x72,0xf5,0xbc
1987 .byte   0x44,0x66,0x3b,0xc5,    0x5b,0xfb,0x7e,0x34
1988 .byte   0x8b,0x43,0x29,0x76,    0xcb,0x23,0xc6,0xdc
1989 .byte   0xb6,0xed,0xfc,0x68,    0xb8,0xe4,0xf1,0x63
1990 .byte   0xd7,0x31,0xdc,0xca,    0x42,0x63,0x85,0x10
1991 .byte   0x13,0x97,0x22,0x40,    0x84,0xc6,0x11,0x20
1992 .byte   0x85,0x4a,0x24,0x7d,    0xd2,0xbb,0x3d,0xf8
1993 .byte   0xae,0xf9,0x32,0x11,    0xc7,0x29,0xa1,0x6d
1994 .byte   0x1d,0x9e,0x2f,0x4b,    0xdc,0xb2,0x30,0xf3
1995 .byte   0x0d,0x86,0x52,0xec,    0x77,0xc1,0xe3,0xd0
1996 .byte   0x2b,0xb3,0x16,0x6c,    0xa9,0x70,0xb9,0x99
1997 .byte   0x11,0x94,0x48,0xfa,    0x47,0xe9,0x64,0x22
1998 .byte   0xa8,0xfc,0x8c,0xc4,    0xa0,0xf0,0x3f,0x1a
1999 .byte   0x56,0x7d,0x2c,0xd8,    0x22,0x33,0x90,0xef
2000 .byte   0x87,0x49,0x4e,0xc7,    0xd9,0x38,0xd1,0xc1
2001 .byte   0x8c,0xca,0xa2,0xfe,    0x98,0xd4,0x0b,0x36
2002 .byte   0xa6,0xf5,0x81,0xcf,    0xa5,0x7a,0xde,0x28
2003 .byte   0xda,0xb7,0x8e,0x26,    0x3f,0xad,0xbf,0xa4
2004 .byte   0x2c,0x3a,0x9d,0xe4,    0x50,0x78,0x92,0x0d
2005 .byte   0x6a,0x5f,0xcc,0x9b,    0x54,0x7e,0x46,0x62
2006 .byte   0xf6,0x8d,0x13,0xc2,    0x90,0xd8,0xb8,0xe8
2007 .byte   0x2e,0x39,0xf7,0x5e,    0x82,0xc3,0xaf,0xf5
2008 .byte   0x9f,0x5d,0x80,0xbe,    0x69,0xd0,0x93,0x7c
2009 .byte   0x6f,0xd5,0x2d,0xa9,    0xcf,0x25,0x12,0xb3
2010 .byte   0xc8,0xac,0x99,0x3b,    0x10,0x18,0x7d,0xa7
2011 .byte   0xe8,0x9c,0x63,0x6e,    0xdb,0x3b,0xbb,0x7b
2012 .byte   0xcd,0x26,0x78,0x09,    0x6e,0x59,0x18,0xf4
2013 .byte   0xec,0x9a,0xb7,0x01,    0x83,0x4f,0x9a,0xa8
2014 .byte   0xe6,0x95,0x6e,0x65,    0xaa,0xff,0xe6,0x7e
2015 .byte   0x21,0xbc,0xcf,0x08,    0xef,0x15,0xe8,0xe6
2016 .byte   0xba,0xe7,0x9b,0xd9,    0x4a,0x6f,0x36,0xce
2017 .byte   0xea,0x9f,0x09,0xd4,    0x29,0xb0,0x7c,0xd6
2018 .byte   0x31,0xa4,0xb2,0xaf,    0x2a,0x3f,0x23,0x31
2019 .byte   0xc6,0xa5,0x94,0x30,    0x35,0xa2,0x66,0xc0
2020 .byte   0x74,0x4e,0xbc,0x37,    0xfc,0x82,0xca,0xa6
2021 .byte   0xe0,0x90,0xd0,0xb0,    0x33,0xa7,0xd8,0x15
2022 .byte   0xf1,0x04,0x98,0x4a,    0x41,0xec,0xda,0xf7
2023 .byte   0x7f,0xcd,0x50,0x0e,    0x17,0x91,0xf6,0x2f
2024 .byte   0x76,0x4d,0xd6,0x8d,    0x43,0xef,0xb0,0x4d
2025 .byte   0xcc,0xaa,0x4d,0x54,    0xe4,0x96,0x04,0xdf
2026 .byte   0x9e,0xd1,0xb5,0xe3,    0x4c,0x6a,0x88,0x1b
2027 .byte   0xc1,0x2c,0x1f,0xb8,    0x46,0x65,0x51,0x7f
2028 .byte   0x9d,0x5e,0xea,0x04,    0x01,0x8c,0x35,0x5d
2029 .byte   0xfa,0x87,0x74,0x73,    0xfb,0x0b,0x41,0x2e
2030 .byte   0xb3,0x67,0x1d,0x5a,    0x92,0xdb,0xd2,0x52
2031 .byte   0xe9,0x10,0x56,0x33,    0x6d,0xd6,0x47,0x13
2032 .byte   0x9a,0xd7,0x61,0x8c,    0x37,0xa1,0x0c,0x7a
2033 .byte   0x59,0xf8,0x14,0x8e,    0xeb,0x13,0x3c,0x89
2034 .byte   0xce,0xa9,0x27,0xee,    0xb7,0x61,0xc9,0x35
2035 .byte   0xe1,0x1c,0xe5,0xed,    0x7a,0x47,0xb1,0x3c
2036 .byte   0x9c,0xd2,0xdf,0x59,    0x55,0xf2,0x73,0x3f
2037 .byte   0x18,0x14,0xce,0x79,    0x73,0xc7,0x37,0xbf
2038 .byte   0x53,0xf7,0xcd,0xea,    0x5f,0xfd,0xaa,0x5b
2039 .byte   0xdf,0x3d,0x6f,0x14,    0x78,0x44,0xdb,0x86
2040 .byte   0xca,0xaf,0xf3,0x81,    0xb9,0x68,0xc4,0x3e
2041 .byte   0x38,0x24,0x34,0x2c,    0xc2,0xa3,0x40,0x5f
2042 .byte   0x16,0x1d,0xc3,0x72,    0xbc,0xe2,0x25,0x0c
2043 .byte   0x28,0x3c,0x49,0x8b,    0xff,0x0d,0x95,0x41
2044 .byte   0x39,0xa8,0x01,0x71,    0x08,0x0c,0xb3,0xde
2045 .byte   0xd8,0xb4,0xe4,0x9c,    0x64,0x56,0xc1,0x90
2046 .byte   0x7b,0xcb,0x84,0x61,    0xd5,0x32,0xb6,0x70
2047 .byte   0x48,0x6c,0x5c,0x74,    0xd0,0xb8,0x57,0x42
2048
2049 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38  # Td4
2050 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
2051 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
2052 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
2053 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
2054 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
2055 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
2056 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
2057 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
2058 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
2059 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
2060 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
2061 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
2062 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
2063 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
2064 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
2065 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
2066 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
2067 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
2068 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
2069 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
2070 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
2071 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
2072 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
2073 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
2074 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
2075 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
2076 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
2077 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
2078 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
2079 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
2080 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
2081
2082 AES_Te4:
2083 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5  # Te4
2084 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
2085 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
2086 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
2087 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
2088 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
2089 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
2090 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
2091 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
2092 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
2093 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
2094 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
2095 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
2096 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
2097 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
2098 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
2099 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
2100 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
2101 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
2102 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
2103 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
2104 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
2105 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
2106 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
2107 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
2108 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
2109 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
2110 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
2111 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
2112 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
2113 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
2114 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
2115
2116 .byte   0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00     # rcon
2117 .byte   0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
2118 .byte   0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
2119 .byte   0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
2120 .byte   0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
2121 ___
2122 \f
# Post-process the assembly text accumulated in $code, one line at a
# time, and write the result to STDOUT.
foreach (split("\n",$code)) {
        # Replace every `...` span with the value of the Perl expression
        # found between the backticks.
        s/\`([^\`]*)\`/eval $1/ge;

        # made-up _instructions, _xtr, _ins, _ins2, _ror and _bias, cope
        # with byte order dependencies...
        if (/^\s+_/) {
            # Two-operand shorthand "_op $rd,arg" is expanded to the
            # three-operand form "_op $rd,$rd,arg".
            s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;

            # The substitutions below are chained with "or", so at most
            # one of them fires per line.  Each pseudo-instruction is
            # turned into a real one; the numeric operand N is used as
            # written when $big_endian is set and transformed otherwise:
            #   _xtr  -> srl          N  /  24-N  (N may carry "-2" suffixes)
            #   _ins  -> sll          N  /  24-N
            #   _ins2 -> ins ...,N,8  N  /  24-N
            #   _ror  -> srl          N  /  N*-1
            #   _bias -> sll          N  /  (N-16)&31
            s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
                sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
                                        :               eval("24-$3"))/e or
            s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
                sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
                                        :               eval("24-$3"))/e or
            s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
                sprintf("ins\t$1,$2,%d,8",$big_endian ? eval($3)
                                        :               eval("24-$3"))/e or
            s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
                sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
                                        :               eval("$3*-1"))/e or
            s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
                sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
                                        :               eval("($3-16)&31"))/e;

            # Clean up the shifts produced above (again at most one
            # rule fires): an srl by a negative count becomes an sll by
            # the positive count, an srl by 0 becomes "and ...,0xff", and
            # an sll by 0 is commented out with a leading "#".
            s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
                sprintf("sll\t$1,$2,$3")/e                              or
            s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
                sprintf("and\t$1,$2,0xff")/e                            or
            s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
        }

        # convert lwl/lwr and swr/swl to little-endian order
        #
        # The greedy ".*" leaves only the final digit before "($reg)" in
        # $2.  "&" binds looser than "+" and "-", so the mask covers the
        # whole sum: the digit d becomes (d-1)&3 for lwl/swl and (d+1)&3
        # for lwr/swr.
        if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
            s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
                sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e        or
            s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
                sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
        }

        # When $big_endian is not set, mirror bit positions: a rotr count
        # N becomes 32-N and the position of an 8-bit ext becomes 24-N.
        if (!$big_endian) {
            s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
            s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
        }

        print $_,"\n";
}

# Buffered write errors (e.g. disk full, closed pipe) are only reported
# when the handle is closed, so fail loudly instead of silently leaving
# truncated output behind.
close STDOUT or die "error closing STDOUT: $!";