# MIPS assembly pack: adapt it for MIPS[32|64]R6.
# [openssl.git] / crypto / aes / asm / aes-mips.pl
1 #! /usr/bin/env perl
2 # Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the OpenSSL license (the "License").  You may not use
5 # this file except in compliance with the License.  You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16
17 # AES for MIPS
18
19 # October 2010
20 #
21 # Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
22 # spends ~68 cycles per byte processed with 128-bit key. This is ~16%
23 # faster than gcc-generated code, which is not very impressive. But
24 # recall that compressed S-box requires extra processing, namely
25 # additional rotations. Rotations are implemented with lwl/lwr pairs,
26 # which is normally used for loading unaligned data. Another cool
27 # thing about this module is its endian neutrality, which means that
28 # it processes data without ever changing byte order...
29
30 # September 2012
31 #
32 # Add MIPS32R2 (~10% less instructions) and SmartMIPS ASE (further
33 # ~25% less instructions) code. Note that there is no run-time switch,
34 # instead, code path is chosen upon pre-process time, pass -mips32r2
35 # or/and -msmartmips.
36
37 ######################################################################
38 # There is a number of MIPS ABI in use, O32 and N32/64 are most
39 # widely used. Then there is a new contender: NUBI. It appears that if
40 # one picks the latter, it's possible to arrange code in ABI neutral
41 # manner. Therefore let's stick to NUBI register layout:
42 #
# NUBI register-name map: temporaries, argument registers, callee-saved
# registers and the special-purpose set, keyed by hardware register number.
($zero,$at,$t0,$t1,$t2)           = map { "\$$_" } 0, 1, 2, 24, 25;
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7) = map { "\$$_" } 4 .. 11;
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11) = map { "\$$_" } 12 .. 23;
($gp,$tp,$sp,$fp,$ra)             = map { "\$$_" } 3, 28 .. 31;
47 #
48 # The return value is placed in $a0. Following coding rules facilitate
49 # interoperability:
50 #
51 # - never ever touch $tp, "thread pointer", former $gp;
52 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
53 #   old code];
54 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
55 #
56 # For reference here is register layout for N32/64 MIPS ABIs:
57 #
58 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
59 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
60 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
61 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
62 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
63 #
$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64

# Select pointer-width mnemonics and the register-save size for the ABI.
# 64-bit flavours (64/n32) take the doubleword forms — daddu/dsubu/dins/dsll
# incidentally work even on n32 — all others take the word forms.
my @abi_ops = ($flavour =~ /64|n32/i)
	? ("dla","daddu","dsubu","dins","sd","ld","dsll",8)
	: ("la", "addu", "subu", "ins", "sw","lw","sll", 4);
($PTR_LA,$PTR_ADD,$PTR_SUB,$PTR_INS,$REG_S,$REG_L,$PTR_SLL,$SZREG) = @abi_ops;

# Register carrying the function address on entry (used for PIC setup):
# $t0 under NUBI, $t2 otherwise.
$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
86 #
87 # <appro@openssl.org>
88 #
89 ######################################################################
90
# Determine target endianness: ask the cross-compiler when one is given,
# otherwise fall back to the build host's byte order.
$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});

if (!defined($big_endian))
{    $big_endian=(unpack('L',pack('N',1))==1);   }

# Locate the output file among the remaining arguments (first one that
# looks like a file name) and redirect STDOUT to it.  The previous code
# scanned the argument list twice and opened STDOUT twice; do it exactly
# once, with a checked three-argument open, and keep writing to the
# terminal when no output file was named.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">",$output or die "can't open $output: $!" if (defined($output));
101
# Byte offsets of the most- and least-significant byte within a 32-bit
# word, written for big-endian layout; per the trailing note they are
# converted automatically for little-endian targets (presumably by a
# postprocessing pass over $code not visible in this chunk — confirm).
my ($MSB,$LSB)=(0,3);   # automatically converted to little-endian

# Emit the file preamble: run-time arch-detection macros, optional FIPS
# symbol renaming, and PIC mode where the target configuration calls for it.
$code.=<<___;
#include "mips_arch.h"

.text
#ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h>
#endif

#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
.option pic2
#endif
.set	noat
___
117 \f
{{{
# Stack frame with room for 16 register slots; the .mask value differs
# under NUBI, where the prologue additionally saves $12-$15 and $gp
# (see the nubi-only $REG_S block in AES_encrypt below).
my $FRAMESIZE=16*$SZREG;
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";

# Register assignment for the round code: the eight argument registers
# carry inp/out/key/table plus the four 32-bit state words; $at,$t0-$t2
# serve as table-index scratch; $12-$23 hold the twelve looked-up words
# (shadowing the global $t0-$t2 names within this scope); $gp walks the
# key schedule and $fp counts rounds.
my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
my ($key0,$cnt)=($gp,$fp);
126
# instruction ordering is "stolen" from output from MIPSpro assembler
# invoked with -mips3 -O3 arguments...
#
# _mips_AES_encrypt: inner routine.  Expects the block in $s0-$s3, the
# key schedule in $key and the Te tables in $Tbl; leaves the transformed
# block in $s0-$s3.  Three code paths (SmartMIPS lwxs/ext, MIPS32R2/
# MIPS64R2 rotr/ins, generic lwl/lwr) are chosen at pre-process time.
$code.=<<___;
130 .align  5
131 .ent    _mips_AES_encrypt
132 _mips_AES_encrypt:
133         .frame  $sp,0,$ra
134         .set    reorder
135         lw      $t0,0($key)
136         lw      $t1,4($key)
137         lw      $t2,8($key)
138         lw      $t3,12($key)
139         lw      $cnt,240($key)
140         $PTR_ADD $key0,$key,16
141
142         xor     $s0,$t0
143         xor     $s1,$t1
144         xor     $s2,$t2
145         xor     $s3,$t3
146
147         subu    $cnt,1
148 #if defined(__mips_smartmips)
149         ext     $i0,$s1,16,8
150 .Loop_enc:
151         ext     $i1,$s2,16,8
152         ext     $i2,$s3,16,8
153         ext     $i3,$s0,16,8
154         lwxs    $t0,$i0($Tbl)           # Te1[s1>>16]
155         ext     $i0,$s2,8,8
156         lwxs    $t1,$i1($Tbl)           # Te1[s2>>16]
157         ext     $i1,$s3,8,8
158         lwxs    $t2,$i2($Tbl)           # Te1[s3>>16]
159         ext     $i2,$s0,8,8
160         lwxs    $t3,$i3($Tbl)           # Te1[s0>>16]
161         ext     $i3,$s1,8,8
162
163         lwxs    $t4,$i0($Tbl)           # Te2[s2>>8]
164         ext     $i0,$s3,0,8
165         lwxs    $t5,$i1($Tbl)           # Te2[s3>>8]
166         ext     $i1,$s0,0,8
167         lwxs    $t6,$i2($Tbl)           # Te2[s0>>8]
168         ext     $i2,$s1,0,8
169         lwxs    $t7,$i3($Tbl)           # Te2[s1>>8]
170         ext     $i3,$s2,0,8
171
172         lwxs    $t8,$i0($Tbl)           # Te3[s3]
173         ext     $i0,$s0,24,8
174         lwxs    $t9,$i1($Tbl)           # Te3[s0]
175         ext     $i1,$s1,24,8
176         lwxs    $t10,$i2($Tbl)          # Te3[s1]
177         ext     $i2,$s2,24,8
178         lwxs    $t11,$i3($Tbl)          # Te3[s2]
179         ext     $i3,$s3,24,8
180
181         rotr    $t0,$t0,8
182         rotr    $t1,$t1,8
183         rotr    $t2,$t2,8
184         rotr    $t3,$t3,8
185
186         rotr    $t4,$t4,16
187         rotr    $t5,$t5,16
188         rotr    $t6,$t6,16
189         rotr    $t7,$t7,16
190
191         xor     $t0,$t4
192         lwxs    $t4,$i0($Tbl)           # Te0[s0>>24]
193         xor     $t1,$t5
194         lwxs    $t5,$i1($Tbl)           # Te0[s1>>24]
195         xor     $t2,$t6
196         lwxs    $t6,$i2($Tbl)           # Te0[s2>>24]
197         xor     $t3,$t7
198         lwxs    $t7,$i3($Tbl)           # Te0[s3>>24]
199
200         rotr    $t8,$t8,24
201         lw      $s0,0($key0)
202         rotr    $t9,$t9,24
203         lw      $s1,4($key0)
204         rotr    $t10,$t10,24
205         lw      $s2,8($key0)
206         rotr    $t11,$t11,24
207         lw      $s3,12($key0)
208
209         xor     $t0,$t8
210         xor     $t1,$t9
211         xor     $t2,$t10
212         xor     $t3,$t11
213
214         xor     $t0,$t4
215         xor     $t1,$t5
216         xor     $t2,$t6
217         xor     $t3,$t7
218
219         subu    $cnt,1
220         $PTR_ADD $key0,16
221         xor     $s0,$t0
222         xor     $s1,$t1
223         xor     $s2,$t2
224         xor     $s3,$t3
225         .set    noreorder
226         bnez    $cnt,.Loop_enc
227         ext     $i0,$s1,16,8
228
229         _xtr    $i0,$s1,16-2
230 #else
231         _xtr    $i0,$s1,16-2
232 .Loop_enc:
233         _xtr    $i1,$s2,16-2
234         _xtr    $i2,$s3,16-2
235         _xtr    $i3,$s0,16-2
236         and     $i0,0x3fc
237         and     $i1,0x3fc
238         and     $i2,0x3fc
239         and     $i3,0x3fc
240         $PTR_ADD $i0,$Tbl
241         $PTR_ADD $i1,$Tbl
242         $PTR_ADD $i2,$Tbl
243         $PTR_ADD $i3,$Tbl
244 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
245         lw      $t0,0($i0)              # Te1[s1>>16]
246         _xtr    $i0,$s2,8-2
247         lw      $t1,0($i1)              # Te1[s2>>16]
248         _xtr    $i1,$s3,8-2
249         lw      $t2,0($i2)              # Te1[s3>>16]
250         _xtr    $i2,$s0,8-2
251         lw      $t3,0($i3)              # Te1[s0>>16]
252         _xtr    $i3,$s1,8-2
253 #else
254         lwl     $t0,3($i0)              # Te1[s1>>16]
255         lwl     $t1,3($i1)              # Te1[s2>>16]
256         lwl     $t2,3($i2)              # Te1[s3>>16]
257         lwl     $t3,3($i3)              # Te1[s0>>16]
258         lwr     $t0,2($i0)              # Te1[s1>>16]
259         _xtr    $i0,$s2,8-2
260         lwr     $t1,2($i1)              # Te1[s2>>16]
261         _xtr    $i1,$s3,8-2
262         lwr     $t2,2($i2)              # Te1[s3>>16]
263         _xtr    $i2,$s0,8-2
264         lwr     $t3,2($i3)              # Te1[s0>>16]
265         _xtr    $i3,$s1,8-2
266 #endif
267         and     $i0,0x3fc
268         and     $i1,0x3fc
269         and     $i2,0x3fc
270         and     $i3,0x3fc
271         $PTR_ADD $i0,$Tbl
272         $PTR_ADD $i1,$Tbl
273         $PTR_ADD $i2,$Tbl
274         $PTR_ADD $i3,$Tbl
275 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
276         rotr    $t0,$t0,8
277         rotr    $t1,$t1,8
278         rotr    $t2,$t2,8
279         rotr    $t3,$t3,8
280 # if defined(_MIPSEL)
281         lw      $t4,0($i0)              # Te2[s2>>8]
282         _xtr    $i0,$s3,0-2
283         lw      $t5,0($i1)              # Te2[s3>>8]
284         _xtr    $i1,$s0,0-2
285         lw      $t6,0($i2)              # Te2[s0>>8]
286         _xtr    $i2,$s1,0-2
287         lw      $t7,0($i3)              # Te2[s1>>8]
288         _xtr    $i3,$s2,0-2
289
290         and     $i0,0x3fc
291         and     $i1,0x3fc
292         and     $i2,0x3fc
293         and     $i3,0x3fc
294         $PTR_ADD $i0,$Tbl
295         $PTR_ADD $i1,$Tbl
296         $PTR_ADD $i2,$Tbl
297         $PTR_ADD $i3,$Tbl
298         lw      $t8,0($i0)              # Te3[s3]
299         $PTR_INS $i0,$s0,2,8
300         lw      $t9,0($i1)              # Te3[s0]
301         $PTR_INS $i1,$s1,2,8
302         lw      $t10,0($i2)             # Te3[s1]
303         $PTR_INS $i2,$s2,2,8
304         lw      $t11,0($i3)             # Te3[s2]
305         $PTR_INS $i3,$s3,2,8
306 # else
307         lw      $t4,0($i0)              # Te2[s2>>8]
308         $PTR_INS $i0,$s3,2,8
309         lw      $t5,0($i1)              # Te2[s3>>8]
310         $PTR_INS $i1,$s0,2,8
311         lw      $t6,0($i2)              # Te2[s0>>8]
312         $PTR_INS $i2,$s1,2,8
313         lw      $t7,0($i3)              # Te2[s1>>8]
314         $PTR_INS $i3,$s2,2,8
315
316         lw      $t8,0($i0)              # Te3[s3]
317         _xtr    $i0,$s0,24-2
318         lw      $t9,0($i1)              # Te3[s0]
319         _xtr    $i1,$s1,24-2
320         lw      $t10,0($i2)             # Te3[s1]
321         _xtr    $i2,$s2,24-2
322         lw      $t11,0($i3)             # Te3[s2]
323         _xtr    $i3,$s3,24-2
324
325         and     $i0,0x3fc
326         and     $i1,0x3fc
327         and     $i2,0x3fc
328         and     $i3,0x3fc
329         $PTR_ADD $i0,$Tbl
330         $PTR_ADD $i1,$Tbl
331         $PTR_ADD $i2,$Tbl
332         $PTR_ADD $i3,$Tbl
333 # endif
334         rotr    $t4,$t4,16
335         rotr    $t5,$t5,16
336         rotr    $t6,$t6,16
337         rotr    $t7,$t7,16
338
339         rotr    $t8,$t8,24
340         rotr    $t9,$t9,24
341         rotr    $t10,$t10,24
342         rotr    $t11,$t11,24
343 #else
344         lwl     $t4,2($i0)              # Te2[s2>>8]
345         lwl     $t5,2($i1)              # Te2[s3>>8]
346         lwl     $t6,2($i2)              # Te2[s0>>8]
347         lwl     $t7,2($i3)              # Te2[s1>>8]
348         lwr     $t4,1($i0)              # Te2[s2>>8]
349         _xtr    $i0,$s3,0-2
350         lwr     $t5,1($i1)              # Te2[s3>>8]
351         _xtr    $i1,$s0,0-2
352         lwr     $t6,1($i2)              # Te2[s0>>8]
353         _xtr    $i2,$s1,0-2
354         lwr     $t7,1($i3)              # Te2[s1>>8]
355         _xtr    $i3,$s2,0-2
356
357         and     $i0,0x3fc
358         and     $i1,0x3fc
359         and     $i2,0x3fc
360         and     $i3,0x3fc
361         $PTR_ADD $i0,$Tbl
362         $PTR_ADD $i1,$Tbl
363         $PTR_ADD $i2,$Tbl
364         $PTR_ADD $i3,$Tbl
365         lwl     $t8,1($i0)              # Te3[s3]
366         lwl     $t9,1($i1)              # Te3[s0]
367         lwl     $t10,1($i2)             # Te3[s1]
368         lwl     $t11,1($i3)             # Te3[s2]
369         lwr     $t8,0($i0)              # Te3[s3]
370         _xtr    $i0,$s0,24-2
371         lwr     $t9,0($i1)              # Te3[s0]
372         _xtr    $i1,$s1,24-2
373         lwr     $t10,0($i2)             # Te3[s1]
374         _xtr    $i2,$s2,24-2
375         lwr     $t11,0($i3)             # Te3[s2]
376         _xtr    $i3,$s3,24-2
377
378         and     $i0,0x3fc
379         and     $i1,0x3fc
380         and     $i2,0x3fc
381         and     $i3,0x3fc
382         $PTR_ADD $i0,$Tbl
383         $PTR_ADD $i1,$Tbl
384         $PTR_ADD $i2,$Tbl
385         $PTR_ADD $i3,$Tbl
386 #endif
387         xor     $t0,$t4
388         lw      $t4,0($i0)              # Te0[s0>>24]
389         xor     $t1,$t5
390         lw      $t5,0($i1)              # Te0[s1>>24]
391         xor     $t2,$t6
392         lw      $t6,0($i2)              # Te0[s2>>24]
393         xor     $t3,$t7
394         lw      $t7,0($i3)              # Te0[s3>>24]
395
396         xor     $t0,$t8
397         lw      $s0,0($key0)
398         xor     $t1,$t9
399         lw      $s1,4($key0)
400         xor     $t2,$t10
401         lw      $s2,8($key0)
402         xor     $t3,$t11
403         lw      $s3,12($key0)
404
405         xor     $t0,$t4
406         xor     $t1,$t5
407         xor     $t2,$t6
408         xor     $t3,$t7
409
410         subu    $cnt,1
411         $PTR_ADD $key0,16
412         xor     $s0,$t0
413         xor     $s1,$t1
414         xor     $s2,$t2
415         xor     $s3,$t3
416         .set    noreorder
417         bnez    $cnt,.Loop_enc
418         _xtr    $i0,$s1,16-2
419 #endif
420
421         .set    reorder
422         _xtr    $i1,$s2,16-2
423         _xtr    $i2,$s3,16-2
424         _xtr    $i3,$s0,16-2
425         and     $i0,0x3fc
426         and     $i1,0x3fc
427         and     $i2,0x3fc
428         and     $i3,0x3fc
429         $PTR_ADD $i0,$Tbl
430         $PTR_ADD $i1,$Tbl
431         $PTR_ADD $i2,$Tbl
432         $PTR_ADD $i3,$Tbl
433         lbu     $t0,2($i0)              # Te4[s1>>16]
434         _xtr    $i0,$s2,8-2
435         lbu     $t1,2($i1)              # Te4[s2>>16]
436         _xtr    $i1,$s3,8-2
437         lbu     $t2,2($i2)              # Te4[s3>>16]
438         _xtr    $i2,$s0,8-2
439         lbu     $t3,2($i3)              # Te4[s0>>16]
440         _xtr    $i3,$s1,8-2
441
442         and     $i0,0x3fc
443         and     $i1,0x3fc
444         and     $i2,0x3fc
445         and     $i3,0x3fc
446         $PTR_ADD $i0,$Tbl
447         $PTR_ADD $i1,$Tbl
448         $PTR_ADD $i2,$Tbl
449         $PTR_ADD $i3,$Tbl
450 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
451 # if defined(_MIPSEL)
452         lbu     $t4,2($i0)              # Te4[s2>>8]
453         $PTR_INS $i0,$s0,2,8
454         lbu     $t5,2($i1)              # Te4[s3>>8]
455         $PTR_INS $i1,$s1,2,8
456         lbu     $t6,2($i2)              # Te4[s0>>8]
457         $PTR_INS $i2,$s2,2,8
458         lbu     $t7,2($i3)              # Te4[s1>>8]
459         $PTR_INS $i3,$s3,2,8
460
461         lbu     $t8,2($i0)              # Te4[s0>>24]
462         _xtr    $i0,$s3,0-2
463         lbu     $t9,2($i1)              # Te4[s1>>24]
464         _xtr    $i1,$s0,0-2
465         lbu     $t10,2($i2)             # Te4[s2>>24]
466         _xtr    $i2,$s1,0-2
467         lbu     $t11,2($i3)             # Te4[s3>>24]
468         _xtr    $i3,$s2,0-2
469
470         and     $i0,0x3fc
471         and     $i1,0x3fc
472         and     $i2,0x3fc
473         and     $i3,0x3fc
474         $PTR_ADD $i0,$Tbl
475         $PTR_ADD $i1,$Tbl
476         $PTR_ADD $i2,$Tbl
477         $PTR_ADD $i3,$Tbl
478 # else
479         lbu     $t4,2($i0)              # Te4[s2>>8]
480         _xtr    $i0,$s0,24-2
481         lbu     $t5,2($i1)              # Te4[s3>>8]
482         _xtr    $i1,$s1,24-2
483         lbu     $t6,2($i2)              # Te4[s0>>8]
484         _xtr    $i2,$s2,24-2
485         lbu     $t7,2($i3)              # Te4[s1>>8]
486         _xtr    $i3,$s3,24-2
487
488         and     $i0,0x3fc
489         and     $i1,0x3fc
490         and     $i2,0x3fc
491         and     $i3,0x3fc
492         $PTR_ADD $i0,$Tbl
493         $PTR_ADD $i1,$Tbl
494         $PTR_ADD $i2,$Tbl
495         $PTR_ADD $i3,$Tbl
496         lbu     $t8,2($i0)              # Te4[s0>>24]
497         $PTR_INS $i0,$s3,2,8
498         lbu     $t9,2($i1)              # Te4[s1>>24]
499         $PTR_INS $i1,$s0,2,8
500         lbu     $t10,2($i2)             # Te4[s2>>24]
501         $PTR_INS $i2,$s1,2,8
502         lbu     $t11,2($i3)             # Te4[s3>>24]
503         $PTR_INS $i3,$s2,2,8
504 # endif
505         _ins    $t0,16
506         _ins    $t1,16
507         _ins    $t2,16
508         _ins    $t3,16
509
510         _ins2   $t0,$t4,8
511         lbu     $t4,2($i0)              # Te4[s3]
512         _ins2   $t1,$t5,8
513         lbu     $t5,2($i1)              # Te4[s0]
514         _ins2   $t2,$t6,8
515         lbu     $t6,2($i2)              # Te4[s1]
516         _ins2   $t3,$t7,8
517         lbu     $t7,2($i3)              # Te4[s2]
518
519         _ins2   $t0,$t8,24
520         lw      $s0,0($key0)
521         _ins2   $t1,$t9,24
522         lw      $s1,4($key0)
523         _ins2   $t2,$t10,24
524         lw      $s2,8($key0)
525         _ins2   $t3,$t11,24
526         lw      $s3,12($key0)
527
528         _ins2   $t0,$t4,0
529         _ins2   $t1,$t5,0
530         _ins2   $t2,$t6,0
531         _ins2   $t3,$t7,0
532 #else
533         lbu     $t4,2($i0)              # Te4[s2>>8]
534         _xtr    $i0,$s0,24-2
535         lbu     $t5,2($i1)              # Te4[s3>>8]
536         _xtr    $i1,$s1,24-2
537         lbu     $t6,2($i2)              # Te4[s0>>8]
538         _xtr    $i2,$s2,24-2
539         lbu     $t7,2($i3)              # Te4[s1>>8]
540         _xtr    $i3,$s3,24-2
541
542         and     $i0,0x3fc
543         and     $i1,0x3fc
544         and     $i2,0x3fc
545         and     $i3,0x3fc
546         $PTR_ADD $i0,$Tbl
547         $PTR_ADD $i1,$Tbl
548         $PTR_ADD $i2,$Tbl
549         $PTR_ADD $i3,$Tbl
550         lbu     $t8,2($i0)              # Te4[s0>>24]
551         _xtr    $i0,$s3,0-2
552         lbu     $t9,2($i1)              # Te4[s1>>24]
553         _xtr    $i1,$s0,0-2
554         lbu     $t10,2($i2)             # Te4[s2>>24]
555         _xtr    $i2,$s1,0-2
556         lbu     $t11,2($i3)             # Te4[s3>>24]
557         _xtr    $i3,$s2,0-2
558
559         and     $i0,0x3fc
560         and     $i1,0x3fc
561         and     $i2,0x3fc
562         and     $i3,0x3fc
563         $PTR_ADD $i0,$Tbl
564         $PTR_ADD $i1,$Tbl
565         $PTR_ADD $i2,$Tbl
566         $PTR_ADD $i3,$Tbl
567
568         _ins    $t0,16
569         _ins    $t1,16
570         _ins    $t2,16
571         _ins    $t3,16
572
573         _ins    $t4,8
574         _ins    $t5,8
575         _ins    $t6,8
576         _ins    $t7,8
577
578         xor     $t0,$t4
579         lbu     $t4,2($i0)              # Te4[s3]
580         xor     $t1,$t5
581         lbu     $t5,2($i1)              # Te4[s0]
582         xor     $t2,$t6
583         lbu     $t6,2($i2)              # Te4[s1]
584         xor     $t3,$t7
585         lbu     $t7,2($i3)              # Te4[s2]
586
587         _ins    $t8,24
588         lw      $s0,0($key0)
589         _ins    $t9,24
590         lw      $s1,4($key0)
591         _ins    $t10,24
592         lw      $s2,8($key0)
593         _ins    $t11,24
594         lw      $s3,12($key0)
595
596         xor     $t0,$t8
597         xor     $t1,$t9
598         xor     $t2,$t10
599         xor     $t3,$t11
600
601         _ins    $t4,0
602         _ins    $t5,0
603         _ins    $t6,0
604         _ins    $t7,0
605
606         xor     $t0,$t4
607         xor     $t1,$t5
608         xor     $t2,$t6
609         xor     $t3,$t7
610 #endif
611         xor     $s0,$t0
612         xor     $s1,$t1
613         xor     $s2,$t2
614         xor     $s3,$t3
615
616         jr      $ra
617 .end    _mips_AES_encrypt
618
619 .align  5
620 .globl  AES_encrypt
621 .ent    AES_encrypt
622 AES_encrypt:
623         .frame  $sp,$FRAMESIZE,$ra
624         .mask   $SAVED_REGS_MASK,-$SZREG
625         .set    noreorder
626 ___
627 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
628         .cpload $pf
629 ___
630 $code.=<<___;
631         $PTR_SUB $sp,$FRAMESIZE
632         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
633         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
634         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
635         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
636         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
637         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
638         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
639         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
640         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
641         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
642 ___
643 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
644         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
645         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
646         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
647         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
648         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
649 ___
650 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
651         .cplocal        $Tbl
652         .cpsetup        $pf,$zero,AES_encrypt
653 ___
654 $code.=<<___;
655         .set    reorder
656         $PTR_LA $Tbl,AES_Te             # PIC-ified 'load address'
657
658 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
659         lw      $s0,0($inp)
660         lw      $s1,4($inp)
661         lw      $s2,8($inp)
662         lw      $s3,12($inp)
663 #else
664         lwl     $s0,0+$MSB($inp)
665         lwl     $s1,4+$MSB($inp)
666         lwl     $s2,8+$MSB($inp)
667         lwl     $s3,12+$MSB($inp)
668         lwr     $s0,0+$LSB($inp)
669         lwr     $s1,4+$LSB($inp)
670         lwr     $s2,8+$LSB($inp)
671         lwr     $s3,12+$LSB($inp)
672 #endif
673
674         bal     _mips_AES_encrypt
675
676 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
677         sw      $s0,0($out)
678         sw      $s1,4($out)
679         sw      $s2,8($out)
680         sw      $s3,12($out)
681 #else
682         swr     $s0,0+$LSB($out)
683         swr     $s1,4+$LSB($out)
684         swr     $s2,8+$LSB($out)
685         swr     $s3,12+$LSB($out)
686         swl     $s0,0+$MSB($out)
687         swl     $s1,4+$MSB($out)
688         swl     $s2,8+$MSB($out)
689         swl     $s3,12+$MSB($out)
690 #endif
691
692         .set    noreorder
693         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
694         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
695         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
696         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
697         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
698         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
699         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
700         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
701         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
702         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
703 ___
704 $code.=<<___ if ($flavour =~ /nubi/i);
705         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
706         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
707         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
708         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
709         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
710 ___
711 $code.=<<___;
712         jr      $ra
713         $PTR_ADD $sp,$FRAMESIZE
714 .end    AES_encrypt
715 ___
716 \f
# _mips_AES_decrypt: inner decryption routine, the mirror image of
# _mips_AES_encrypt above, indexing the Td tables instead of Te.
$code.=<<___;
718 .align  5
719 .ent    _mips_AES_decrypt
720 _mips_AES_decrypt:
721         .frame  $sp,0,$ra
722         .set    reorder
723         lw      $t0,0($key)
724         lw      $t1,4($key)
725         lw      $t2,8($key)
726         lw      $t3,12($key)
727         lw      $cnt,240($key)
728         $PTR_ADD $key0,$key,16
729
730         xor     $s0,$t0
731         xor     $s1,$t1
732         xor     $s2,$t2
733         xor     $s3,$t3
734
735         subu    $cnt,1
736 #if defined(__mips_smartmips)
737         ext     $i0,$s3,16,8
738 .Loop_dec:
739         ext     $i1,$s0,16,8
740         ext     $i2,$s1,16,8
741         ext     $i3,$s2,16,8
742         lwxs    $t0,$i0($Tbl)           # Td1[s3>>16]
743         ext     $i0,$s2,8,8
744         lwxs    $t1,$i1($Tbl)           # Td1[s0>>16]
745         ext     $i1,$s3,8,8
746         lwxs    $t2,$i2($Tbl)           # Td1[s1>>16]
747         ext     $i2,$s0,8,8
748         lwxs    $t3,$i3($Tbl)           # Td1[s2>>16]
749         ext     $i3,$s1,8,8
750
751         lwxs    $t4,$i0($Tbl)           # Td2[s2>>8]
752         ext     $i0,$s1,0,8
753         lwxs    $t5,$i1($Tbl)           # Td2[s3>>8]
754         ext     $i1,$s2,0,8
755         lwxs    $t6,$i2($Tbl)           # Td2[s0>>8]
756         ext     $i2,$s3,0,8
757         lwxs    $t7,$i3($Tbl)           # Td2[s1>>8]
758         ext     $i3,$s0,0,8
759
760         lwxs    $t8,$i0($Tbl)           # Td3[s1]
761         ext     $i0,$s0,24,8
762         lwxs    $t9,$i1($Tbl)           # Td3[s2]
763         ext     $i1,$s1,24,8
764         lwxs    $t10,$i2($Tbl)          # Td3[s3]
765         ext     $i2,$s2,24,8
766         lwxs    $t11,$i3($Tbl)          # Td3[s0]
767         ext     $i3,$s3,24,8
768
769         rotr    $t0,$t0,8
770         rotr    $t1,$t1,8
771         rotr    $t2,$t2,8
772         rotr    $t3,$t3,8
773
774         rotr    $t4,$t4,16
775         rotr    $t5,$t5,16
776         rotr    $t6,$t6,16
777         rotr    $t7,$t7,16
778
779         xor     $t0,$t4
780         lwxs    $t4,$i0($Tbl)           # Td0[s0>>24]
781         xor     $t1,$t5
782         lwxs    $t5,$i1($Tbl)           # Td0[s1>>24]
783         xor     $t2,$t6
784         lwxs    $t6,$i2($Tbl)           # Td0[s2>>24]
785         xor     $t3,$t7
786         lwxs    $t7,$i3($Tbl)           # Td0[s3>>24]
787
788         rotr    $t8,$t8,24
789         lw      $s0,0($key0)
790         rotr    $t9,$t9,24
791         lw      $s1,4($key0)
792         rotr    $t10,$t10,24
793         lw      $s2,8($key0)
794         rotr    $t11,$t11,24
795         lw      $s3,12($key0)
796
797         xor     $t0,$t8
798         xor     $t1,$t9
799         xor     $t2,$t10
800         xor     $t3,$t11
801
802         xor     $t0,$t4
803         xor     $t1,$t5
804         xor     $t2,$t6
805         xor     $t3,$t7
806
807         subu    $cnt,1
808         $PTR_ADD $key0,16
809         xor     $s0,$t0
810         xor     $s1,$t1
811         xor     $s2,$t2
812         xor     $s3,$t3
813         .set    noreorder
814         bnez    $cnt,.Loop_dec
815         ext     $i0,$s3,16,8
816
817         _xtr    $i0,$s3,16-2
818 #else
819         _xtr    $i0,$s3,16-2
820 .Loop_dec:
821         _xtr    $i1,$s0,16-2
822         _xtr    $i2,$s1,16-2
823         _xtr    $i3,$s2,16-2
824         and     $i0,0x3fc
825         and     $i1,0x3fc
826         and     $i2,0x3fc
827         and     $i3,0x3fc
828         $PTR_ADD $i0,$Tbl
829         $PTR_ADD $i1,$Tbl
830         $PTR_ADD $i2,$Tbl
831         $PTR_ADD $i3,$Tbl
832 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
833         lw      $t0,0($i0)              # Td1[s3>>16]
834         _xtr    $i0,$s2,8-2
835         lw      $t1,0($i1)              # Td1[s0>>16]
836         _xtr    $i1,$s3,8-2
837         lw      $t2,0($i2)              # Td1[s1>>16]
838         _xtr    $i2,$s0,8-2
839         lw      $t3,0($i3)              # Td1[s2>>16]
840         _xtr    $i3,$s1,8-2
841 #else
842         lwl     $t0,3($i0)              # Td1[s3>>16]
843         lwl     $t1,3($i1)              # Td1[s0>>16]
844         lwl     $t2,3($i2)              # Td1[s1>>16]
845         lwl     $t3,3($i3)              # Td1[s2>>16]
846         lwr     $t0,2($i0)              # Td1[s3>>16]
847         _xtr    $i0,$s2,8-2
848         lwr     $t1,2($i1)              # Td1[s0>>16]
849         _xtr    $i1,$s3,8-2
850         lwr     $t2,2($i2)              # Td1[s1>>16]
851         _xtr    $i2,$s0,8-2
852         lwr     $t3,2($i3)              # Td1[s2>>16]
853         _xtr    $i3,$s1,8-2
854 #endif
855
856         and     $i0,0x3fc
857         and     $i1,0x3fc
858         and     $i2,0x3fc
859         and     $i3,0x3fc
860         $PTR_ADD $i0,$Tbl
861         $PTR_ADD $i1,$Tbl
862         $PTR_ADD $i2,$Tbl
863         $PTR_ADD $i3,$Tbl
864 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
865         rotr    $t0,$t0,8
866         rotr    $t1,$t1,8
867         rotr    $t2,$t2,8
868         rotr    $t3,$t3,8
869 # if defined(_MIPSEL)
870         lw      $t4,0($i0)              # Td2[s2>>8]
871         _xtr    $i0,$s1,0-2
872         lw      $t5,0($i1)              # Td2[s3>>8]
873         _xtr    $i1,$s2,0-2
874         lw      $t6,0($i2)              # Td2[s0>>8]
875         _xtr    $i2,$s3,0-2
876         lw      $t7,0($i3)              # Td2[s1>>8]
877         _xtr    $i3,$s0,0-2
878
879         and     $i0,0x3fc
880         and     $i1,0x3fc
881         and     $i2,0x3fc
882         and     $i3,0x3fc
883         $PTR_ADD $i0,$Tbl
884         $PTR_ADD $i1,$Tbl
885         $PTR_ADD $i2,$Tbl
886         $PTR_ADD $i3,$Tbl
887         lw      $t8,0($i0)              # Td3[s1]
888         $PTR_INS $i0,$s0,2,8
889         lw      $t9,0($i1)              # Td3[s2]
890         $PTR_INS $i1,$s1,2,8
891         lw      $t10,0($i2)             # Td3[s3]
892         $PTR_INS $i2,$s2,2,8
893         lw      $t11,0($i3)             # Td3[s0]
894         $PTR_INS $i3,$s3,2,8
895 #else
896         lw      $t4,0($i0)              # Td2[s2>>8]
897         $PTR_INS $i0,$s1,2,8
898         lw      $t5,0($i1)              # Td2[s3>>8]
899         $PTR_INS $i1,$s2,2,8
900         lw      $t6,0($i2)              # Td2[s0>>8]
901         $PTR_INS $i2,$s3,2,8
902         lw      $t7,0($i3)              # Td2[s1>>8]
903         $PTR_INS $i3,$s0,2,8
904
905         lw      $t8,0($i0)              # Td3[s1]
906         _xtr    $i0,$s0,24-2
907         lw      $t9,0($i1)              # Td3[s2]
908         _xtr    $i1,$s1,24-2
909         lw      $t10,0($i2)             # Td3[s3]
910         _xtr    $i2,$s2,24-2
911         lw      $t11,0($i3)             # Td3[s0]
912         _xtr    $i3,$s3,24-2
913
914         and     $i0,0x3fc
915         and     $i1,0x3fc
916         and     $i2,0x3fc
917         and     $i3,0x3fc
918         $PTR_ADD $i0,$Tbl
919         $PTR_ADD $i1,$Tbl
920         $PTR_ADD $i2,$Tbl
921         $PTR_ADD $i3,$Tbl
922 #endif
923         rotr    $t4,$t4,16
924         rotr    $t5,$t5,16
925         rotr    $t6,$t6,16
926         rotr    $t7,$t7,16
927
928         rotr    $t8,$t8,24
929         rotr    $t9,$t9,24
930         rotr    $t10,$t10,24
931         rotr    $t11,$t11,24
932 #else
933         lwl     $t4,2($i0)              # Td2[s2>>8]
934         lwl     $t5,2($i1)              # Td2[s3>>8]
935         lwl     $t6,2($i2)              # Td2[s0>>8]
936         lwl     $t7,2($i3)              # Td2[s1>>8]
937         lwr     $t4,1($i0)              # Td2[s2>>8]
938         _xtr    $i0,$s1,0-2
939         lwr     $t5,1($i1)              # Td2[s3>>8]
940         _xtr    $i1,$s2,0-2
941         lwr     $t6,1($i2)              # Td2[s0>>8]
942         _xtr    $i2,$s3,0-2
943         lwr     $t7,1($i3)              # Td2[s1>>8]
944         _xtr    $i3,$s0,0-2
945
946         and     $i0,0x3fc
947         and     $i1,0x3fc
948         and     $i2,0x3fc
949         and     $i3,0x3fc
950         $PTR_ADD $i0,$Tbl
951         $PTR_ADD $i1,$Tbl
952         $PTR_ADD $i2,$Tbl
953         $PTR_ADD $i3,$Tbl
954         lwl     $t8,1($i0)              # Td3[s1]
955         lwl     $t9,1($i1)              # Td3[s2]
956         lwl     $t10,1($i2)             # Td3[s3]
957         lwl     $t11,1($i3)             # Td3[s0]
958         lwr     $t8,0($i0)              # Td3[s1]
959         _xtr    $i0,$s0,24-2
960         lwr     $t9,0($i1)              # Td3[s2]
961         _xtr    $i1,$s1,24-2
962         lwr     $t10,0($i2)             # Td3[s3]
963         _xtr    $i2,$s2,24-2
964         lwr     $t11,0($i3)             # Td3[s0]
965         _xtr    $i3,$s3,24-2
966
967         and     $i0,0x3fc
968         and     $i1,0x3fc
969         and     $i2,0x3fc
970         and     $i3,0x3fc
971         $PTR_ADD $i0,$Tbl
972         $PTR_ADD $i1,$Tbl
973         $PTR_ADD $i2,$Tbl
974         $PTR_ADD $i3,$Tbl
975 #endif
976
977         xor     $t0,$t4
978         lw      $t4,0($i0)              # Td0[s0>>24]
979         xor     $t1,$t5
980         lw      $t5,0($i1)              # Td0[s1>>24]
981         xor     $t2,$t6
982         lw      $t6,0($i2)              # Td0[s2>>24]
983         xor     $t3,$t7
984         lw      $t7,0($i3)              # Td0[s3>>24]
985
986         xor     $t0,$t8
987         lw      $s0,0($key0)
988         xor     $t1,$t9
989         lw      $s1,4($key0)
990         xor     $t2,$t10
991         lw      $s2,8($key0)
992         xor     $t3,$t11
993         lw      $s3,12($key0)
994
995         xor     $t0,$t4
996         xor     $t1,$t5
997         xor     $t2,$t6
998         xor     $t3,$t7
999
1000         subu    $cnt,1
1001         $PTR_ADD $key0,16
1002         xor     $s0,$t0
1003         xor     $s1,$t1
1004         xor     $s2,$t2
1005         xor     $s3,$t3
1006         .set    noreorder
1007         bnez    $cnt,.Loop_dec
1008         _xtr    $i0,$s3,16-2
1009 #endif
1010
1011         .set    reorder
1012         lw      $t4,1024($Tbl)          # prefetch Td4
1013         _xtr    $i0,$s3,16
1014         lw      $t5,1024+32($Tbl)
1015         _xtr    $i1,$s0,16
1016         lw      $t6,1024+64($Tbl)
1017         _xtr    $i2,$s1,16
1018         lw      $t7,1024+96($Tbl)
1019         _xtr    $i3,$s2,16
1020         lw      $t8,1024+128($Tbl)
1021         and     $i0,0xff
1022         lw      $t9,1024+160($Tbl)
1023         and     $i1,0xff
1024         lw      $t10,1024+192($Tbl)
1025         and     $i2,0xff
1026         lw      $t11,1024+224($Tbl)
1027         and     $i3,0xff
1028
1029         $PTR_ADD $i0,$Tbl
1030         $PTR_ADD $i1,$Tbl
1031         $PTR_ADD $i2,$Tbl
1032         $PTR_ADD $i3,$Tbl
1033         lbu     $t0,1024($i0)           # Td4[s3>>16]
1034         _xtr    $i0,$s2,8
1035         lbu     $t1,1024($i1)           # Td4[s0>>16]
1036         _xtr    $i1,$s3,8
1037         lbu     $t2,1024($i2)           # Td4[s1>>16]
1038         _xtr    $i2,$s0,8
1039         lbu     $t3,1024($i3)           # Td4[s2>>16]
1040         _xtr    $i3,$s1,8
1041
1042         and     $i0,0xff
1043         and     $i1,0xff
1044         and     $i2,0xff
1045         and     $i3,0xff
1046         $PTR_ADD $i0,$Tbl
1047         $PTR_ADD $i1,$Tbl
1048         $PTR_ADD $i2,$Tbl
1049         $PTR_ADD $i3,$Tbl
1050 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1051 # if defined(_MIPSEL)
1052         lbu     $t4,1024($i0)           # Td4[s2>>8]
1053         $PTR_INS $i0,$s0,0,8
1054         lbu     $t5,1024($i1)           # Td4[s3>>8]
1055         $PTR_INS $i1,$s1,0,8
1056         lbu     $t6,1024($i2)           # Td4[s0>>8]
1057         $PTR_INS $i2,$s2,0,8
1058         lbu     $t7,1024($i3)           # Td4[s1>>8]
1059         $PTR_INS $i3,$s3,0,8
1060
1061         lbu     $t8,1024($i0)           # Td4[s0>>24]
1062         _xtr    $i0,$s1,0
1063         lbu     $t9,1024($i1)           # Td4[s1>>24]
1064         _xtr    $i1,$s2,0
1065         lbu     $t10,1024($i2)          # Td4[s2>>24]
1066         _xtr    $i2,$s3,0
1067         lbu     $t11,1024($i3)          # Td4[s3>>24]
1068         _xtr    $i3,$s0,0
1069
1070         $PTR_ADD $i0,$Tbl
1071         $PTR_ADD $i1,$Tbl
1072         $PTR_ADD $i2,$Tbl
1073         $PTR_ADD $i3,$Tbl
1074 # else
1075         lbu     $t4,1024($i0)           # Td4[s2>>8]
1076         _xtr    $i0,$s0,24
1077         lbu     $t5,1024($i1)           # Td4[s3>>8]
1078         _xtr    $i1,$s1,24
1079         lbu     $t6,1024($i2)           # Td4[s0>>8]
1080         _xtr    $i2,$s2,24
1081         lbu     $t7,1024($i3)           # Td4[s1>>8]
1082         _xtr    $i3,$s3,24
1083
1084         $PTR_ADD $i0,$Tbl
1085         $PTR_ADD $i1,$Tbl
1086         $PTR_ADD $i2,$Tbl
1087         $PTR_ADD $i3,$Tbl
1088         lbu     $t8,1024($i0)           # Td4[s0>>24]
1089         $PTR_INS $i0,$s1,0,8
1090         lbu     $t9,1024($i1)           # Td4[s1>>24]
1091         $PTR_INS $i1,$s2,0,8
1092         lbu     $t10,1024($i2)          # Td4[s2>>24]
1093         $PTR_INS $i2,$s3,0,8
1094         lbu     $t11,1024($i3)          # Td4[s3>>24]
1095         $PTR_INS $i3,$s0,0,8
1096 # endif
1097         _ins    $t0,16
1098         _ins    $t1,16
1099         _ins    $t2,16
1100         _ins    $t3,16
1101
1102         _ins2   $t0,$t4,8
1103         lbu     $t4,1024($i0)           # Td4[s1]
1104         _ins2   $t1,$t5,8
1105         lbu     $t5,1024($i1)           # Td4[s2]
1106         _ins2   $t2,$t6,8
1107         lbu     $t6,1024($i2)           # Td4[s3]
1108         _ins2   $t3,$t7,8
1109         lbu     $t7,1024($i3)           # Td4[s0]
1110
1111         _ins2   $t0,$t8,24
1112         lw      $s0,0($key0)
1113         _ins2   $t1,$t9,24
1114         lw      $s1,4($key0)
1115         _ins2   $t2,$t10,24
1116         lw      $s2,8($key0)
1117         _ins2   $t3,$t11,24
1118         lw      $s3,12($key0)
1119
1120         _ins2   $t0,$t4,0
1121         _ins2   $t1,$t5,0
1122         _ins2   $t2,$t6,0
1123         _ins2   $t3,$t7,0
1124 #else
1125         lbu     $t4,1024($i0)           # Td4[s2>>8]
1126         _xtr    $i0,$s0,24
1127         lbu     $t5,1024($i1)           # Td4[s3>>8]
1128         _xtr    $i1,$s1,24
1129         lbu     $t6,1024($i2)           # Td4[s0>>8]
1130         _xtr    $i2,$s2,24
1131         lbu     $t7,1024($i3)           # Td4[s1>>8]
1132         _xtr    $i3,$s3,24
1133
1134         $PTR_ADD $i0,$Tbl
1135         $PTR_ADD $i1,$Tbl
1136         $PTR_ADD $i2,$Tbl
1137         $PTR_ADD $i3,$Tbl
1138         lbu     $t8,1024($i0)           # Td4[s0>>24]
1139         _xtr    $i0,$s1,0
1140         lbu     $t9,1024($i1)           # Td4[s1>>24]
1141         _xtr    $i1,$s2,0
1142         lbu     $t10,1024($i2)          # Td4[s2>>24]
1143         _xtr    $i2,$s3,0
1144         lbu     $t11,1024($i3)          # Td4[s3>>24]
1145         _xtr    $i3,$s0,0
1146
1147         $PTR_ADD $i0,$Tbl
1148         $PTR_ADD $i1,$Tbl
1149         $PTR_ADD $i2,$Tbl
1150         $PTR_ADD $i3,$Tbl
1151
1152         _ins    $t0,16
1153         _ins    $t1,16
1154         _ins    $t2,16
1155         _ins    $t3,16
1156
1157         _ins    $t4,8
1158         _ins    $t5,8
1159         _ins    $t6,8
1160         _ins    $t7,8
1161
1162         xor     $t0,$t4
1163         lbu     $t4,1024($i0)           # Td4[s1]
1164         xor     $t1,$t5
1165         lbu     $t5,1024($i1)           # Td4[s2]
1166         xor     $t2,$t6
1167         lbu     $t6,1024($i2)           # Td4[s3]
1168         xor     $t3,$t7
1169         lbu     $t7,1024($i3)           # Td4[s0]
1170
1171         _ins    $t8,24
1172         lw      $s0,0($key0)
1173         _ins    $t9,24
1174         lw      $s1,4($key0)
1175         _ins    $t10,24
1176         lw      $s2,8($key0)
1177         _ins    $t11,24
1178         lw      $s3,12($key0)
1179
1180         xor     $t0,$t8
1181         xor     $t1,$t9
1182         xor     $t2,$t10
1183         xor     $t3,$t11
1184
1185         _ins    $t4,0
1186         _ins    $t5,0
1187         _ins    $t6,0
1188         _ins    $t7,0
1189
1190         xor     $t0,$t4
1191         xor     $t1,$t5
1192         xor     $t2,$t6
1193         xor     $t3,$t7
1194 #endif
1195
1196         xor     $s0,$t0
1197         xor     $s1,$t1
1198         xor     $s2,$t2
1199         xor     $s3,$t3
1200
1201         jr      $ra
1202 .end    _mips_AES_decrypt
1203
1204 .align  5
1205 .globl  AES_decrypt
1206 .ent    AES_decrypt
1207 AES_decrypt:
1208         .frame  $sp,$FRAMESIZE,$ra
1209         .mask   $SAVED_REGS_MASK,-$SZREG
1210         .set    noreorder
1211 ___
# AES_decrypt: public wrapper around _mips_AES_decrypt.  Builds the stack
# frame, loads the 16-byte input block (plain word loads on R6, which has
# no lwl/lwr; unaligned-capable lwl/lwr pairs otherwise), calls the inner
# routine, and stores the result the same way before unwinding.
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
# Prologue: save $ra, $fp and the $s-registers clobbered by the inner
# routine into the $FRAMESIZE-byte frame declared above.
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
___
# NUBI treats additional registers ($12-$15, $gp) as callee-saved; they go
# in slots -11..-15, matching the restores at the end of this wrapper.
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
___
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Tbl
	.cpsetup	$pf,$zero,AES_decrypt
___
# Main body: point $Tbl at the decryption tables, load the input block
# endian-neutrally, run the inner rounds, store the output block.
$code.=<<___;
	.set	reorder
	$PTR_LA	$Tbl,AES_Td		# PIC-ified 'load address'

#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$s0,0($inp)
	lw	$s1,4($inp)
	lw	$s2,8($inp)
	lw	$s3,12($inp)
#else
	lwl	$s0,0+$MSB($inp)
	lwl	$s1,4+$MSB($inp)
	lwl	$s2,8+$MSB($inp)
	lwl	$s3,12+$MSB($inp)
	lwr	$s0,0+$LSB($inp)
	lwr	$s1,4+$LSB($inp)
	lwr	$s2,8+$LSB($inp)
	lwr	$s3,12+$LSB($inp)
#endif

	bal	_mips_AES_decrypt

#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	sw	$s0,0($out)
	sw	$s1,4($out)
	sw	$s2,8($out)
	sw	$s3,12($out)
#else
	swr	$s0,0+$LSB($out)
	swr	$s1,4+$LSB($out)
	swr	$s2,8+$LSB($out)
	swr	$s3,12+$LSB($out)
	swl	$s0,0+$MSB($out)
	swl	$s1,4+$MSB($out)
	swl	$s2,8+$MSB($out)
	swl	$s3,12+$MSB($out)
#endif

	.set	noreorder
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
___
# NUBI epilogue restores mirror the NUBI prologue slots above exactly.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
___
# Return; the frame pop sits in the delay slot (noreorder is in effect).
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	AES_decrypt
___
1301 }}}
1302 \f
{{{
# Key-schedule section: fresh frame size and register map for
# AES_set_encrypt_key / AES_set_decrypt_key.  The mask covers $ra/$fp,
# plus (under NUBI) the extra callee-saved registers — 0xc000f008 vs
# 0xc0000000.  TODO(review): confirm mask bit positions against the ABI.
my $FRAMESIZE=8*$SZREG;
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc000f008" : "0xc0000000";

# $inp = user key, $bits = key length in bits, $key = output schedule;
# $Tbl is pointed at AES_Te4 by the wrappers ($rcon = $Tbl+256, see worker).
my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
# Up to eight consecutive round-key words kept live in registers.
my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
# Scratch indices for the four per-round S-box lookups.
my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
# $rcon = round-constant pointer, $cnt = iteration counter / round count.
my ($rcon,$cnt)=($gp,$fp);
1311
# _mips_AES_set_encrypt_key: expand the user key at ($inp) into the round-key
# schedule at ($key).  Status is left in $t0: 0 = success, -1 = NULL input
# or key pointer, -2 = unsupported key length.  $rcon is set to $Tbl+256,
# i.e. the round constants live 256 bytes past the AES_Te4 byte S-box.
# Three unrolled expansion loops handle 128-/192-/256-bit keys; each stores
# the round count (10/12/14) into the schedule and rewinds $key before
# returning.  Key words are loaded with plain lw on R6 (no lwl/lwr there),
# lwl/lwr pairs elsewhere, keeping the code endian-neutral.  The same
# heredoc also opens the public AES_set_encrypt_key wrapper; its prologue
# continues in the following code segments.
$code.=<<___;
.align	5
.ent	_mips_AES_set_encrypt_key
_mips_AES_set_encrypt_key:
	.frame	$sp,0,$ra
	.set	noreorder
	beqz	$inp,.Lekey_done
	li	$t0,-1
	beqz	$key,.Lekey_done
	$PTR_ADD $rcon,$Tbl,256

	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$rk0,0($inp)		# load 128 bits
	lw	$rk1,4($inp)
	lw	$rk2,8($inp)
	lw	$rk3,12($inp)
#else
	lwl	$rk0,0+$MSB($inp)	# load 128 bits
	lwl	$rk1,4+$MSB($inp)
	lwl	$rk2,8+$MSB($inp)
	lwl	$rk3,12+$MSB($inp)
	lwr	$rk0,0+$LSB($inp)
	lwr	$rk1,4+$LSB($inp)
	lwr	$rk2,8+$LSB($inp)
	lwr	$rk3,12+$LSB($inp)
#endif
	li	$at,128
	.set	noreorder
	beq	$bits,$at,.L128bits
	li	$cnt,10

	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$rk4,16($inp)		# load 192 bits
	lw	$rk5,20($inp)
#else
	lwl	$rk4,16+$MSB($inp)	# load 192 bits
	lwl	$rk5,20+$MSB($inp)
	lwr	$rk4,16+$LSB($inp)
	lwr	$rk5,20+$LSB($inp)
#endif
	li	$at,192
	.set	noreorder
	beq	$bits,$at,.L192bits
	li	$cnt,8

	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$rk6,24($inp)		# load 256 bits
	lw	$rk7,28($inp)
#else
	lwl	$rk6,24+$MSB($inp)	# load 256 bits
	lwl	$rk7,28+$MSB($inp)
	lwr	$rk6,24+$LSB($inp)
	lwr	$rk7,28+$LSB($inp)
#endif
	li	$at,256
	.set	noreorder
	beq	$bits,$at,.L256bits
	li	$cnt,7

	b	.Lekey_done
	li	$t0,-2

.align	4
.L128bits:
	.set	reorder
	srl	$i0,$rk3,16
	srl	$i1,$rk3,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk3,0xff
	srl	$i3,$rk3,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	subu	$cnt,1
	$PTR_ADD $key,16

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2

	.set	noreorder
	bnez	$cnt,.L128bits
	$PTR_ADD $rcon,4

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	li	$cnt,10
	sw	$rk3,12($key)
	li	$t0,0
	sw	$cnt,80($key)
	b	.Lekey_done
	$PTR_SUB $key,10*16

.align	4
.L192bits:
	.set	reorder
	srl	$i0,$rk5,16
	srl	$i1,$rk5,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk5,0xff
	srl	$i3,$rk5,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	sw	$rk4,16($key)
	sw	$rk5,20($key)
	subu	$cnt,1
	$PTR_ADD $key,24

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2
	xor	$rk4,$rk3
	xor	$rk5,$rk4

	.set	noreorder
	bnez	$cnt,.L192bits
	$PTR_ADD $rcon,4

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	li	$cnt,12
	sw	$rk3,12($key)
	li	$t0,0
	sw	$cnt,48($key)
	b	.Lekey_done
	$PTR_SUB $key,12*16

.align	4
.L256bits:
	.set	reorder
	srl	$i0,$rk7,16
	srl	$i1,$rk7,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk7,0xff
	srl	$i3,$rk7,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	sw	$rk4,16($key)
	sw	$rk5,20($key)
	sw	$rk6,24($key)
	sw	$rk7,28($key)
	subu	$cnt,1

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2
	beqz	$cnt,.L256bits_done

	srl	$i0,$rk3,24
	srl	$i1,$rk3,16
	srl	$i2,$rk3,8
	and	$i3,$rk3,0xff
	and	$i1,0xff
	and	$i2,0xff
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)
	sll	$i0,24
	sll	$i1,16
	sll	$i2,8

	xor	$rk4,$i0
	xor	$rk4,$i1
	xor	$rk4,$i2
	xor	$rk4,$i3

	xor	$rk5,$rk4
	xor	$rk6,$rk5
	xor	$rk7,$rk6

	$PTR_ADD $key,32
	.set	noreorder
	b	.L256bits
	$PTR_ADD $rcon,4

.L256bits_done:
	sw	$rk0,32($key)
	sw	$rk1,36($key)
	sw	$rk2,40($key)
	li	$cnt,14
	sw	$rk3,44($key)
	li	$t0,0
	sw	$cnt,48($key)
	$PTR_SUB $key,12*16

.Lekey_done:
	jr	$ra
	nop
.end	_mips_AES_set_encrypt_key

.globl	AES_set_encrypt_key
.ent	AES_set_encrypt_key
AES_set_encrypt_key:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
# AES_set_encrypt_key wrapper, continued: PIC setup and frame prologue,
# then the call into the worker routine emitted above.
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
# Only $ra and $fp need saving in the common case; extra registers are
# callee-saved only under NUBI (handled just below).
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
___
# NUBI: $s0-$s3 and $gp are callee-saved, stored in slots -3..-7.
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
___
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Tbl
	.cpsetup	$pf,$zero,AES_set_encrypt_key
___
# Point $Tbl at the compact S-box, run the worker, then copy its status
# ($t0) into $a0 before unwinding.
$code.=<<___;
	.set	reorder
	$PTR_LA	$Tbl,AES_Te4		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

	.set	noreorder
	move	$a0,$t0
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
___
# NUBI epilogue: restore the extra callee-saved registers.
# FIX: the restore offsets must mirror the NUBI prologue of this function,
# which stores $s3..$gp at $FRAMESIZE-3..-7*$SZREG.  The previous offsets
# (-11..-15) were carried over from the cipher routines' larger frame;
# with this section's $FRAMESIZE=8*$SZREG they addressed memory *below*
# the frame, so the registers were reloaded with garbage.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
___
# Return; frame pop rides in the delay slot (noreorder is in effect).
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	AES_set_encrypt_key
___
1632 \f
# Decrypt-key post-processing registers: $head/$tail walk the schedule from
# both ends while it is reversed; $tp1..$tpe hold intermediates named after
# GF(2^8) multiples (x1,x2,x4,x8,x9,xB,xD,xE — presumably the inverse
# MixColumns coefficients); the $x... registers hold byte-splat constants
# for the parallel xtime (doubling) computation.
my ($head,$tail)=($inp,$bits);
my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
# AES_set_decrypt_key: run the encrypt key schedule, then convert it in
# place for decryption: (a) .Lswap reverses the order of the round-key
# blocks with $head/$tail pointers closing in on each other ($cnt, the
# round count left by the worker, sizes the schedule); (b) .Lmix applies
# the inverse-MixColumns-style transform to the inner round keys — three
# chained xtime (GF(2^8) doubling) steps per word using the
# 0x80808080/0x7f7f7f7f/0x1b1b1b1b splat constants, then a combination of
# rotated x9/xB/xD/xE multiples (rotr on R2+, shift-pair _ror otherwise).
$code.=<<___;
.align	5
.globl	AES_set_decrypt_key
.ent	AES_set_decrypt_key
AES_set_decrypt_key:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
# Prologue mirrors AES_set_encrypt_key: $ra/$fp always, extras under NUBI.
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
___
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Tbl
	.cpsetup	$pf,$zero,AES_set_decrypt_key
___
# Expand the key, bail out on the worker's error status (<0), then do the
# reverse-and-mix conversion described above.
$code.=<<___;
	.set	reorder
	$PTR_LA	$Tbl,AES_Te4		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

	bltz	$t0,.Ldkey_done

	sll	$at,$cnt,4
	$PTR_ADD $head,$key,0
	$PTR_ADD $tail,$key,$at
.align	4
.Lswap:
	lw	$rk0,0($head)
	lw	$rk1,4($head)
	lw	$rk2,8($head)
	lw	$rk3,12($head)
	lw	$rk4,0($tail)
	lw	$rk5,4($tail)
	lw	$rk6,8($tail)
	lw	$rk7,12($tail)
	sw	$rk0,0($tail)
	sw	$rk1,4($tail)
	sw	$rk2,8($tail)
	sw	$rk3,12($tail)
	$PTR_ADD $head,16
	$PTR_SUB $tail,16
	sw	$rk4,-16($head)
	sw	$rk5,-12($head)
	sw	$rk6,-8($head)
	sw	$rk7,-4($head)
	bne	$head,$tail,.Lswap

	lw	$tp1,16($key)		# modulo-scheduled
	lui	$x80808080,0x8080
	subu	$cnt,1
	or	$x80808080,0x8080
	sll	$cnt,2
	$PTR_ADD $key,16
	lui	$x1b1b1b1b,0x1b1b
	nor	$x7f7f7f7f,$zero,$x80808080
	or	$x1b1b1b1b,0x1b1b
.align	4
.Lmix:
	and	$m,$tp1,$x80808080
	and	$tp2,$tp1,$x7f7f7f7f
	srl	$tp4,$m,7
	addu	$tp2,$tp2		# tp2<<1
	subu	$m,$tp4
	and	$m,$x1b1b1b1b
	xor	$tp2,$m

	and	$m,$tp2,$x80808080
	and	$tp4,$tp2,$x7f7f7f7f
	srl	$tp8,$m,7
	addu	$tp4,$tp4		# tp4<<1
	subu	$m,$tp8
	and	$m,$x1b1b1b1b
	xor	$tp4,$m

	and	$m,$tp4,$x80808080
	and	$tp8,$tp4,$x7f7f7f7f
	srl	$tp9,$m,7
	addu	$tp8,$tp8		# tp8<<1
	subu	$m,$tp9
	and	$m,$x1b1b1b1b
	xor	$tp8,$m

	xor	$tp9,$tp8,$tp1
	xor	$tpe,$tp8,$tp4
	xor	$tpb,$tp9,$tp2
	xor	$tpd,$tp9,$tp4

#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	rotr	$tp1,$tpd,16
	 xor	$tpe,$tp2
	rotr	$tp2,$tp9,8
	xor	$tpe,$tp1
	rotr	$tp4,$tpb,24
	xor	$tpe,$tp2
	lw	$tp1,4($key)		# modulo-scheduled
	xor	$tpe,$tp4
#else
	_ror	$tp1,$tpd,16
	 xor	$tpe,$tp2
	_ror	$tp2,$tpd,-16
	xor	$tpe,$tp1
	_ror	$tp1,$tp9,8
	xor	$tpe,$tp2
	_ror	$tp2,$tp9,-24
	xor	$tpe,$tp1
	_ror	$tp1,$tpb,24
	xor	$tpe,$tp2
	_ror	$tp2,$tpb,-8
	xor	$tpe,$tp1
	lw	$tp1,4($key)		# modulo-scheduled
	xor	$tpe,$tp2
#endif
	subu	$cnt,1
	sw	$tpe,0($key)
	$PTR_ADD $key,4
	bnez	$cnt,.Lmix

	li	$t0,0
.Ldkey_done:
	.set	noreorder
	move	$a0,$t0
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
___
# NUBI epilogue: restore the extra callee-saved registers.
# FIX: the restore offsets must mirror this function's NUBI prologue,
# which stores $s3..$gp at $FRAMESIZE-3..-7*$SZREG.  The previous offsets
# (-11..-15) were carried over from the cipher routines' larger frame;
# with this section's $FRAMESIZE=8*$SZREG they addressed memory *below*
# the frame, so the registers were reloaded with garbage.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
___
# Return; frame pop rides in the delay slot (noreorder is in effect).
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	AES_set_decrypt_key
___
1786 }}}
1787
1788 ######################################################################
1789 # Tables are kept in endian-neutral manner
1790 $code.=<<___;
1791 .rdata
1792 .align  10
1793 AES_Te:
1794 .byte   0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84     # Te0
1795 .byte   0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
1796 .byte   0xff,0xf2,0xf2,0x0d,    0xd6,0x6b,0x6b,0xbd
1797 .byte   0xde,0x6f,0x6f,0xb1,    0x91,0xc5,0xc5,0x54
1798 .byte   0x60,0x30,0x30,0x50,    0x02,0x01,0x01,0x03
1799 .byte   0xce,0x67,0x67,0xa9,    0x56,0x2b,0x2b,0x7d
1800 .byte   0xe7,0xfe,0xfe,0x19,    0xb5,0xd7,0xd7,0x62
1801 .byte   0x4d,0xab,0xab,0xe6,    0xec,0x76,0x76,0x9a
1802 .byte   0x8f,0xca,0xca,0x45,    0x1f,0x82,0x82,0x9d
1803 .byte   0x89,0xc9,0xc9,0x40,    0xfa,0x7d,0x7d,0x87
1804 .byte   0xef,0xfa,0xfa,0x15,    0xb2,0x59,0x59,0xeb
1805 .byte   0x8e,0x47,0x47,0xc9,    0xfb,0xf0,0xf0,0x0b
1806 .byte   0x41,0xad,0xad,0xec,    0xb3,0xd4,0xd4,0x67
1807 .byte   0x5f,0xa2,0xa2,0xfd,    0x45,0xaf,0xaf,0xea
1808 .byte   0x23,0x9c,0x9c,0xbf,    0x53,0xa4,0xa4,0xf7
1809 .byte   0xe4,0x72,0x72,0x96,    0x9b,0xc0,0xc0,0x5b
1810 .byte   0x75,0xb7,0xb7,0xc2,    0xe1,0xfd,0xfd,0x1c
1811 .byte   0x3d,0x93,0x93,0xae,    0x4c,0x26,0x26,0x6a
1812 .byte   0x6c,0x36,0x36,0x5a,    0x7e,0x3f,0x3f,0x41
1813 .byte   0xf5,0xf7,0xf7,0x02,    0x83,0xcc,0xcc,0x4f
1814 .byte   0x68,0x34,0x34,0x5c,    0x51,0xa5,0xa5,0xf4
1815 .byte   0xd1,0xe5,0xe5,0x34,    0xf9,0xf1,0xf1,0x08
1816 .byte   0xe2,0x71,0x71,0x93,    0xab,0xd8,0xd8,0x73
1817 .byte   0x62,0x31,0x31,0x53,    0x2a,0x15,0x15,0x3f
1818 .byte   0x08,0x04,0x04,0x0c,    0x95,0xc7,0xc7,0x52
1819 .byte   0x46,0x23,0x23,0x65,    0x9d,0xc3,0xc3,0x5e
1820 .byte   0x30,0x18,0x18,0x28,    0x37,0x96,0x96,0xa1
1821 .byte   0x0a,0x05,0x05,0x0f,    0x2f,0x9a,0x9a,0xb5
1822 .byte   0x0e,0x07,0x07,0x09,    0x24,0x12,0x12,0x36
1823 .byte   0x1b,0x80,0x80,0x9b,    0xdf,0xe2,0xe2,0x3d
1824 .byte   0xcd,0xeb,0xeb,0x26,    0x4e,0x27,0x27,0x69
1825 .byte   0x7f,0xb2,0xb2,0xcd,    0xea,0x75,0x75,0x9f
1826 .byte   0x12,0x09,0x09,0x1b,    0x1d,0x83,0x83,0x9e
1827 .byte   0x58,0x2c,0x2c,0x74,    0x34,0x1a,0x1a,0x2e
1828 .byte   0x36,0x1b,0x1b,0x2d,    0xdc,0x6e,0x6e,0xb2
1829 .byte   0xb4,0x5a,0x5a,0xee,    0x5b,0xa0,0xa0,0xfb
1830 .byte   0xa4,0x52,0x52,0xf6,    0x76,0x3b,0x3b,0x4d
1831 .byte   0xb7,0xd6,0xd6,0x61,    0x7d,0xb3,0xb3,0xce
1832 .byte   0x52,0x29,0x29,0x7b,    0xdd,0xe3,0xe3,0x3e
1833 .byte   0x5e,0x2f,0x2f,0x71,    0x13,0x84,0x84,0x97
1834 .byte   0xa6,0x53,0x53,0xf5,    0xb9,0xd1,0xd1,0x68
1835 .byte   0x00,0x00,0x00,0x00,    0xc1,0xed,0xed,0x2c
1836 .byte   0x40,0x20,0x20,0x60,    0xe3,0xfc,0xfc,0x1f
1837 .byte   0x79,0xb1,0xb1,0xc8,    0xb6,0x5b,0x5b,0xed
1838 .byte   0xd4,0x6a,0x6a,0xbe,    0x8d,0xcb,0xcb,0x46
1839 .byte   0x67,0xbe,0xbe,0xd9,    0x72,0x39,0x39,0x4b
1840 .byte   0x94,0x4a,0x4a,0xde,    0x98,0x4c,0x4c,0xd4
1841 .byte   0xb0,0x58,0x58,0xe8,    0x85,0xcf,0xcf,0x4a
1842 .byte   0xbb,0xd0,0xd0,0x6b,    0xc5,0xef,0xef,0x2a
1843 .byte   0x4f,0xaa,0xaa,0xe5,    0xed,0xfb,0xfb,0x16
1844 .byte   0x86,0x43,0x43,0xc5,    0x9a,0x4d,0x4d,0xd7
1845 .byte   0x66,0x33,0x33,0x55,    0x11,0x85,0x85,0x94
1846 .byte   0x8a,0x45,0x45,0xcf,    0xe9,0xf9,0xf9,0x10
1847 .byte   0x04,0x02,0x02,0x06,    0xfe,0x7f,0x7f,0x81
1848 .byte   0xa0,0x50,0x50,0xf0,    0x78,0x3c,0x3c,0x44
1849 .byte   0x25,0x9f,0x9f,0xba,    0x4b,0xa8,0xa8,0xe3
1850 .byte   0xa2,0x51,0x51,0xf3,    0x5d,0xa3,0xa3,0xfe
1851 .byte   0x80,0x40,0x40,0xc0,    0x05,0x8f,0x8f,0x8a
1852 .byte   0x3f,0x92,0x92,0xad,    0x21,0x9d,0x9d,0xbc
1853 .byte   0x70,0x38,0x38,0x48,    0xf1,0xf5,0xf5,0x04
1854 .byte   0x63,0xbc,0xbc,0xdf,    0x77,0xb6,0xb6,0xc1
1855 .byte   0xaf,0xda,0xda,0x75,    0x42,0x21,0x21,0x63
1856 .byte   0x20,0x10,0x10,0x30,    0xe5,0xff,0xff,0x1a
1857 .byte   0xfd,0xf3,0xf3,0x0e,    0xbf,0xd2,0xd2,0x6d
1858 .byte   0x81,0xcd,0xcd,0x4c,    0x18,0x0c,0x0c,0x14
1859 .byte   0x26,0x13,0x13,0x35,    0xc3,0xec,0xec,0x2f
1860 .byte   0xbe,0x5f,0x5f,0xe1,    0x35,0x97,0x97,0xa2
1861 .byte   0x88,0x44,0x44,0xcc,    0x2e,0x17,0x17,0x39
1862 .byte   0x93,0xc4,0xc4,0x57,    0x55,0xa7,0xa7,0xf2
1863 .byte   0xfc,0x7e,0x7e,0x82,    0x7a,0x3d,0x3d,0x47
1864 .byte   0xc8,0x64,0x64,0xac,    0xba,0x5d,0x5d,0xe7
1865 .byte   0x32,0x19,0x19,0x2b,    0xe6,0x73,0x73,0x95
1866 .byte   0xc0,0x60,0x60,0xa0,    0x19,0x81,0x81,0x98
1867 .byte   0x9e,0x4f,0x4f,0xd1,    0xa3,0xdc,0xdc,0x7f
1868 .byte   0x44,0x22,0x22,0x66,    0x54,0x2a,0x2a,0x7e
1869 .byte   0x3b,0x90,0x90,0xab,    0x0b,0x88,0x88,0x83
1870 .byte   0x8c,0x46,0x46,0xca,    0xc7,0xee,0xee,0x29
1871 .byte   0x6b,0xb8,0xb8,0xd3,    0x28,0x14,0x14,0x3c
1872 .byte   0xa7,0xde,0xde,0x79,    0xbc,0x5e,0x5e,0xe2
1873 .byte   0x16,0x0b,0x0b,0x1d,    0xad,0xdb,0xdb,0x76
1874 .byte   0xdb,0xe0,0xe0,0x3b,    0x64,0x32,0x32,0x56
1875 .byte   0x74,0x3a,0x3a,0x4e,    0x14,0x0a,0x0a,0x1e
1876 .byte   0x92,0x49,0x49,0xdb,    0x0c,0x06,0x06,0x0a
1877 .byte   0x48,0x24,0x24,0x6c,    0xb8,0x5c,0x5c,0xe4
1878 .byte   0x9f,0xc2,0xc2,0x5d,    0xbd,0xd3,0xd3,0x6e
1879 .byte   0x43,0xac,0xac,0xef,    0xc4,0x62,0x62,0xa6
1880 .byte   0x39,0x91,0x91,0xa8,    0x31,0x95,0x95,0xa4
1881 .byte   0xd3,0xe4,0xe4,0x37,    0xf2,0x79,0x79,0x8b
1882 .byte   0xd5,0xe7,0xe7,0x32,    0x8b,0xc8,0xc8,0x43
1883 .byte   0x6e,0x37,0x37,0x59,    0xda,0x6d,0x6d,0xb7
1884 .byte   0x01,0x8d,0x8d,0x8c,    0xb1,0xd5,0xd5,0x64
1885 .byte   0x9c,0x4e,0x4e,0xd2,    0x49,0xa9,0xa9,0xe0
1886 .byte   0xd8,0x6c,0x6c,0xb4,    0xac,0x56,0x56,0xfa
1887 .byte   0xf3,0xf4,0xf4,0x07,    0xcf,0xea,0xea,0x25
1888 .byte   0xca,0x65,0x65,0xaf,    0xf4,0x7a,0x7a,0x8e
1889 .byte   0x47,0xae,0xae,0xe9,    0x10,0x08,0x08,0x18
1890 .byte   0x6f,0xba,0xba,0xd5,    0xf0,0x78,0x78,0x88
1891 .byte   0x4a,0x25,0x25,0x6f,    0x5c,0x2e,0x2e,0x72
1892 .byte   0x38,0x1c,0x1c,0x24,    0x57,0xa6,0xa6,0xf1
1893 .byte   0x73,0xb4,0xb4,0xc7,    0x97,0xc6,0xc6,0x51
1894 .byte   0xcb,0xe8,0xe8,0x23,    0xa1,0xdd,0xdd,0x7c
1895 .byte   0xe8,0x74,0x74,0x9c,    0x3e,0x1f,0x1f,0x21
1896 .byte   0x96,0x4b,0x4b,0xdd,    0x61,0xbd,0xbd,0xdc
1897 .byte   0x0d,0x8b,0x8b,0x86,    0x0f,0x8a,0x8a,0x85
1898 .byte   0xe0,0x70,0x70,0x90,    0x7c,0x3e,0x3e,0x42
1899 .byte   0x71,0xb5,0xb5,0xc4,    0xcc,0x66,0x66,0xaa
1900 .byte   0x90,0x48,0x48,0xd8,    0x06,0x03,0x03,0x05
1901 .byte   0xf7,0xf6,0xf6,0x01,    0x1c,0x0e,0x0e,0x12
1902 .byte   0xc2,0x61,0x61,0xa3,    0x6a,0x35,0x35,0x5f
1903 .byte   0xae,0x57,0x57,0xf9,    0x69,0xb9,0xb9,0xd0
1904 .byte   0x17,0x86,0x86,0x91,    0x99,0xc1,0xc1,0x58
1905 .byte   0x3a,0x1d,0x1d,0x27,    0x27,0x9e,0x9e,0xb9
1906 .byte   0xd9,0xe1,0xe1,0x38,    0xeb,0xf8,0xf8,0x13
1907 .byte   0x2b,0x98,0x98,0xb3,    0x22,0x11,0x11,0x33
1908 .byte   0xd2,0x69,0x69,0xbb,    0xa9,0xd9,0xd9,0x70
1909 .byte   0x07,0x8e,0x8e,0x89,    0x33,0x94,0x94,0xa7
1910 .byte   0x2d,0x9b,0x9b,0xb6,    0x3c,0x1e,0x1e,0x22
1911 .byte   0x15,0x87,0x87,0x92,    0xc9,0xe9,0xe9,0x20
1912 .byte   0x87,0xce,0xce,0x49,    0xaa,0x55,0x55,0xff
1913 .byte   0x50,0x28,0x28,0x78,    0xa5,0xdf,0xdf,0x7a
1914 .byte   0x03,0x8c,0x8c,0x8f,    0x59,0xa1,0xa1,0xf8
1915 .byte   0x09,0x89,0x89,0x80,    0x1a,0x0d,0x0d,0x17
1916 .byte   0x65,0xbf,0xbf,0xda,    0xd7,0xe6,0xe6,0x31
1917 .byte   0x84,0x42,0x42,0xc6,    0xd0,0x68,0x68,0xb8
1918 .byte   0x82,0x41,0x41,0xc3,    0x29,0x99,0x99,0xb0
1919 .byte   0x5a,0x2d,0x2d,0x77,    0x1e,0x0f,0x0f,0x11
1920 .byte   0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
1921 .byte   0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
1922
1923 AES_Td:
1924 .byte   0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53     # Td0
1925 .byte   0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
1926 .byte   0x3b,0xab,0x6b,0xcb,    0x1f,0x9d,0x45,0xf1
1927 .byte   0xac,0xfa,0x58,0xab,    0x4b,0xe3,0x03,0x93
1928 .byte   0x20,0x30,0xfa,0x55,    0xad,0x76,0x6d,0xf6
1929 .byte   0x88,0xcc,0x76,0x91,    0xf5,0x02,0x4c,0x25
1930 .byte   0x4f,0xe5,0xd7,0xfc,    0xc5,0x2a,0xcb,0xd7
1931 .byte   0x26,0x35,0x44,0x80,    0xb5,0x62,0xa3,0x8f
1932 .byte   0xde,0xb1,0x5a,0x49,    0x25,0xba,0x1b,0x67
1933 .byte   0x45,0xea,0x0e,0x98,    0x5d,0xfe,0xc0,0xe1
1934 .byte   0xc3,0x2f,0x75,0x02,    0x81,0x4c,0xf0,0x12
1935 .byte   0x8d,0x46,0x97,0xa3,    0x6b,0xd3,0xf9,0xc6
1936 .byte   0x03,0x8f,0x5f,0xe7,    0x15,0x92,0x9c,0x95
1937 .byte   0xbf,0x6d,0x7a,0xeb,    0x95,0x52,0x59,0xda
1938 .byte   0xd4,0xbe,0x83,0x2d,    0x58,0x74,0x21,0xd3
1939 .byte   0x49,0xe0,0x69,0x29,    0x8e,0xc9,0xc8,0x44
1940 .byte   0x75,0xc2,0x89,0x6a,    0xf4,0x8e,0x79,0x78
1941 .byte   0x99,0x58,0x3e,0x6b,    0x27,0xb9,0x71,0xdd
1942 .byte   0xbe,0xe1,0x4f,0xb6,    0xf0,0x88,0xad,0x17
1943 .byte   0xc9,0x20,0xac,0x66,    0x7d,0xce,0x3a,0xb4
1944 .byte   0x63,0xdf,0x4a,0x18,    0xe5,0x1a,0x31,0x82
1945 .byte   0x97,0x51,0x33,0x60,    0x62,0x53,0x7f,0x45
1946 .byte   0xb1,0x64,0x77,0xe0,    0xbb,0x6b,0xae,0x84
1947 .byte   0xfe,0x81,0xa0,0x1c,    0xf9,0x08,0x2b,0x94
1948 .byte   0x70,0x48,0x68,0x58,    0x8f,0x45,0xfd,0x19
1949 .byte   0x94,0xde,0x6c,0x87,    0x52,0x7b,0xf8,0xb7
1950 .byte   0xab,0x73,0xd3,0x23,    0x72,0x4b,0x02,0xe2
1951 .byte   0xe3,0x1f,0x8f,0x57,    0x66,0x55,0xab,0x2a
1952 .byte   0xb2,0xeb,0x28,0x07,    0x2f,0xb5,0xc2,0x03
1953 .byte   0x86,0xc5,0x7b,0x9a,    0xd3,0x37,0x08,0xa5
1954 .byte   0x30,0x28,0x87,0xf2,    0x23,0xbf,0xa5,0xb2
1955 .byte   0x02,0x03,0x6a,0xba,    0xed,0x16,0x82,0x5c
1956 .byte   0x8a,0xcf,0x1c,0x2b,    0xa7,0x79,0xb4,0x92
1957 .byte   0xf3,0x07,0xf2,0xf0,    0x4e,0x69,0xe2,0xa1
1958 .byte   0x65,0xda,0xf4,0xcd,    0x06,0x05,0xbe,0xd5
1959 .byte   0xd1,0x34,0x62,0x1f,    0xc4,0xa6,0xfe,0x8a
1960 .byte   0x34,0x2e,0x53,0x9d,    0xa2,0xf3,0x55,0xa0
1961 .byte   0x05,0x8a,0xe1,0x32,    0xa4,0xf6,0xeb,0x75
1962 .byte   0x0b,0x83,0xec,0x39,    0x40,0x60,0xef,0xaa
1963 .byte   0x5e,0x71,0x9f,0x06,    0xbd,0x6e,0x10,0x51
1964 .byte   0x3e,0x21,0x8a,0xf9,    0x96,0xdd,0x06,0x3d
1965 .byte   0xdd,0x3e,0x05,0xae,    0x4d,0xe6,0xbd,0x46
1966 .byte   0x91,0x54,0x8d,0xb5,    0x71,0xc4,0x5d,0x05
1967 .byte   0x04,0x06,0xd4,0x6f,    0x60,0x50,0x15,0xff
1968 .byte   0x19,0x98,0xfb,0x24,    0xd6,0xbd,0xe9,0x97
1969 .byte   0x89,0x40,0x43,0xcc,    0x67,0xd9,0x9e,0x77
1970 .byte   0xb0,0xe8,0x42,0xbd,    0x07,0x89,0x8b,0x88
1971 .byte   0xe7,0x19,0x5b,0x38,    0x79,0xc8,0xee,0xdb
1972 .byte   0xa1,0x7c,0x0a,0x47,    0x7c,0x42,0x0f,0xe9
1973 .byte   0xf8,0x84,0x1e,0xc9,    0x00,0x00,0x00,0x00
1974 .byte   0x09,0x80,0x86,0x83,    0x32,0x2b,0xed,0x48
1975 .byte   0x1e,0x11,0x70,0xac,    0x6c,0x5a,0x72,0x4e
1976 .byte   0xfd,0x0e,0xff,0xfb,    0x0f,0x85,0x38,0x56
1977 .byte   0x3d,0xae,0xd5,0x1e,    0x36,0x2d,0x39,0x27
1978 .byte   0x0a,0x0f,0xd9,0x64,    0x68,0x5c,0xa6,0x21
1979 .byte   0x9b,0x5b,0x54,0xd1,    0x24,0x36,0x2e,0x3a
1980 .byte   0x0c,0x0a,0x67,0xb1,    0x93,0x57,0xe7,0x0f
1981 .byte   0xb4,0xee,0x96,0xd2,    0x1b,0x9b,0x91,0x9e
1982 .byte   0x80,0xc0,0xc5,0x4f,    0x61,0xdc,0x20,0xa2
1983 .byte   0x5a,0x77,0x4b,0x69,    0x1c,0x12,0x1a,0x16
1984 .byte   0xe2,0x93,0xba,0x0a,    0xc0,0xa0,0x2a,0xe5
1985 .byte   0x3c,0x22,0xe0,0x43,    0x12,0x1b,0x17,0x1d
1986 .byte   0x0e,0x09,0x0d,0x0b,    0xf2,0x8b,0xc7,0xad
1987 .byte   0x2d,0xb6,0xa8,0xb9,    0x14,0x1e,0xa9,0xc8
1988 .byte   0x57,0xf1,0x19,0x85,    0xaf,0x75,0x07,0x4c
1989 .byte   0xee,0x99,0xdd,0xbb,    0xa3,0x7f,0x60,0xfd
1990 .byte   0xf7,0x01,0x26,0x9f,    0x5c,0x72,0xf5,0xbc
1991 .byte   0x44,0x66,0x3b,0xc5,    0x5b,0xfb,0x7e,0x34
1992 .byte   0x8b,0x43,0x29,0x76,    0xcb,0x23,0xc6,0xdc
1993 .byte   0xb6,0xed,0xfc,0x68,    0xb8,0xe4,0xf1,0x63
1994 .byte   0xd7,0x31,0xdc,0xca,    0x42,0x63,0x85,0x10
1995 .byte   0x13,0x97,0x22,0x40,    0x84,0xc6,0x11,0x20
1996 .byte   0x85,0x4a,0x24,0x7d,    0xd2,0xbb,0x3d,0xf8
1997 .byte   0xae,0xf9,0x32,0x11,    0xc7,0x29,0xa1,0x6d
1998 .byte   0x1d,0x9e,0x2f,0x4b,    0xdc,0xb2,0x30,0xf3
1999 .byte   0x0d,0x86,0x52,0xec,    0x77,0xc1,0xe3,0xd0
2000 .byte   0x2b,0xb3,0x16,0x6c,    0xa9,0x70,0xb9,0x99
2001 .byte   0x11,0x94,0x48,0xfa,    0x47,0xe9,0x64,0x22
2002 .byte   0xa8,0xfc,0x8c,0xc4,    0xa0,0xf0,0x3f,0x1a
2003 .byte   0x56,0x7d,0x2c,0xd8,    0x22,0x33,0x90,0xef
2004 .byte   0x87,0x49,0x4e,0xc7,    0xd9,0x38,0xd1,0xc1
2005 .byte   0x8c,0xca,0xa2,0xfe,    0x98,0xd4,0x0b,0x36
2006 .byte   0xa6,0xf5,0x81,0xcf,    0xa5,0x7a,0xde,0x28
2007 .byte   0xda,0xb7,0x8e,0x26,    0x3f,0xad,0xbf,0xa4
2008 .byte   0x2c,0x3a,0x9d,0xe4,    0x50,0x78,0x92,0x0d
2009 .byte   0x6a,0x5f,0xcc,0x9b,    0x54,0x7e,0x46,0x62
2010 .byte   0xf6,0x8d,0x13,0xc2,    0x90,0xd8,0xb8,0xe8
2011 .byte   0x2e,0x39,0xf7,0x5e,    0x82,0xc3,0xaf,0xf5
2012 .byte   0x9f,0x5d,0x80,0xbe,    0x69,0xd0,0x93,0x7c
2013 .byte   0x6f,0xd5,0x2d,0xa9,    0xcf,0x25,0x12,0xb3
2014 .byte   0xc8,0xac,0x99,0x3b,    0x10,0x18,0x7d,0xa7
2015 .byte   0xe8,0x9c,0x63,0x6e,    0xdb,0x3b,0xbb,0x7b
2016 .byte   0xcd,0x26,0x78,0x09,    0x6e,0x59,0x18,0xf4
2017 .byte   0xec,0x9a,0xb7,0x01,    0x83,0x4f,0x9a,0xa8
2018 .byte   0xe6,0x95,0x6e,0x65,    0xaa,0xff,0xe6,0x7e
2019 .byte   0x21,0xbc,0xcf,0x08,    0xef,0x15,0xe8,0xe6
2020 .byte   0xba,0xe7,0x9b,0xd9,    0x4a,0x6f,0x36,0xce
2021 .byte   0xea,0x9f,0x09,0xd4,    0x29,0xb0,0x7c,0xd6
2022 .byte   0x31,0xa4,0xb2,0xaf,    0x2a,0x3f,0x23,0x31
2023 .byte   0xc6,0xa5,0x94,0x30,    0x35,0xa2,0x66,0xc0
2024 .byte   0x74,0x4e,0xbc,0x37,    0xfc,0x82,0xca,0xa6
2025 .byte   0xe0,0x90,0xd0,0xb0,    0x33,0xa7,0xd8,0x15
2026 .byte   0xf1,0x04,0x98,0x4a,    0x41,0xec,0xda,0xf7
2027 .byte   0x7f,0xcd,0x50,0x0e,    0x17,0x91,0xf6,0x2f
2028 .byte   0x76,0x4d,0xd6,0x8d,    0x43,0xef,0xb0,0x4d
2029 .byte   0xcc,0xaa,0x4d,0x54,    0xe4,0x96,0x04,0xdf
2030 .byte   0x9e,0xd1,0xb5,0xe3,    0x4c,0x6a,0x88,0x1b
2031 .byte   0xc1,0x2c,0x1f,0xb8,    0x46,0x65,0x51,0x7f
2032 .byte   0x9d,0x5e,0xea,0x04,    0x01,0x8c,0x35,0x5d
2033 .byte   0xfa,0x87,0x74,0x73,    0xfb,0x0b,0x41,0x2e
2034 .byte   0xb3,0x67,0x1d,0x5a,    0x92,0xdb,0xd2,0x52
2035 .byte   0xe9,0x10,0x56,0x33,    0x6d,0xd6,0x47,0x13
2036 .byte   0x9a,0xd7,0x61,0x8c,    0x37,0xa1,0x0c,0x7a
2037 .byte   0x59,0xf8,0x14,0x8e,    0xeb,0x13,0x3c,0x89
2038 .byte   0xce,0xa9,0x27,0xee,    0xb7,0x61,0xc9,0x35
2039 .byte   0xe1,0x1c,0xe5,0xed,    0x7a,0x47,0xb1,0x3c
2040 .byte   0x9c,0xd2,0xdf,0x59,    0x55,0xf2,0x73,0x3f
2041 .byte   0x18,0x14,0xce,0x79,    0x73,0xc7,0x37,0xbf
2042 .byte   0x53,0xf7,0xcd,0xea,    0x5f,0xfd,0xaa,0x5b
2043 .byte   0xdf,0x3d,0x6f,0x14,    0x78,0x44,0xdb,0x86
2044 .byte   0xca,0xaf,0xf3,0x81,    0xb9,0x68,0xc4,0x3e
2045 .byte   0x38,0x24,0x34,0x2c,    0xc2,0xa3,0x40,0x5f
2046 .byte   0x16,0x1d,0xc3,0x72,    0xbc,0xe2,0x25,0x0c
2047 .byte   0x28,0x3c,0x49,0x8b,    0xff,0x0d,0x95,0x41
2048 .byte   0x39,0xa8,0x01,0x71,    0x08,0x0c,0xb3,0xde
2049 .byte   0xd8,0xb4,0xe4,0x9c,    0x64,0x56,0xc1,0x90
2050 .byte   0x7b,0xcb,0x84,0x61,    0xd5,0x32,0xb6,0x70
2051 .byte   0x48,0x6c,0x5c,0x74,    0xd0,0xb8,0x57,0x42
2052
2053 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38  # Td4
2054 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
2055 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
2056 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
2057 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
2058 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
2059 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
2060 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
2061 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
2062 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
2063 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
2064 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
2065 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
2066 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
2067 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
2068 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
2069 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
2070 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
2071 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
2072 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
2073 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
2074 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
2075 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
2076 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
2077 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
2078 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
2079 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
2080 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
2081 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
2082 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
2083 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
2084 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
2085
2086 AES_Te4:
2087 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5  # Te4
2088 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
2089 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
2090 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
2091 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
2092 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
2093 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
2094 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
2095 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
2096 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
2097 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
2098 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
2099 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
2100 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
2101 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
2102 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
2103 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
2104 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
2105 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
2106 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
2107 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
2108 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
2109 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
2110 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
2111 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
2112 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
2113 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
2114 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
2115 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
2116 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
2117 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
2118 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
2119
2120 .byte   0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00     # rcon
2121 .byte   0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
2122 .byte   0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
2123 .byte   0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
2124 .byte   0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
2125 ___
2126 \f
# Emit $code line by line, post-processing each line: evaluate embedded
# `...` expressions and translate the made-up endian-neutral pseudo-ops
# into real MIPS instructions for the target byte order.
# NOTE(review): the "or"-chained substitutions stop at the first one
# that matches, so their order is significant -- do not reorder.
foreach (split("\n",$code)) {
	s/\`([^\`]*)\`/eval $1/ge;	# expand `expr` in the template

	# made-up _instructions, _xtr, _ins, _ror and _bias, cope
	# with byte order dependencies...
	if (/^\s+_/) {
	    # canonicalize two-operand "_op $d,imm" to "_op $d,$d,imm"
	    # (also drops a trailing "#..." comment, if any)
	    s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;

	    # _xtr -> srl: byte-extract shift; on little-endian targets
	    # the shift count is mirrored to 24-n
	    s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("24-$3"))/e or
	    # _ins -> sll: byte-insert shift, mirrored likewise
	    s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("24-$3"))/e or
	    # _ins2 -> ins (bit-field insert), 8-bit field at the
	    # endian-mirrored bit position
	    s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("ins\t$1,$2,%d,8",$big_endian ?	eval($3)
					:		eval("24-$3"))/e or
	    # _ror -> srl; on little-endian the count goes negative and
	    # is fixed up by the srl-with-negative-count rule below
	    s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("$3*-1"))/e or
	    # _bias -> sll with count (n-16)&31 on little-endian
	    s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("($3-16)&31"))/e;

	    # clean-up of the results above: a negative srl count is
	    # really a left shift, srl by 0 is a plain low-byte mask,
	    # and sll by 0 is a no-op (turned into an assembly comment)
	    s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
		sprintf("sll\t$1,$2,$3")/e				or
	    s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
		sprintf("and\t$1,$2,0xff")/e				or
	    s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
	}

	# convert lwl/lwr and swr/swl to little-endian order
	# (remap the displacement to the mirrored byte inside the
	# enclosing aligned word)
	if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
	    s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
		sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e	or
	    s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
		sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
	}

	# rotr counts and 8-bit ext positions were written for
	# big-endian order; mirror them for little-endian
	if (!$big_endian) {
	    s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
	    s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
	}

	print $_,"\n";
}
2173
# Close STDOUT explicitly and check the result: errors from buffered
# writes of the generated assembly (e.g. a full disk) only surface at
# close time and would otherwise be silently lost.
close STDOUT or die "error closing STDOUT: $!";