4a9b41af7622aa440af2d6ef65b0b5dc1498b7e9
[openssl.git] / crypto / aes / asm / aes-c64xplus.pl
1 #! /usr/bin/env perl
2 # Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the Apache License 2.0 (the "License").  You may not use
5 # this file except in compliance with the License.  You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9 #
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16 #
17 # [Endian-neutral] AES for C64x+.
18 #
19 # Even though SPLOOPs are scheduled for 13 cycles, and thus expected
20 # performance is ~8.5 cycles per byte processed with 128-bit key,
21 # measured performance turned out to be ~10 cycles per byte. Discrepancy
22 # must be caused by limitations of L1D memory banking(*), see SPRU871
23 # TI publication for further details. If it's any consolation, it's still
24 # ~20% faster than TI's linear assembly module anyway... Compared to
25 # aes_core.c compiled with cl6x 6.0 with -mv6400+ -o2 options this
26 # code is 3.75x faster and almost 3x smaller (tables included).
27 #
28 # (*)   This means that there might be subtle correlation between data
29 #       and timing and one can wonder if it can be ... attacked:-(
30 #       On the other hand this also means that *if* one chooses to
31 #       implement *4* T-tables variant [instead of 1 T-table as in
32 #       this implementation, or in addition to], then one ought to
33 #       *interleave* them. Even though it complicates addressing,
34 #       references to interleaved tables would be guaranteed not to
35 #       clash. I reckon that it should be possible to break 8 cycles
36 #       per byte "barrier," i.e. improve by ~20%, naturally at the
37 #       cost of 8x increased pressure on L1D. 8x because you'd have
38 #       to interleave both Te and Td tables...
39
40 $output = pop and open STDOUT,">$output";    # last command-line argument, if any, names the file STDOUT is redirected to
41
42 ($TEA,$TEB)=("A5","B5");     # lookup-table base pointer (A-side, and its B-side copy made with MV)
43 ($KPA,$KPB)=("A3","B1");     # key-schedule pointers: KEY+0 and KEY+4, each stepped two words per load/store
44 @K=("A6","B6","A7","B7");    # the four words of the current round key
45 @s=("A8","B8","A9","B9");    # the four state words s0..s3
46 @Te0=@Td0=("A16","B16","A17","B17");         # scratch: byte index, then the value looked up for it
47 @Te1=@Td1=("A18","B18","A19","B19");         # (the Td* names used by the decrypt code alias the
48 @Te2=@Td2=("A20","B20","A21","B21");         #  same registers as the Te* names used by encrypt)
49 @Te3=@Td3=("A22","B22","A23","B23");
50
51 $code=<<___;
52         .text
53
;; Assemblers older than version 7000000 get __TI_EABI__ defined as 0
;; (presumably because they do not predefine it - confirm in the TI
;; assembler manual).
54         .if     .ASSEMBLER_VERSION<7000000
55         .asg    0,__TI_EABI__
56         .endif
;; Under EABI the underscore-prefixed labels used throughout this file
;; are substituted with the unprefixed C names.
;; NOTE(review): .nocmp presumably turns off compact instruction
;; encoding for this section - confirm in the TI assembler manual.
57         .if     __TI_EABI__
58         .nocmp
59         .asg    AES_encrypt,_AES_encrypt
60         .asg    AES_decrypt,_AES_decrypt
61         .asg    AES_set_encrypt_key,_AES_set_encrypt_key
62         .asg    AES_set_decrypt_key,_AES_set_decrypt_key
63         .asg    AES_ctr32_encrypt,_AES_ctr32_encrypt
64         .endif
65
;; Symbolic names for the registers that carry the return address, the
;; arguments, the return value and the stack pointer.
66         .asg    B3,RA
67         .asg    A4,INP
68         .asg    B4,OUT
69         .asg    A6,KEY
70         .asg    A4,RET
71         .asg    B15,SP
72
;; EXTn is the first constant handed to EXTU (always paired with 24) to
;; pick one byte of a state word as a table index; going by the inline
;; comments in the round code, with the little-endian values below EXT3
;; yields s>>24, EXT2 s>>16, EXT1 s>>8 and EXT0 the low byte.
;; TBLn is the ROTL count applied to a value loaded from the single
;; T-table to obtain the corresponding Te1..Te3 / Td1..Td3 entry.
73         .eval   24,EXT0
74         .eval   16,EXT1
75         .eval   8,EXT2
76         .eval   0,EXT3
77         .eval   8,TBL1
78         .eval   16,TBL2
79         .eval   24,TBL3
80
;; Big-endian builds mirror the byte positions and rotate counts.
81         .if     .BIG_ENDIAN
82         .eval   24-EXT0,EXT0
83         .eval   24-EXT1,EXT1
84         .eval   24-EXT2,EXT2
85         .eval   24-EXT3,EXT3
86         .eval   32-TBL1,TBL1
87         .eval   32-TBL2,TBL2
88         .eval   32-TBL3,TBL3
89         .endif
90
;;--------------------------------------------------------------------
;; _AES_encrypt: encrypt one 16-byte block.
;;   INP (A4) -> input block, OUT (B4) -> output block,
;;   KEY (A6) -> key schedule; the round count is read from word 60
;;   of KEY.
;; Entering at _AES_encrypt sets B2=1, which predicates the LDNDW of
;; the input and the STNDW of the output. __encrypt is a second entry
;; point: with B2=0 no block is loaded or stored, the state is taken
;; from and left in A9:A8 and B9:B8 (callers of that path are not
;; visible in this part of the file).
;; Layout: prologue (PC-relative address of AES_Te, round key 0),
;; SPLOOP kernel for the inner rounds (word loads from AES_Te plus
;; rotates), then a last round built from byte loads at AES_Te+1024.
;;--------------------------------------------------------------------
91         .global _AES_encrypt
92 _AES_encrypt:
93         .asmfunc
94         MVK     1,B2                            ; B2=1: load input from INP, store result to OUT
95 __encrypt:
96         .if     __TI_EABI__
97    [B2] LDNDW   *INP++,A9:A8                    ; load input
98 ||      MVKL    \$PCR_OFFSET(AES_Te,__encrypt),$TEA
99 ||      ADDKPC  __encrypt,B0
100    [B2] LDNDW   *INP++,B9:B8
101 ||      MVKH    \$PCR_OFFSET(AES_Te,__encrypt),$TEA
102 ||      ADD     0,KEY,$KPA
103 ||      ADD     4,KEY,$KPB
104         .else
105    [B2] LDNDW   *INP++,A9:A8                    ; load input
106 ||      MVKL    (AES_Te-__encrypt),$TEA
107 ||      ADDKPC  __encrypt,B0
108    [B2] LDNDW   *INP++,B9:B8
109 ||      MVKH    (AES_Te-__encrypt),$TEA
110 ||      ADD     0,KEY,$KPA
111 ||      ADD     4,KEY,$KPB
112         .endif
113         LDW     *$KPA++[2],$Te0[0]              ; zero round key
114 ||      LDW     *$KPB++[2],$Te0[1]
115 ||      MVK     60,A0
116 ||      ADD     B0,$TEA,$TEA                    ; AES_Te
117         LDW     *KEY[A0],B0                     ; rounds
118 ||      MVK     1024,A0                         ; sizeof(AES_Te)
119         LDW     *$KPA++[2],$Te0[2]
120 ||      LDW     *$KPB++[2],$Te0[3]
121 ||      MV      $TEA,$TEB
122         NOP
;; Move the input words into the state registers in the order that
;; matches the build's endianness.
123         .if     .BIG_ENDIAN
124         MV      A9,$s[0]
125 ||      MV      A8,$s[1]
126 ||      MV      B9,$s[2]
127 ||      MV      B8,$s[3]
128         .else
129         MV      A8,$s[0]
130 ||      MV      A9,$s[1]
131 ||      MV      B8,$s[2]
132 ||      MV      B9,$s[3]
133         .endif
134         XOR     $Te0[0],$s[0],$s[0]
135 ||      XOR     $Te0[1],$s[1],$s[1]
136 ||      LDW     *$KPA++[2],$K[0]                ; 1st round key
137 ||      LDW     *$KPB++[2],$K[1]
138         SUB     B0,2,B0                         ; rounds-2 is what gets written to ILC below
139
140         SPLOOPD 13
141 ||      MVC     B0,ILC
142 ||      LDW     *$KPA++[2],$K[2]
143 ||      LDW     *$KPB++[2],$K[3]
144 ;;====================================================================
145         EXTU    $s[1],EXT1,24,$Te1[1]
146 ||      EXTU    $s[0],EXT3,24,$Te3[0]
147         LDW     *${TEB}[$Te1[1]],$Te1[1]        ; Te1[s1>>8],   t0
148 ||      LDW     *${TEA}[$Te3[0]],$Te3[0]        ; Te3[s0>>24],  t1
149 ||      XOR     $s[2],$Te0[2],$s[2]             ; modulo-scheduled
150 ||      XOR     $s[3],$Te0[3],$s[3]             ; modulo-scheduled
151 ||      EXTU    $s[1],EXT3,24,$Te3[1]
152 ||      EXTU    $s[0],EXT1,24,$Te1[0]
153         LDW     *${TEB}[$Te3[1]],$Te3[1]        ; Te3[s1>>24],  t2
154 ||      LDW     *${TEA}[$Te1[0]],$Te1[0]        ; Te1[s0>>8],   t3
155 ||      EXTU    $s[2],EXT2,24,$Te2[2]
156 ||      EXTU    $s[3],EXT2,24,$Te2[3]
157         LDW     *${TEA}[$Te2[2]],$Te2[2]        ; Te2[s2>>16],  t0
158 ||      LDW     *${TEB}[$Te2[3]],$Te2[3]        ; Te2[s3>>16],  t1
159 ||      EXTU    $s[3],EXT3,24,$Te3[3]
160 ||      EXTU    $s[2],EXT1,24,$Te1[2]
161         LDW     *${TEB}[$Te3[3]],$Te3[3]        ; Te3[s3>>24],  t0
162 ||      LDW     *${TEA}[$Te1[2]],$Te1[2]        ; Te1[s2>>8],   t1
163 ||      EXTU    $s[0],EXT2,24,$Te2[0]
164 ||      EXTU    $s[1],EXT2,24,$Te2[1]
165         LDW     *${TEA}[$Te2[0]],$Te2[0]        ; Te2[s0>>16],  t2
166 ||      LDW     *${TEB}[$Te2[1]],$Te2[1]        ; Te2[s1>>16],  t3
167 ||      EXTU    $s[3],EXT1,24,$Te1[3]
168 ||      EXTU    $s[2],EXT3,24,$Te3[2]
169         LDW     *${TEB}[$Te1[3]],$Te1[3]        ; Te1[s3>>8],   t2
170 ||      LDW     *${TEA}[$Te3[2]],$Te3[2]        ; Te3[s2>>24],  t3
171 ||      ROTL    $Te1[1],TBL1,$Te3[0]            ; t0
172 ||      ROTL    $Te3[0],TBL3,$Te1[1]            ; t1
173 ||      EXTU    $s[0],EXT0,24,$Te0[0]
174 ||      EXTU    $s[1],EXT0,24,$Te0[1]
175         LDW     *${TEA}[$Te0[0]],$Te0[0]        ; Te0[s0],      t0
176 ||      LDW     *${TEB}[$Te0[1]],$Te0[1]        ; Te0[s1],      t1
177 ||      ROTL    $Te3[1],TBL3,$Te1[0]            ; t2
178 ||      ROTL    $Te1[0],TBL1,$Te3[1]            ; t3
179 ||      EXTU    $s[2],EXT0,24,$Te0[2]
180 ||      EXTU    $s[3],EXT0,24,$Te0[3]
181         LDW     *${TEA}[$Te0[2]],$Te0[2]        ; Te0[s2],      t2
182 ||      LDW     *${TEB}[$Te0[3]],$Te0[3]        ; Te0[s3],      t3
183 ||      ROTL    $Te2[2],TBL2,$Te2[2]            ; t0
184 ||      ROTL    $Te2[3],TBL2,$Te2[3]            ; t1
185 ||      XOR     $K[0],$Te3[0],$s[0]
186 ||      XOR     $K[1],$Te1[1],$s[1]
187         ROTL    $Te3[3],TBL3,$Te1[2]            ; t0
188 ||      ROTL    $Te1[2],TBL1,$Te3[3]            ; t1
189 ||      XOR     $K[2],$Te1[0],$s[2]
190 ||      XOR     $K[3],$Te3[1],$s[3]
191 ||      LDW     *$KPA++[2],$K[0]                ; next round key
192 ||      LDW     *$KPB++[2],$K[1]
193         ROTL    $Te2[0],TBL2,$Te2[0]            ; t2
194 ||      ROTL    $Te2[1],TBL2,$Te2[1]            ; t3
195 ||      XOR     $s[0],$Te2[2],$s[0]
196 ||      XOR     $s[1],$Te2[3],$s[1]
197 ||      LDW     *$KPA++[2],$K[2]
198 ||      LDW     *$KPB++[2],$K[3]
199         ROTL    $Te1[3],TBL1,$Te3[2]            ; t2
200 ||      ROTL    $Te3[2],TBL3,$Te1[3]            ; t3
201 ||      XOR     $s[0],$Te1[2],$s[0]
202 ||      XOR     $s[1],$Te3[3],$s[1]
203         XOR     $s[2],$Te2[0],$s[2]
204 ||      XOR     $s[3],$Te2[1],$s[3]
205 ||      XOR     $s[0],$Te0[0],$s[0]
206 ||      XOR     $s[1],$Te0[1],$s[1]
207         SPKERNEL
208 ||      XOR.L   $s[2],$Te3[2],$s[2]
209 ||      XOR.L   $s[3],$Te1[3],$s[3]
210 ;;====================================================================
;; Last round: same byte extraction as in the kernel, but the lookups
;; are byte loads (LDBU) from the table 1024 bytes past AES_Te.
211         ADD.D   ${TEA},A0,${TEA}                ; point to Te4
212 ||      ADD.D   ${TEB},A0,${TEB}
213 ||      EXTU    $s[1],EXT1,24,$Te1[1]
214 ||      EXTU    $s[0],EXT3,24,$Te3[0]
215         LDBU    *${TEB}[$Te1[1]],$Te1[1]        ; Te1[s1>>8],   t0
216 ||      LDBU    *${TEA}[$Te3[0]],$Te3[0]        ; Te3[s0>>24],  t1
217 ||      XOR     $s[2],$Te0[2],$s[2]             ; modulo-scheduled
218 ||      XOR     $s[3],$Te0[3],$s[3]             ; modulo-scheduled
219 ||      EXTU    $s[0],EXT0,24,$Te0[0]
220 ||      EXTU    $s[1],EXT0,24,$Te0[1]
221         LDBU    *${TEA}[$Te0[0]],$Te0[0]        ; Te0[s0],      t0
222 ||      LDBU    *${TEB}[$Te0[1]],$Te0[1]        ; Te0[s1],      t1
223 ||      EXTU    $s[3],EXT3,24,$Te3[3]
224 ||      EXTU    $s[2],EXT1,24,$Te1[2]
225         LDBU    *${TEB}[$Te3[3]],$Te3[3]        ; Te3[s3>>24],  t0
226 ||      LDBU    *${TEA}[$Te1[2]],$Te1[2]        ; Te1[s2>>8],   t1
227 ||      EXTU    $s[2],EXT2,24,$Te2[2]
228 ||      EXTU    $s[3],EXT2,24,$Te2[3]
229         LDBU    *${TEA}[$Te2[2]],$Te2[2]        ; Te2[s2>>16],  t0
230 ||      LDBU    *${TEB}[$Te2[3]],$Te2[3]        ; Te2[s3>>16],  t1
231 ||      EXTU    $s[1],EXT3,24,$Te3[1]
232 ||      EXTU    $s[0],EXT1,24,$Te1[0]
233         LDBU    *${TEB}[$Te3[1]],$Te3[1]        ; Te3[s1>>24],  t2
234 ||      LDBU    *${TEA}[$Te1[0]],$Te1[0]        ; Te1[s0>>8],   t3
235 ||      EXTU    $s[3],EXT1,24,$Te1[3]
236 ||      EXTU    $s[2],EXT3,24,$Te3[2]
237         LDBU    *${TEB}[$Te1[3]],$Te1[3]        ; Te1[s3>>8],   t2
238 ||      LDBU    *${TEA}[$Te3[2]],$Te3[2]        ; Te3[s2>>24],  t3
239 ||      EXTU    $s[2],EXT0,24,$Te0[2]
240 ||      EXTU    $s[3],EXT0,24,$Te0[3]
241         LDBU    *${TEA}[$Te0[2]],$Te0[2]        ; Te0[s2],      t2
242 ||      LDBU    *${TEB}[$Te0[3]],$Te0[3]        ; Te0[s3],      t3
243 ||      EXTU    $s[0],EXT2,24,$Te2[0]
244 ||      EXTU    $s[1],EXT2,24,$Te2[1]
245         LDBU    *${TEA}[$Te2[0]],$Te2[0]        ; Te2[s0>>16],  t2
246 ||      LDBU    *${TEB}[$Te2[1]],$Te2[1]        ; Te2[s1>>16],  t3
247
;; Combine the looked-up bytes into four words, XOR in the last round
;; key, place the result in A9:A8 / B9:B8 and, when B2 is set, store it
;; to OUT. The branch to RA is issued early; the packets after it still
;; execute (branch delay slots).
248         .if     .BIG_ENDIAN
249         PACK2   $Te0[0],$Te1[1],$Te0[0]
250 ||      PACK2   $Te0[1],$Te1[2],$Te0[1]
251         PACK2   $Te2[2],$Te3[3],$Te2[2]
252 ||      PACK2   $Te2[3],$Te3[0],$Te2[3]
253         PACKL4  $Te0[0],$Te2[2],$Te0[0]
254 ||      PACKL4  $Te0[1],$Te2[3],$Te0[1]
255         XOR     $K[0],$Te0[0],$Te0[0]           ; s[0]
256 ||      XOR     $K[1],$Te0[1],$Te0[1]           ; s[1]
257
258         PACK2   $Te0[2],$Te1[3],$Te0[2]
259 ||      PACK2   $Te0[3],$Te1[0],$Te0[3]
260         PACK2   $Te2[0],$Te3[1],$Te2[0]
261 ||      PACK2   $Te2[1],$Te3[2],$Te2[1]
262 ||      BNOP    RA
263         PACKL4  $Te0[2],$Te2[0],$Te0[2]
264 ||      PACKL4  $Te0[3],$Te2[1],$Te0[3]
265         XOR     $K[2],$Te0[2],$Te0[2]           ; s[2]
266 ||      XOR     $K[3],$Te0[3],$Te0[3]           ; s[3]
267
268         MV      $Te0[0],A9
269 ||      MV      $Te0[1],A8
270         MV      $Te0[2],B9
271 ||      MV      $Te0[3],B8
272 || [B2] STNDW   A9:A8,*OUT++
273    [B2] STNDW   B9:B8,*OUT++
274         .else
275         PACK2   $Te1[1],$Te0[0],$Te1[1]
276 ||      PACK2   $Te1[2],$Te0[1],$Te1[2]
277         PACK2   $Te3[3],$Te2[2],$Te3[3]
278 ||      PACK2   $Te3[0],$Te2[3],$Te3[0]
279         PACKL4  $Te3[3],$Te1[1],$Te1[1]
280 ||      PACKL4  $Te3[0],$Te1[2],$Te1[2]
281         XOR     $K[0],$Te1[1],$Te1[1]           ; s[0]
282 ||      XOR     $K[1],$Te1[2],$Te1[2]           ; s[1]
283
284         PACK2   $Te1[3],$Te0[2],$Te1[3]
285 ||      PACK2   $Te1[0],$Te0[3],$Te1[0]
286         PACK2   $Te3[1],$Te2[0],$Te3[1]
287 ||      PACK2   $Te3[2],$Te2[1],$Te3[2]
288 ||      BNOP    RA
289         PACKL4  $Te3[1],$Te1[3],$Te1[3]
290 ||      PACKL4  $Te3[2],$Te1[0],$Te1[0]
291         XOR     $K[2],$Te1[3],$Te1[3]           ; s[2]
292 ||      XOR     $K[3],$Te1[0],$Te1[0]           ; s[3]
293
294         MV      $Te1[1],A8
295 ||      MV      $Te1[2],A9
296         MV      $Te1[3],B8
297 ||      MV      $Te1[0],B9
298 || [B2] STNDW   A9:A8,*OUT++
299    [B2] STNDW   B9:B8,*OUT++
300         .endif
301         .endasmfunc
302
;;--------------------------------------------------------------------
;; _AES_decrypt: decrypt one 16-byte block.
;;   INP (A4) -> input block, OUT (B4) -> output block,
;;   KEY (A6) -> key schedule; the round count is read from word 60
;;   of KEY.
;; Entering at _AES_decrypt sets B2=1, which predicates the LDNDW of
;; the input and the STNDW of the output. __decrypt is a second entry
;; point: with B2=0 no block is loaded or stored, the state is taken
;; from and left in A9:A8 and B9:B8 (callers of that path are not
;; visible in this part of the file).
;; Same structure as the encrypt routine, with AES_Td as the table and
;; the byte selection and rotate counts arranged for the inverse
;; cipher; the last round uses byte loads at AES_Td+1024.
;;--------------------------------------------------------------------
303         .global _AES_decrypt
304 _AES_decrypt:
305         .asmfunc
306         MVK     1,B2                            ; B2=1: load input from INP, store result to OUT
307 __decrypt:
308         .if     __TI_EABI__
309    [B2] LDNDW   *INP++,A9:A8                    ; load input
310 ||      MVKL    \$PCR_OFFSET(AES_Td,__decrypt),$TEA
311 ||      ADDKPC  __decrypt,B0
312    [B2] LDNDW   *INP++,B9:B8
313 ||      MVKH    \$PCR_OFFSET(AES_Td,__decrypt),$TEA
314 ||      ADD     0,KEY,$KPA
315 ||      ADD     4,KEY,$KPB
316         .else
317    [B2] LDNDW   *INP++,A9:A8                    ; load input
318 ||      MVKL    (AES_Td-__decrypt),$TEA
319 ||      ADDKPC  __decrypt,B0
320    [B2] LDNDW   *INP++,B9:B8
321 ||      MVKH    (AES_Td-__decrypt),$TEA
322 ||      ADD     0,KEY,$KPA
323 ||      ADD     4,KEY,$KPB
324         .endif
325         LDW     *$KPA++[2],$Td0[0]              ; zero round key
326 ||      LDW     *$KPB++[2],$Td0[1]
327 ||      MVK     60,A0
328 ||      ADD     B0,$TEA,$TEA                    ; AES_Td
329         LDW     *KEY[A0],B0                     ; rounds
330 ||      MVK     1024,A0                         ; sizeof(AES_Td)
331         LDW     *$KPA++[2],$Td0[2]
332 ||      LDW     *$KPB++[2],$Td0[3]
333 ||      MV      $TEA,$TEB
334         NOP
;; Move the input words into the state registers in the order that
;; matches the build's endianness.
335         .if     .BIG_ENDIAN
336         MV      A9,$s[0]
337 ||      MV      A8,$s[1]
338 ||      MV      B9,$s[2]
339 ||      MV      B8,$s[3]
340         .else
341         MV      A8,$s[0]
342 ||      MV      A9,$s[1]
343 ||      MV      B8,$s[2]
344 ||      MV      B9,$s[3]
345         .endif
346         XOR     $Td0[0],$s[0],$s[0]
347 ||      XOR     $Td0[1],$s[1],$s[1]
348 ||      LDW     *$KPA++[2],$K[0]                ; 1st round key
349 ||      LDW     *$KPB++[2],$K[1]
350         SUB     B0,2,B0                         ; rounds-2 is what gets written to ILC below
351
352         SPLOOPD 13
353 ||      MVC     B0,ILC
354 ||      LDW     *$KPA++[2],$K[2]
355 ||      LDW     *$KPB++[2],$K[3]
356 ;;====================================================================
357         EXTU    $s[1],EXT3,24,$Td3[1]
358 ||      EXTU    $s[0],EXT1,24,$Td1[0]
359         LDW     *${TEB}[$Td3[1]],$Td3[1]        ; Td3[s1>>24],  t0
360 ||      LDW     *${TEA}[$Td1[0]],$Td1[0]        ; Td1[s0>>8],   t1
361 ||      XOR     $s[2],$Td0[2],$s[2]             ; modulo-scheduled
362 ||      XOR     $s[3],$Td0[3],$s[3]             ; modulo-scheduled
363 ||      EXTU    $s[1],EXT1,24,$Td1[1]
364 ||      EXTU    $s[0],EXT3,24,$Td3[0]
365         LDW     *${TEB}[$Td1[1]],$Td1[1]        ; Td1[s1>>8],   t2
366 ||      LDW     *${TEA}[$Td3[0]],$Td3[0]        ; Td3[s0>>24],  t3
367 ||      EXTU    $s[2],EXT2,24,$Td2[2]
368 ||      EXTU    $s[3],EXT2,24,$Td2[3]
369         LDW     *${TEA}[$Td2[2]],$Td2[2]        ; Td2[s2>>16],  t0
370 ||      LDW     *${TEB}[$Td2[3]],$Td2[3]        ; Td2[s3>>16],  t1
371 ||      EXTU    $s[3],EXT1,24,$Td1[3]
372 ||      EXTU    $s[2],EXT3,24,$Td3[2]
373         LDW     *${TEB}[$Td1[3]],$Td1[3]        ; Td1[s3>>8],   t0
374 ||      LDW     *${TEA}[$Td3[2]],$Td3[2]        ; Td3[s2>>24],  t1
375 ||      EXTU    $s[0],EXT2,24,$Td2[0]
376 ||      EXTU    $s[1],EXT2,24,$Td2[1]
377         LDW     *${TEA}[$Td2[0]],$Td2[0]        ; Td2[s0>>16],  t2
378 ||      LDW     *${TEB}[$Td2[1]],$Td2[1]        ; Td2[s1>>16],  t3
379 ||      EXTU    $s[3],EXT3,24,$Td3[3]
380 ||      EXTU    $s[2],EXT1,24,$Td1[2]
381         LDW     *${TEB}[$Td3[3]],$Td3[3]        ; Td3[s3>>24],  t2
382 ||      LDW     *${TEA}[$Td1[2]],$Td1[2]        ; Td1[s2>>8],   t3
383 ||      ROTL    $Td3[1],TBL3,$Td1[0]            ; t0
384 ||      ROTL    $Td1[0],TBL1,$Td3[1]            ; t1
385 ||      EXTU    $s[0],EXT0,24,$Td0[0]
386 ||      EXTU    $s[1],EXT0,24,$Td0[1]
387         LDW     *${TEA}[$Td0[0]],$Td0[0]        ; Td0[s0],      t0
388 ||      LDW     *${TEB}[$Td0[1]],$Td0[1]        ; Td0[s1],      t1
389 ||      ROTL    $Td1[1],TBL1,$Td3[0]            ; t2
390 ||      ROTL    $Td3[0],TBL3,$Td1[1]            ; t3
391 ||      EXTU    $s[2],EXT0,24,$Td0[2]
392 ||      EXTU    $s[3],EXT0,24,$Td0[3]
393         LDW     *${TEA}[$Td0[2]],$Td0[2]        ; Td0[s2],      t2
394 ||      LDW     *${TEB}[$Td0[3]],$Td0[3]        ; Td0[s3],      t3
395 ||      ROTL    $Td2[2],TBL2,$Td2[2]            ; t0
396 ||      ROTL    $Td2[3],TBL2,$Td2[3]            ; t1
397 ||      XOR     $K[0],$Td1[0],$s[0]
398 ||      XOR     $K[1],$Td3[1],$s[1]
399         ROTL    $Td1[3],TBL1,$Td3[2]            ; t0
400 ||      ROTL    $Td3[2],TBL3,$Td1[3]            ; t1
401 ||      XOR     $K[2],$Td3[0],$s[2]
402 ||      XOR     $K[3],$Td1[1],$s[3]
403 ||      LDW     *$KPA++[2],$K[0]                ; next round key
404 ||      LDW     *$KPB++[2],$K[1]
405         ROTL    $Td2[0],TBL2,$Td2[0]            ; t2
406 ||      ROTL    $Td2[1],TBL2,$Td2[1]            ; t3
407 ||      XOR     $s[0],$Td2[2],$s[0]
408 ||      XOR     $s[1],$Td2[3],$s[1]
409 ||      LDW     *$KPA++[2],$K[2]
410 ||      LDW     *$KPB++[2],$K[3]
411         ROTL    $Td3[3],TBL3,$Td1[2]            ; t2
412 ||      ROTL    $Td1[2],TBL1,$Td3[3]            ; t3
413 ||      XOR     $s[0],$Td3[2],$s[0]
414 ||      XOR     $s[1],$Td1[3],$s[1]
415         XOR     $s[2],$Td2[0],$s[2]
416 ||      XOR     $s[3],$Td2[1],$s[3]
417 ||      XOR     $s[0],$Td0[0],$s[0]
418 ||      XOR     $s[1],$Td0[1],$s[1]
419         SPKERNEL
420 ||      XOR.L   $s[2],$Td1[2],$s[2]
421 ||      XOR.L   $s[3],$Td3[3],$s[3]
422 ;;====================================================================
;; Last round: same byte extraction as in the kernel, but the lookups
;; are byte loads (LDBU) from the table 1024 bytes past AES_Td.
423         ADD.D   ${TEA},A0,${TEA}                ; point to Td4
424 ||      ADD.D   ${TEB},A0,${TEB}
425 ||      EXTU    $s[1],EXT3,24,$Td3[1]
426 ||      EXTU    $s[0],EXT1,24,$Td1[0]
427         LDBU    *${TEB}[$Td3[1]],$Td3[1]        ; Td3[s1>>24],  t0
428 ||      LDBU    *${TEA}[$Td1[0]],$Td1[0]        ; Td1[s0>>8],   t1
429 ||      XOR     $s[2],$Td0[2],$s[2]             ; modulo-scheduled
430 ||      XOR     $s[3],$Td0[3],$s[3]             ; modulo-scheduled
431 ||      EXTU    $s[0],EXT0,24,$Td0[0]
432 ||      EXTU    $s[1],EXT0,24,$Td0[1]
433         LDBU    *${TEA}[$Td0[0]],$Td0[0]        ; Td0[s0],      t0
434 ||      LDBU    *${TEB}[$Td0[1]],$Td0[1]        ; Td0[s1],      t1
435 ||      EXTU    $s[2],EXT2,24,$Td2[2]
436 ||      EXTU    $s[3],EXT2,24,$Td2[3]
437         LDBU    *${TEA}[$Td2[2]],$Td2[2]        ; Td2[s2>>16],  t0
438 ||      LDBU    *${TEB}[$Td2[3]],$Td2[3]        ; Td2[s3>>16],  t1
439 ||      EXTU    $s[3],EXT1,24,$Td1[3]
440 ||      EXTU    $s[2],EXT3,24,$Td3[2]
441         LDBU    *${TEB}[$Td1[3]],$Td1[3]        ; Td1[s3>>8],   t0
442 ||      LDBU    *${TEA}[$Td3[2]],$Td3[2]        ; Td3[s2>>24],  t1
443 ||      EXTU    $s[1],EXT1,24,$Td1[1]
444 ||      EXTU    $s[0],EXT3,24,$Td3[0]
445         LDBU    *${TEB}[$Td1[1]],$Td1[1]        ; Td1[s1>>8],   t2
446 ||      LDBU    *${TEA}[$Td3[0]],$Td3[0]        ; Td3[s0>>24],  t3
447 ||      EXTU    $s[0],EXT2,24,$Td2[0]
448 ||      EXTU    $s[1],EXT2,24,$Td2[1]
449         LDBU    *${TEA}[$Td2[0]],$Td2[0]        ; Td2[s0>>16],  t2
450 ||      LDBU    *${TEB}[$Td2[1]],$Td2[1]        ; Td2[s1>>16],  t3
451 ||      EXTU    $s[3],EXT3,24,$Td3[3]
452 ||      EXTU    $s[2],EXT1,24,$Td1[2]
453         LDBU    *${TEB}[$Td3[3]],$Td3[3]        ; Td3[s3>>24],  t2
454 ||      LDBU    *${TEA}[$Td1[2]],$Td1[2]        ; Td1[s2>>8],   t3
455 ||      EXTU    $s[2],EXT0,24,$Td0[2]
456 ||      EXTU    $s[3],EXT0,24,$Td0[3]
457         LDBU    *${TEA}[$Td0[2]],$Td0[2]        ; Td0[s2],      t2
458 ||      LDBU    *${TEB}[$Td0[3]],$Td0[3]        ; Td0[s3],      t3
459
;; Combine the looked-up bytes into four words, XOR in the last round
;; key, place the result in A9:A8 / B9:B8 and, when B2 is set, store it
;; to OUT. The branch to RA is issued early; the packets after it still
;; execute (branch delay slots).
460         .if     .BIG_ENDIAN
461         PACK2   $Td0[0],$Td1[3],$Td0[0]
462 ||      PACK2   $Td0[1],$Td1[0],$Td0[1]
463         PACK2   $Td2[2],$Td3[1],$Td2[2]
464 ||      PACK2   $Td2[3],$Td3[2],$Td2[3]
465         PACKL4  $Td0[0],$Td2[2],$Td0[0]
466 ||      PACKL4  $Td0[1],$Td2[3],$Td0[1]
467         XOR     $K[0],$Td0[0],$Td0[0]           ; s[0]
468 ||      XOR     $K[1],$Td0[1],$Td0[1]           ; s[1]
469
470         PACK2   $Td0[2],$Td1[1],$Td0[2]
471 ||      PACK2   $Td0[3],$Td1[2],$Td0[3]
472         PACK2   $Td2[0],$Td3[3],$Td2[0]
473 ||      PACK2   $Td2[1],$Td3[0],$Td2[1]
474 ||      BNOP    RA
475         PACKL4  $Td0[2],$Td2[0],$Td0[2]
476 ||      PACKL4  $Td0[3],$Td2[1],$Td0[3]
477         XOR     $K[2],$Td0[2],$Td0[2]           ; s[2]
478 ||      XOR     $K[3],$Td0[3],$Td0[3]           ; s[3]
479
480         MV      $Td0[0],A9
481 ||      MV      $Td0[1],A8
482         MV      $Td0[2],B9
483 ||      MV      $Td0[3],B8
484 || [B2] STNDW   A9:A8,*OUT++
485    [B2] STNDW   B9:B8,*OUT++
486         .else
487         PACK2   $Td1[3],$Td0[0],$Td1[3]
488 ||      PACK2   $Td1[0],$Td0[1],$Td1[0]
489         PACK2   $Td3[1],$Td2[2],$Td3[1]
490 ||      PACK2   $Td3[2],$Td2[3],$Td3[2]
491         PACKL4  $Td3[1],$Td1[3],$Td1[3]
492 ||      PACKL4  $Td3[2],$Td1[0],$Td1[0]
493         XOR     $K[0],$Td1[3],$Td1[3]           ; s[0]
494 ||      XOR     $K[1],$Td1[0],$Td1[0]           ; s[1]
495
496         PACK2   $Td1[1],$Td0[2],$Td1[1]
497 ||      PACK2   $Td1[2],$Td0[3],$Td1[2]
498         PACK2   $Td3[3],$Td2[0],$Td3[3]
499 ||      PACK2   $Td3[0],$Td2[1],$Td3[0]
500 ||      BNOP    RA
501         PACKL4  $Td3[3],$Td1[1],$Td1[1]
502 ||      PACKL4  $Td3[0],$Td1[2],$Td1[2]
503         XOR     $K[2],$Td1[1],$Td1[1]           ; s[2]
504 ||      XOR     $K[3],$Td1[2],$Td1[2]           ; s[3]
505
506         MV      $Td1[3],A8
507 ||      MV      $Td1[0],A9
508         MV      $Td1[1],B8
509 ||      MV      $Td1[2],B9
510 || [B2] STNDW   A9:A8,*OUT++
511    [B2] STNDW   B9:B8,*OUT++
512         .endif
513         .endasmfunc
514 ___
515 {
516 my @K=(@K,@s);                  # extended key
517 my @Te4=map("B$_",(16..19));
518
519 my @Kx9=@Te0;                   # used in AES_set_decrypt_key
520 my @KxB=@Te1;
521 my @KxD=@Te2;
522 my @KxE=@Te3;
523
524 $code.=<<___;
525         .asg    OUT,BITS
526
527         .global _AES_set_encrypt_key
528 _AES_set_encrypt_key:
529 __set_encrypt_key:
530         .asmfunc
531         MV      INP,A0
532 ||      SHRU    BITS,5,BITS                     ; 128-192-256 -> 4-6-8
533 ||      MV      KEY,A1
534   [!A0] B       RA
535 ||[!A0] MVK     -1,RET
536 ||[!A0] MVK     1,A1                            ; only one B RA
537   [!A1] B       RA
538 ||[!A1] MVK     -1,RET
539 ||[!A1] MVK     0,A0
540 ||      MVK     0,B0
541 ||      MVK     0,A1
542    [A0] LDNDW   *INP++,A9:A8
543 || [A0] CMPEQ   4,BITS,B0
544 || [A0] CMPLT   3,BITS,A1
545    [B0] B       key128?
546 || [A1] LDNDW   *INP++,B9:B8
547 || [A0] CMPEQ   6,BITS,B0
548 || [A0] CMPLT   5,BITS,A1
549    [B0] B       key192?
550 || [A1] LDNDW   *INP++,B17:B16
551 || [A0] CMPEQ   8,BITS,B0
552 || [A0] CMPLT   7,BITS,A1
553    [B0] B       key256?
554 || [A1] LDNDW   *INP++,B19:B18
555
556         .if     __TI_EABI__
557    [A0] ADD     0,KEY,$KPA
558 || [A0] ADD     4,KEY,$KPB
559 || [A0] MVKL    \$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA
560 || [A0] ADDKPC  __set_encrypt_key,B6
561    [A0] MVKH    \$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA
562    [A0] ADD     B6,$TEA,$TEA                    ; AES_Te4
563         .else
564    [A0] ADD     0,KEY,$KPA
565 || [A0] ADD     4,KEY,$KPB
566 || [A0] MVKL    (AES_Te4-__set_encrypt_key),$TEA
567 || [A0] ADDKPC  __set_encrypt_key,B6
568    [A0] MVKH    (AES_Te4-__set_encrypt_key),$TEA
569    [A0] ADD     B6,$TEA,$TEA                    ; AES_Te4
570         .endif
571         NOP
572         NOP
573
574         BNOP    RA,5
575 ||      MVK     -2,RET                          ; unknown bit length
576 ||      MVK     0,B0                            ; redundant
577 ;;====================================================================
578 ;;====================================================================
579 key128?:
580         .if     .BIG_ENDIAN
581         MV      A9,$K[0]
582 ||      MV      A8,$K[1]
583 ||      MV      B9,$Te4[2]
584 ||      MV      B8,$K[3]
585         .else
586         MV      A8,$K[0]
587 ||      MV      A9,$K[1]
588 ||      MV      B8,$Te4[2]
589 ||      MV      B9,$K[3]
590         .endif
591
592         MVK     256,A0
593 ||      MVK     9,B0
594
595         SPLOOPD 14
596 ||      MVC     B0,ILC
597 ||      MV      $TEA,$TEB
598 ||      ADD     $TEA,A0,A30                     ; rcon
599 ;;====================================================================
600         LDW     *A30++[1],A31                   ; rcon[i]
601 ||      MV      $Te4[2],$K[2]
602 ||      EXTU    $K[3],EXT1,24,$Te4[0]
603         LDBU    *${TEB}[$Te4[0]],$Te4[0]
604 ||      MV      $K[3],A0
605 ||      EXTU    $K[3],EXT2,24,$Te4[1]
606         LDBU    *${TEB}[$Te4[1]],$Te4[1]
607 ||      EXTU    A0,EXT3,24,A0
608 ||      EXTU    $K[3],EXT0,24,$Te4[3]
609         .if     .BIG_ENDIAN
610         LDBU    *${TEA}[A0],$Te4[3]
611 ||      LDBU    *${TEB}[$Te4[3]],A0
612         .else
613         LDBU    *${TEA}[A0],A0
614 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
615         .endif
616
617         STW     $K[0],*$KPA++[2]
618 ||      STW     $K[1],*$KPB++[2]
619         STW     $K[2],*$KPA++[2]
620 ||      STW     $K[3],*$KPB++[2]
621
622         XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
623         .if     .BIG_ENDIAN
624         PACK2   $Te4[0],$Te4[1],$Te4[1]
625         PACK2   $Te4[3],A0,$Te4[3]
626         PACKL4  $Te4[1],$Te4[3],$Te4[3]
627         .else
628         PACK2   $Te4[1],$Te4[0],$Te4[1]
629         PACK2   $Te4[3],A0,$Te4[3]
630         PACKL4  $Te4[3],$Te4[1],$Te4[3]
631         .endif
632         XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
633         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
634         MV      $Te4[0],$K[0]
635 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
636         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
637         SPKERNEL
638 ;;====================================================================
639         BNOP    RA
640         MV      $Te4[2],$K[2]
641 ||      STW     $K[0],*$KPA++[2]
642 ||      STW     $K[1],*$KPB++[2]
643         STW     $K[2],*$KPA++[2]
644 ||      STW     $K[3],*$KPB++[2]
645         MVK     10,B0                           ; rounds
646         STW     B0,*++${KPB}[15]
647         MVK     0,RET
648 ;;====================================================================
649 ;;====================================================================
650 key192?:
651         .if     .BIG_ENDIAN
652         MV      A9,$K[0]
653 ||      MV      A8,$K[1]
654 ||      MV      B9,$K[2]
655 ||      MV      B8,$K[3]
656         MV      B17,$Te4[2]
657 ||      MV      B16,$K[5]
658         .else
659         MV      A8,$K[0]
660 ||      MV      A9,$K[1]
661 ||      MV      B8,$K[2]
662 ||      MV      B9,$K[3]
663         MV      B16,$Te4[2]
664 ||      MV      B17,$K[5]
665         .endif
666
667         MVK     256,A0
668 ||      MVK     6,B0
669         MV      $TEA,$TEB
670 ||      ADD     $TEA,A0,A30                     ; rcon
671 ;;====================================================================
672 loop192?:
673         LDW     *A30++[1],A31                   ; rcon[i]
674 ||      MV      $Te4[2],$K[4]
675 ||      EXTU    $K[5],EXT1,24,$Te4[0]
676         LDBU    *${TEB}[$Te4[0]],$Te4[0]
677 ||      MV      $K[5],A0
678 ||      EXTU    $K[5],EXT2,24,$Te4[1]
679         LDBU    *${TEB}[$Te4[1]],$Te4[1]
680 ||      EXTU    A0,EXT3,24,A0
681 ||      EXTU    $K[5],EXT0,24,$Te4[3]
682         .if     .BIG_ENDIAN
683         LDBU    *${TEA}[A0],$Te4[3]
684 ||      LDBU    *${TEB}[$Te4[3]],A0
685         .else
686         LDBU    *${TEA}[A0],A0
687 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
688         .endif
689
690         STW     $K[0],*$KPA++[2]
691 ||      STW     $K[1],*$KPB++[2]
692         STW     $K[2],*$KPA++[2]
693 ||      STW     $K[3],*$KPB++[2]
694         STW     $K[4],*$KPA++[2]
695 ||      STW     $K[5],*$KPB++[2]
696
697         XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
698         .if     .BIG_ENDIAN
699         PACK2   $Te4[0],$Te4[1],$Te4[1]
700 ||      PACK2   $Te4[3],A0,$Te4[3]
701         PACKL4  $Te4[1],$Te4[3],$Te4[3]
702         .else
703         PACK2   $Te4[1],$Te4[0],$Te4[1]
704 ||      PACK2   $Te4[3],A0,$Te4[3]
705         PACKL4  $Te4[3],$Te4[1],$Te4[3]
706         .endif
707         BDEC    loop192?,B0
708 ||      XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
709         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
710         MV      $Te4[0],$K[0]
711 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
712         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
713         MV      $Te4[2],$K[2]
714 ||      XOR     $K[3],$K[4],$Te4[2]             ; K[4]
715         XOR     $Te4[2],$K[5],$K[5]             ; K[5]
716 ;;====================================================================
717         BNOP    RA
718         STW     $K[0],*$KPA++[2]
719 ||      STW     $K[1],*$KPB++[2]
720         STW     $K[2],*$KPA++[2]
721 ||      STW     $K[3],*$KPB++[2]
722         MVK     12,B0                           ; rounds
723         STW     B0,*++${KPB}[7]
724         MVK     0,RET
725 ;;====================================================================
726 ;;====================================================================
727 key256?:
728         .if     .BIG_ENDIAN
729         MV      A9,$K[0]
730 ||      MV      A8,$K[1]
731 ||      MV      B9,$K[2]
732 ||      MV      B8,$K[3]
733         MV      B17,$K[4]
734 ||      MV      B16,$K[5]
735 ||      MV      B19,$Te4[2]
736 ||      MV      B18,$K[7]
737         .else
738         MV      A8,$K[0]
739 ||      MV      A9,$K[1]
740 ||      MV      B8,$K[2]
741 ||      MV      B9,$K[3]
742         MV      B16,$K[4]
743 ||      MV      B17,$K[5]
744 ||      MV      B18,$Te4[2]
745 ||      MV      B19,$K[7]
746         .endif
747
748         MVK     256,A0
749 ||      MVK     6,B0
750         MV      $TEA,$TEB
751 ||      ADD     $TEA,A0,A30                     ; rcon
752 ;;====================================================================
753 loop256?:
754         LDW     *A30++[1],A31                   ; rcon[i]
755 ||      MV      $Te4[2],$K[6]
756 ||      EXTU    $K[7],EXT1,24,$Te4[0]
757         LDBU    *${TEB}[$Te4[0]],$Te4[0]
758 ||      MV      $K[7],A0
759 ||      EXTU    $K[7],EXT2,24,$Te4[1]
760         LDBU    *${TEB}[$Te4[1]],$Te4[1]
761 ||      EXTU    A0,EXT3,24,A0
762 ||      EXTU    $K[7],EXT0,24,$Te4[3]
763         .if     .BIG_ENDIAN
764         LDBU    *${TEA}[A0],$Te4[3]
765 ||      LDBU    *${TEB}[$Te4[3]],A0
766         .else
767         LDBU    *${TEA}[A0],A0
768 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
769         .endif
770
771         STW     $K[0],*$KPA++[2]
772 ||      STW     $K[1],*$KPB++[2]
773         STW     $K[2],*$KPA++[2]
774 ||      STW     $K[3],*$KPB++[2]
775         STW     $K[4],*$KPA++[2]
776 ||      STW     $K[5],*$KPB++[2]
777         STW     $K[6],*$KPA++[2]
778 ||      STW     $K[7],*$KPB++[2]
779 ||      XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
780         .if     .BIG_ENDIAN
781         PACK2   $Te4[0],$Te4[1],$Te4[1]
782 ||      PACK2   $Te4[3],A0,$Te4[3]
783         PACKL4  $Te4[1],$Te4[3],$Te4[3]
784 ||[!B0] B       done256?
785         .else
786         PACK2   $Te4[1],$Te4[0],$Te4[1]
787 ||      PACK2   $Te4[3],A0,$Te4[3]
788         PACKL4  $Te4[3],$Te4[1],$Te4[3]
789 ||[!B0] B       done256?
790         .endif
791         XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
792         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
793         MV      $Te4[0],$K[0]
794 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
795         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
796
797         MV      $Te4[2],$K[2]
798 || [B0] EXTU    $K[3],EXT0,24,$Te4[0]
799 || [B0] SUB     B0,1,B0
800         LDBU    *${TEB}[$Te4[0]],$Te4[0]
801 ||      MV      $K[3],A0
802 ||      EXTU    $K[3],EXT1,24,$Te4[1]
803         LDBU    *${TEB}[$Te4[1]],$Te4[1]
804 ||      EXTU    A0,EXT2,24,A0
805 ||      EXTU    $K[3],EXT3,24,$Te4[3]
806
807         .if     .BIG_ENDIAN
808         LDBU    *${TEA}[A0],$Te4[3]
809 ||      LDBU    *${TEB}[$Te4[3]],A0
810         NOP     3
811         PACK2   $Te4[0],$Te4[1],$Te4[1]
812         PACK2   $Te4[3],A0,$Te4[3]
813 ||      B       loop256?
814         PACKL4  $Te4[1],$Te4[3],$Te4[3]
815         .else
816         LDBU    *${TEA}[A0],A0
817 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
818         NOP     3
819         PACK2   $Te4[1],$Te4[0],$Te4[1]
820         PACK2   $Te4[3],A0,$Te4[3]
821 ||      B       loop256?
822         PACKL4  $Te4[3],$Te4[1],$Te4[3]
823         .endif
824
825         XOR     $Te4[3],$K[4],$Te4[0]           ; K[4]
826         XOR     $Te4[0],$K[5],$K[5]             ; K[5]
827         MV      $Te4[0],$K[4]
828 ||      XOR     $K[5],$K[6],$Te4[2]             ; K[6]
829         XOR     $Te4[2],$K[7],$K[7]             ; K[7]
830 ;;====================================================================
831 done256?:
832         BNOP    RA
833         STW     $K[0],*$KPA++[2]
834 ||      STW     $K[1],*$KPB++[2]
835         STW     $K[2],*$KPA++[2]
836 ||      STW     $K[3],*$KPB++[2]
837         MVK     14,B0                           ; rounds
838         STW     B0,*--${KPB}[1]
839         MVK     0,RET
840         .endasmfunc
841
842         .global _AES_set_decrypt_key
843 _AES_set_decrypt_key:
844         .asmfunc
    ;; Build a decryption key schedule in place, in three steps:
    ;;  1. call __set_encrypt_key with the caller's arguments untouched;
    ;;  2. if it left a non-zero round count in B0, swap the round keys
    ;;     end-for-end (first with last, second with second-to-last, ...);
    ;;  3. run InvMixColumns over every round key except the first and
    ;;     the last, using GMPY4 multiplies by 0x09/0x0B/0x0D/0x0E.
    ;; When B0 is zero the function returns at once and RET is left exactly
    ;; as __set_encrypt_key set it; otherwise RET is 0 on return.
    ;; B30 (key schedule pointer) and B31 (caller's return address) are
    ;; relied upon to survive the call; GFPGFR is saved and restored.
845         B       __set_encrypt_key               ; guarantee local call
846         MV      KEY,B30                         ; B30 is not modified
847         MV      RA, B31                         ; B31 is not modified
848         ADDKPC  ret?,RA,2                       ; callee returns to ret? below
849 ret?:                                           ; B0 holds rounds or zero
850   [!B0] BNOP    B31                             ; return if zero
851    [B0] SHL     B0,4,A0                         ; offset to last round key
852    [B0] SHRU    B0,1,B1                         ; B1 = rounds/2
853    [B0] SUB     B1,1,B1                         ; B1 = rounds/2-1, goes to ILC below
854    [B0] MVK     0x0000001B,B3                   ; AES polynomial
855    [B0] MVKH    0x07000000,B3                   ; B3 = 0x0700001B, later written to GFPGFR
856
857         SPLOOPD 9                               ; flip round keys
858 ||      MVC     B1,ILC                          ; loop count for the flip loop
859 ||      MV      B30,$KPA                        ; front pointer: start of key schedule
860 ||      ADD     B30,A0,$KPB                     ; back pointer: last round key
861 ||      MVK     16,A0                           ; sizeof(round key)
862 ;;====================================================================
863         LDW     *${KPA}[0],A16                  ; fetch one round key from each end
864 ||      LDW     *${KPB}[0],B16
865         LDW     *${KPA}[1],A17
866 ||      LDW     *${KPB}[1],B17
867         LDW     *${KPA}[2],A18
868 ||      LDW     *${KPB}[2],B18
869         LDW     *${KPA}[3],A19
870 ||      ADD     $KPA,A0,$KPA                    ; front pointer steps forward one round key
871 ||      LDW     *${KPB}[3],B19
872 ||      SUB     $KPB,A0,$KPB                    ; back pointer steps back one round key
873         NOP
874         STW     B16,*${KPA}[-4]                 ; store crosswise; the -4../+4.. offsets
875 ||      STW     A16,*${KPB}[4]                  ; compensate for the pointer updates above
876         STW     B17,*${KPA}[-3]
877 ||      STW     A17,*${KPB}[5]
878         STW     B18,*${KPA}[-2]
879 ||      STW     A18,*${KPB}[6]
880         STW     B19,*${KPA}[-1]
881 ||      STW     A19,*${KPB}[7]
882         SPKERNEL
883 ;;====================================================================
884         SUB     B0,1,B0                         ; skip last round
885 ||      ADD     B30,A0,$KPA                     ; skip first round
886 ||      ADD     B30,A0,$KPB
887 ||      MVC     GFPGFR,B30                      ; save GFPGFR
888         LDW     *${KPA}[0],$K[0]                ; preload the first round key to transform
889 ||      LDW     *${KPB}[1],$K[1]
890 ||      MVC     B3,GFPGFR                       ; select the AES polynomial for GMPY4
891         LDW     *${KPA}[2],$K[2]
892 ||      LDW     *${KPB}[3],$K[3]
893         MVK     0x00000909,A24                  ; A24 becomes 0x09090909
894 ||      MVK     0x00000B0B,B24                  ; B24 becomes 0x0B0B0B0B
895         MVKH    0x09090000,A24
896 ||      MVKH    0x0B0B0000,B24
897         MVC     B0,ILC                          ; loop count = rounds-1
898 ||      SUB     B0,1,B0                         ; B0 now gates loading of the next round key
899
900         GMPY4   $K[0],A24,$Kx9[0]               ; multiply by 0x09
901 ||      GMPY4   $K[1],A24,$Kx9[1]
902 ||      MVK     0x00000D0D,A25                  ; A25 becomes 0x0D0D0D0D
903 ||      MVK     0x00000E0E,B25                  ; B25 becomes 0x0E0E0E0E
904         GMPY4   $K[2],A24,$Kx9[2]
905 ||      GMPY4   $K[3],A24,$Kx9[3]
906 ||      MVKH    0x0D0D0000,A25
907 ||      MVKH    0x0E0E0000,B25
908
909         GMPY4   $K[0],B24,$KxB[0]               ; multiply by 0x0B
910 ||      GMPY4   $K[1],B24,$KxB[1]
911         GMPY4   $K[2],B24,$KxB[2]
912 ||      GMPY4   $K[3],B24,$KxB[3]
913
914         SPLOOP  11                              ; InvMixColumns
915 ;;====================================================================
916         GMPY4   $K[0],A25,$KxD[0]               ; multiply by 0x0D
917 ||      GMPY4   $K[1],A25,$KxD[1]
918 ||      SWAP2   $Kx9[0],$Kx9[0]                 ; rotate by 16
919 ||      SWAP2   $Kx9[1],$Kx9[1]
920 ||      MV      $K[0],$s[0]                     ; this or DINT
921 ||      MV      $K[1],$s[1]
922 || [B0] LDW     *${KPA}[4],$K[0]                ; fetch next round key while B0 is non-zero
923 || [B0] LDW     *${KPB}[5],$K[1]
924         GMPY4   $K[2],A25,$KxD[2]
925 ||      GMPY4   $K[3],A25,$KxD[3]
926 ||      SWAP2   $Kx9[2],$Kx9[2]
927 ||      SWAP2   $Kx9[3],$Kx9[3]
928 ||      MV      $K[2],$s[2]
929 ||      MV      $K[3],$s[3]
930 || [B0] LDW     *${KPA}[6],$K[2]
931 || [B0] LDW     *${KPB}[7],$K[3]
932
933         GMPY4   $s[0],B25,$KxE[0]               ; multiply by 0x0E
934 ||      GMPY4   $s[1],B25,$KxE[1]
935 ||      XOR     $Kx9[0],$KxB[0],$KxB[0]         ; combine the 0x09 and 0x0B products
936 ||      XOR     $Kx9[1],$KxB[1],$KxB[1]
937         GMPY4   $s[2],B25,$KxE[2]
938 ||      GMPY4   $s[3],B25,$KxE[3]
939 ||      XOR     $Kx9[2],$KxB[2],$KxB[2]
940 ||      XOR     $Kx9[3],$KxB[3],$KxB[3]
941
942         ROTL    $KxB[0],TBL3,$KxB[0]            ; rotate the combined term by TBL3 bits
943 ||      ROTL    $KxB[1],TBL3,$KxB[1]
944 ||      SWAP2   $KxD[0],$KxD[0]                 ; rotate by 16
945 ||      SWAP2   $KxD[1],$KxD[1]
946         ROTL    $KxB[2],TBL3,$KxB[2]
947 ||      ROTL    $KxB[3],TBL3,$KxB[3]
948 ||      SWAP2   $KxD[2],$KxD[2]
949 ||      SWAP2   $KxD[3],$KxD[3]
950
951         XOR     $KxE[0],$KxD[0],$KxE[0]         ; fold the 0x0D product into the 0x0E product
952 ||      XOR     $KxE[1],$KxD[1],$KxE[1]
953 || [B0] GMPY4   $K[0],A24,$Kx9[0]               ; multiply by 0x09 (next round key)
954 || [B0] GMPY4   $K[1],A24,$Kx9[1]
955 ||      ADDAW   $KPA,4,$KPA                     ; advance four words, i.e. one round key
956         XOR     $KxE[2],$KxD[2],$KxE[2]
957 ||      XOR     $KxE[3],$KxD[3],$KxE[3]
958 || [B0] GMPY4   $K[2],A24,$Kx9[2]
959 || [B0] GMPY4   $K[3],A24,$Kx9[3]
960 ||      ADDAW   $KPB,4,$KPB
961
962         XOR     $KxB[0],$KxE[0],$KxE[0]         ; final XOR yields the transformed word
963 ||      XOR     $KxB[1],$KxE[1],$KxE[1]
964 || [B0] GMPY4   $K[0],B24,$KxB[0]               ; multiply by 0x0B (next round key)
965 || [B0] GMPY4   $K[1],B24,$KxB[1]
966         XOR     $KxB[2],$KxE[2],$KxE[2]
967 ||      XOR     $KxB[3],$KxE[3],$KxE[3]
968 || [B0] GMPY4   $K[2],B24,$KxB[2]
969 || [B0] GMPY4   $K[3],B24,$KxB[3]
970 ||      STW     $KxE[0],*${KPA}[-4]             ; write back in place; pointers were
971 ||      STW     $KxE[1],*${KPB}[-3]             ; already advanced, hence negative offsets
972         STW     $KxE[2],*${KPA}[-2]
973 ||      STW     $KxE[3],*${KPB}[-1]
974 || [B0] SUB     B0,1,B0                         ; one fewer round key left to fetch
975         SPKERNEL
976 ;;====================================================================
977         BNOP    B31,3                           ; return to the original caller
978         MVC     B30,GFPGFR                      ; restore GFPGFR(*)
979         MVK     0,RET                           ; return value 0
980         .endasmfunc
981 ___
982 # (*)   Even though ABI doesn't specify GFPGFR as non-volatile, there
983 #       are code samples out there that *assume* its default value.
984 }
985 {
986 my ($inp,$out,$blocks,$key,$ivp)=("A4","B4","A6","B6","A8");
987 $code.=<<___;
988         .global _AES_ctr32_encrypt
989 _AES_ctr32_encrypt:
990         .asmfunc
    ;; CTR-mode encryption built on __encrypt, one 16-byte block per pass.
    ;; Arguments arrive in the registers that the Perl variable list just
    ;; above this function binds to inp, out, blocks, key and ivp
    ;; (A4, B4, A6, B6 and A8 respectively).
    ;; The loop is pipelined by hand: every pass branches to __encrypt for
    ;; the current counter and, in that branch's delay slots, XORs the
    ;; result of the previous pass with the input loaded during the previous
    ;; pass and stores it.  A1 is zero on the first pass, so nothing is
    ;; stored then; once A2 (blocks left) reaches zero the last pass only
    ;; stores its output and returns through B27, the saved return address.
991         LDNDW   *${ivp}[0],A31:A30      ; load counter value
992 ||      MV      $blocks,A2              ; reassign $blocks
993 ||      DMV     RA,$key,B27:B26         ; reassign RA and $key
994         LDNDW   *${ivp}[1],B31:B30      ; second half of the counter block
995 ||      MVK     0,B2                    ; don't let __encrypt load input
996 ||      MVK     0,A1                    ; and postpone writing output
997         .if     .BIG_ENDIAN
998         NOP
999         .else
1000         NOP     4                       ; wait for the load into B31 to land
1001         SWAP2   B31,B31                 ; keep least significant 32 bits
1002         SWAP4   B31,B31                 ; in host byte order
1003         .endif
1004 ctr32_loop?:                            ; A2 = blocks left, A1 = 0 on first pass only
1005    [A2] BNOP    __encrypt               ; encrypt the counter while blocks remain
1006 || [A1] XOR     A29,A9,A9               ; input^Ek(counter)
1007 || [A1] XOR     A28,A8,A8
1008 || [A2] LDNDW   *INP++,A29:A28          ; load input
1009   [!A2] BNOP    B27                     ; return
1010 || [A1] XOR     B29,B9,B9
1011 || [A1] XOR     B28,B8,B8
1012 || [A2] LDNDW   *INP++,B29:B28
1013         .if     .BIG_ENDIAN
1014    [A1] STNDW   A9:A8,*OUT++            ; save output
1015 || [A2] DMV     A31,A30,A9:A8           ; pass counter value to __encrypt
1016    [A1] STNDW   B9:B8,*OUT++
1017 || [A2] DMV     B31,B30,B9:B8
1018 || [A2] ADD     B30,1,B30               ; counter++
1019         .else
1020    [A1] STNDW   A9:A8,*OUT++            ; save output
1021 || [A2] DMV     A31,A30,A9:A8           ; pass counter value to __encrypt
1022 || [A2] SWAP2   B31,B0                  ; undo the byte reversal for the copy
1023 || [A2] ADD     B31,1,B31               ; counter++
1024    [A1] STNDW   B9:B8,*OUT++
1025 || [A2] MV      B30,B8
1026 || [A2] SWAP4   B0,B9                   ; handed to __encrypt in B9
1027         .endif
1028    [A2] ADDKPC  ctr32_loop?,RA          ; return to ctr32_loop?
1029 || [A2] MV      B26,KEY                 ; pass $key
1030 || [A2] SUB     A2,1,A2                 ; $blocks--
1031 ||[!A1] MVK     1,A1                    ; from now on there is output to write
1032         NOP                             ; fill the remaining branch delay slots
1033         NOP
1034         .endasmfunc
1035 ___
1036 }
1037 # Tables are kept in an endian-neutral manner
1038 $code.=<<___;
1039         .if     __TI_EABI__
1040         .sect   ".text:aes_asm.const"
1041         .else
1042         .sect   ".const:aes_asm"
1043         .endif
1044         .align  128
1045 AES_Te:
1046         .byte   0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84
1047         .byte   0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
1048         .byte   0xff,0xf2,0xf2,0x0d,    0xd6,0x6b,0x6b,0xbd
1049         .byte   0xde,0x6f,0x6f,0xb1,    0x91,0xc5,0xc5,0x54
1050         .byte   0x60,0x30,0x30,0x50,    0x02,0x01,0x01,0x03
1051         .byte   0xce,0x67,0x67,0xa9,    0x56,0x2b,0x2b,0x7d
1052         .byte   0xe7,0xfe,0xfe,0x19,    0xb5,0xd7,0xd7,0x62
1053         .byte   0x4d,0xab,0xab,0xe6,    0xec,0x76,0x76,0x9a
1054         .byte   0x8f,0xca,0xca,0x45,    0x1f,0x82,0x82,0x9d
1055         .byte   0x89,0xc9,0xc9,0x40,    0xfa,0x7d,0x7d,0x87
1056         .byte   0xef,0xfa,0xfa,0x15,    0xb2,0x59,0x59,0xeb
1057         .byte   0x8e,0x47,0x47,0xc9,    0xfb,0xf0,0xf0,0x0b
1058         .byte   0x41,0xad,0xad,0xec,    0xb3,0xd4,0xd4,0x67
1059         .byte   0x5f,0xa2,0xa2,0xfd,    0x45,0xaf,0xaf,0xea
1060         .byte   0x23,0x9c,0x9c,0xbf,    0x53,0xa4,0xa4,0xf7
1061         .byte   0xe4,0x72,0x72,0x96,    0x9b,0xc0,0xc0,0x5b
1062         .byte   0x75,0xb7,0xb7,0xc2,    0xe1,0xfd,0xfd,0x1c
1063         .byte   0x3d,0x93,0x93,0xae,    0x4c,0x26,0x26,0x6a
1064         .byte   0x6c,0x36,0x36,0x5a,    0x7e,0x3f,0x3f,0x41
1065         .byte   0xf5,0xf7,0xf7,0x02,    0x83,0xcc,0xcc,0x4f
1066         .byte   0x68,0x34,0x34,0x5c,    0x51,0xa5,0xa5,0xf4
1067         .byte   0xd1,0xe5,0xe5,0x34,    0xf9,0xf1,0xf1,0x08
1068         .byte   0xe2,0x71,0x71,0x93,    0xab,0xd8,0xd8,0x73
1069         .byte   0x62,0x31,0x31,0x53,    0x2a,0x15,0x15,0x3f
1070         .byte   0x08,0x04,0x04,0x0c,    0x95,0xc7,0xc7,0x52
1071         .byte   0x46,0x23,0x23,0x65,    0x9d,0xc3,0xc3,0x5e
1072         .byte   0x30,0x18,0x18,0x28,    0x37,0x96,0x96,0xa1
1073         .byte   0x0a,0x05,0x05,0x0f,    0x2f,0x9a,0x9a,0xb5
1074         .byte   0x0e,0x07,0x07,0x09,    0x24,0x12,0x12,0x36
1075         .byte   0x1b,0x80,0x80,0x9b,    0xdf,0xe2,0xe2,0x3d
1076         .byte   0xcd,0xeb,0xeb,0x26,    0x4e,0x27,0x27,0x69
1077         .byte   0x7f,0xb2,0xb2,0xcd,    0xea,0x75,0x75,0x9f
1078         .byte   0x12,0x09,0x09,0x1b,    0x1d,0x83,0x83,0x9e
1079         .byte   0x58,0x2c,0x2c,0x74,    0x34,0x1a,0x1a,0x2e
1080         .byte   0x36,0x1b,0x1b,0x2d,    0xdc,0x6e,0x6e,0xb2
1081         .byte   0xb4,0x5a,0x5a,0xee,    0x5b,0xa0,0xa0,0xfb
1082         .byte   0xa4,0x52,0x52,0xf6,    0x76,0x3b,0x3b,0x4d
1083         .byte   0xb7,0xd6,0xd6,0x61,    0x7d,0xb3,0xb3,0xce
1084         .byte   0x52,0x29,0x29,0x7b,    0xdd,0xe3,0xe3,0x3e
1085         .byte   0x5e,0x2f,0x2f,0x71,    0x13,0x84,0x84,0x97
1086         .byte   0xa6,0x53,0x53,0xf5,    0xb9,0xd1,0xd1,0x68
1087         .byte   0x00,0x00,0x00,0x00,    0xc1,0xed,0xed,0x2c
1088         .byte   0x40,0x20,0x20,0x60,    0xe3,0xfc,0xfc,0x1f
1089         .byte   0x79,0xb1,0xb1,0xc8,    0xb6,0x5b,0x5b,0xed
1090         .byte   0xd4,0x6a,0x6a,0xbe,    0x8d,0xcb,0xcb,0x46
1091         .byte   0x67,0xbe,0xbe,0xd9,    0x72,0x39,0x39,0x4b
1092         .byte   0x94,0x4a,0x4a,0xde,    0x98,0x4c,0x4c,0xd4
1093         .byte   0xb0,0x58,0x58,0xe8,    0x85,0xcf,0xcf,0x4a
1094         .byte   0xbb,0xd0,0xd0,0x6b,    0xc5,0xef,0xef,0x2a
1095         .byte   0x4f,0xaa,0xaa,0xe5,    0xed,0xfb,0xfb,0x16
1096         .byte   0x86,0x43,0x43,0xc5,    0x9a,0x4d,0x4d,0xd7
1097         .byte   0x66,0x33,0x33,0x55,    0x11,0x85,0x85,0x94
1098         .byte   0x8a,0x45,0x45,0xcf,    0xe9,0xf9,0xf9,0x10
1099         .byte   0x04,0x02,0x02,0x06,    0xfe,0x7f,0x7f,0x81
1100         .byte   0xa0,0x50,0x50,0xf0,    0x78,0x3c,0x3c,0x44
1101         .byte   0x25,0x9f,0x9f,0xba,    0x4b,0xa8,0xa8,0xe3
1102         .byte   0xa2,0x51,0x51,0xf3,    0x5d,0xa3,0xa3,0xfe
1103         .byte   0x80,0x40,0x40,0xc0,    0x05,0x8f,0x8f,0x8a
1104         .byte   0x3f,0x92,0x92,0xad,    0x21,0x9d,0x9d,0xbc
1105         .byte   0x70,0x38,0x38,0x48,    0xf1,0xf5,0xf5,0x04
1106         .byte   0x63,0xbc,0xbc,0xdf,    0x77,0xb6,0xb6,0xc1
1107         .byte   0xaf,0xda,0xda,0x75,    0x42,0x21,0x21,0x63
1108         .byte   0x20,0x10,0x10,0x30,    0xe5,0xff,0xff,0x1a
1109         .byte   0xfd,0xf3,0xf3,0x0e,    0xbf,0xd2,0xd2,0x6d
1110         .byte   0x81,0xcd,0xcd,0x4c,    0x18,0x0c,0x0c,0x14
1111         .byte   0x26,0x13,0x13,0x35,    0xc3,0xec,0xec,0x2f
1112         .byte   0xbe,0x5f,0x5f,0xe1,    0x35,0x97,0x97,0xa2
1113         .byte   0x88,0x44,0x44,0xcc,    0x2e,0x17,0x17,0x39
1114         .byte   0x93,0xc4,0xc4,0x57,    0x55,0xa7,0xa7,0xf2
1115         .byte   0xfc,0x7e,0x7e,0x82,    0x7a,0x3d,0x3d,0x47
1116         .byte   0xc8,0x64,0x64,0xac,    0xba,0x5d,0x5d,0xe7
1117         .byte   0x32,0x19,0x19,0x2b,    0xe6,0x73,0x73,0x95
1118         .byte   0xc0,0x60,0x60,0xa0,    0x19,0x81,0x81,0x98
1119         .byte   0x9e,0x4f,0x4f,0xd1,    0xa3,0xdc,0xdc,0x7f
1120         .byte   0x44,0x22,0x22,0x66,    0x54,0x2a,0x2a,0x7e
1121         .byte   0x3b,0x90,0x90,0xab,    0x0b,0x88,0x88,0x83
1122         .byte   0x8c,0x46,0x46,0xca,    0xc7,0xee,0xee,0x29
1123         .byte   0x6b,0xb8,0xb8,0xd3,    0x28,0x14,0x14,0x3c
1124         .byte   0xa7,0xde,0xde,0x79,    0xbc,0x5e,0x5e,0xe2
1125         .byte   0x16,0x0b,0x0b,0x1d,    0xad,0xdb,0xdb,0x76
1126         .byte   0xdb,0xe0,0xe0,0x3b,    0x64,0x32,0x32,0x56
1127         .byte   0x74,0x3a,0x3a,0x4e,    0x14,0x0a,0x0a,0x1e
1128         .byte   0x92,0x49,0x49,0xdb,    0x0c,0x06,0x06,0x0a
1129         .byte   0x48,0x24,0x24,0x6c,    0xb8,0x5c,0x5c,0xe4
1130         .byte   0x9f,0xc2,0xc2,0x5d,    0xbd,0xd3,0xd3,0x6e
1131         .byte   0x43,0xac,0xac,0xef,    0xc4,0x62,0x62,0xa6
1132         .byte   0x39,0x91,0x91,0xa8,    0x31,0x95,0x95,0xa4
1133         .byte   0xd3,0xe4,0xe4,0x37,    0xf2,0x79,0x79,0x8b
1134         .byte   0xd5,0xe7,0xe7,0x32,    0x8b,0xc8,0xc8,0x43
1135         .byte   0x6e,0x37,0x37,0x59,    0xda,0x6d,0x6d,0xb7
1136         .byte   0x01,0x8d,0x8d,0x8c,    0xb1,0xd5,0xd5,0x64
1137         .byte   0x9c,0x4e,0x4e,0xd2,    0x49,0xa9,0xa9,0xe0
1138         .byte   0xd8,0x6c,0x6c,0xb4,    0xac,0x56,0x56,0xfa
1139         .byte   0xf3,0xf4,0xf4,0x07,    0xcf,0xea,0xea,0x25
1140         .byte   0xca,0x65,0x65,0xaf,    0xf4,0x7a,0x7a,0x8e
1141         .byte   0x47,0xae,0xae,0xe9,    0x10,0x08,0x08,0x18
1142         .byte   0x6f,0xba,0xba,0xd5,    0xf0,0x78,0x78,0x88
1143         .byte   0x4a,0x25,0x25,0x6f,    0x5c,0x2e,0x2e,0x72
1144         .byte   0x38,0x1c,0x1c,0x24,    0x57,0xa6,0xa6,0xf1
1145         .byte   0x73,0xb4,0xb4,0xc7,    0x97,0xc6,0xc6,0x51
1146         .byte   0xcb,0xe8,0xe8,0x23,    0xa1,0xdd,0xdd,0x7c
1147         .byte   0xe8,0x74,0x74,0x9c,    0x3e,0x1f,0x1f,0x21
1148         .byte   0x96,0x4b,0x4b,0xdd,    0x61,0xbd,0xbd,0xdc
1149         .byte   0x0d,0x8b,0x8b,0x86,    0x0f,0x8a,0x8a,0x85
1150         .byte   0xe0,0x70,0x70,0x90,    0x7c,0x3e,0x3e,0x42
1151         .byte   0x71,0xb5,0xb5,0xc4,    0xcc,0x66,0x66,0xaa
1152         .byte   0x90,0x48,0x48,0xd8,    0x06,0x03,0x03,0x05
1153         .byte   0xf7,0xf6,0xf6,0x01,    0x1c,0x0e,0x0e,0x12
1154         .byte   0xc2,0x61,0x61,0xa3,    0x6a,0x35,0x35,0x5f
1155         .byte   0xae,0x57,0x57,0xf9,    0x69,0xb9,0xb9,0xd0
1156         .byte   0x17,0x86,0x86,0x91,    0x99,0xc1,0xc1,0x58
1157         .byte   0x3a,0x1d,0x1d,0x27,    0x27,0x9e,0x9e,0xb9
1158         .byte   0xd9,0xe1,0xe1,0x38,    0xeb,0xf8,0xf8,0x13
1159         .byte   0x2b,0x98,0x98,0xb3,    0x22,0x11,0x11,0x33
1160         .byte   0xd2,0x69,0x69,0xbb,    0xa9,0xd9,0xd9,0x70
1161         .byte   0x07,0x8e,0x8e,0x89,    0x33,0x94,0x94,0xa7
1162         .byte   0x2d,0x9b,0x9b,0xb6,    0x3c,0x1e,0x1e,0x22
1163         .byte   0x15,0x87,0x87,0x92,    0xc9,0xe9,0xe9,0x20
1164         .byte   0x87,0xce,0xce,0x49,    0xaa,0x55,0x55,0xff
1165         .byte   0x50,0x28,0x28,0x78,    0xa5,0xdf,0xdf,0x7a
1166         .byte   0x03,0x8c,0x8c,0x8f,    0x59,0xa1,0xa1,0xf8
1167         .byte   0x09,0x89,0x89,0x80,    0x1a,0x0d,0x0d,0x17
1168         .byte   0x65,0xbf,0xbf,0xda,    0xd7,0xe6,0xe6,0x31
1169         .byte   0x84,0x42,0x42,0xc6,    0xd0,0x68,0x68,0xb8
1170         .byte   0x82,0x41,0x41,0xc3,    0x29,0x99,0x99,0xb0
1171         .byte   0x5a,0x2d,0x2d,0x77,    0x1e,0x0f,0x0f,0x11
1172         .byte   0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
1173         .byte   0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
1174 AES_Te4:
1175         .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
1176         .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
1177         .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
1178         .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
1179         .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
1180         .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
1181         .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
1182         .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
1183         .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
1184         .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
1185         .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
1186         .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
1187         .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
1188         .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
1189         .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
1190         .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
1191         .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
1192         .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
1193         .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
1194         .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
1195         .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
1196         .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
1197         .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
1198         .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
1199         .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
1200         .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
1201         .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
1202         .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
1203         .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
1204         .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
1205         .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
1206         .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
1207 rcon:
1208         .byte   0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00
1209         .byte   0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
1210         .byte   0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
1211         .byte   0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
1212         .byte   0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
1213         .align  128
1214 AES_Td:
1215         .byte   0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53
1216         .byte   0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
1217         .byte   0x3b,0xab,0x6b,0xcb,    0x1f,0x9d,0x45,0xf1
1218         .byte   0xac,0xfa,0x58,0xab,    0x4b,0xe3,0x03,0x93
1219         .byte   0x20,0x30,0xfa,0x55,    0xad,0x76,0x6d,0xf6
1220         .byte   0x88,0xcc,0x76,0x91,    0xf5,0x02,0x4c,0x25
1221         .byte   0x4f,0xe5,0xd7,0xfc,    0xc5,0x2a,0xcb,0xd7
1222         .byte   0x26,0x35,0x44,0x80,    0xb5,0x62,0xa3,0x8f
1223         .byte   0xde,0xb1,0x5a,0x49,    0x25,0xba,0x1b,0x67
1224         .byte   0x45,0xea,0x0e,0x98,    0x5d,0xfe,0xc0,0xe1
1225         .byte   0xc3,0x2f,0x75,0x02,    0x81,0x4c,0xf0,0x12
1226         .byte   0x8d,0x46,0x97,0xa3,    0x6b,0xd3,0xf9,0xc6
1227         .byte   0x03,0x8f,0x5f,0xe7,    0x15,0x92,0x9c,0x95
1228         .byte   0xbf,0x6d,0x7a,0xeb,    0x95,0x52,0x59,0xda
1229         .byte   0xd4,0xbe,0x83,0x2d,    0x58,0x74,0x21,0xd3
1230         .byte   0x49,0xe0,0x69,0x29,    0x8e,0xc9,0xc8,0x44
1231         .byte   0x75,0xc2,0x89,0x6a,    0xf4,0x8e,0x79,0x78
1232         .byte   0x99,0x58,0x3e,0x6b,    0x27,0xb9,0x71,0xdd
1233         .byte   0xbe,0xe1,0x4f,0xb6,    0xf0,0x88,0xad,0x17
1234         .byte   0xc9,0x20,0xac,0x66,    0x7d,0xce,0x3a,0xb4
1235         .byte   0x63,0xdf,0x4a,0x18,    0xe5,0x1a,0x31,0x82
1236         .byte   0x97,0x51,0x33,0x60,    0x62,0x53,0x7f,0x45
1237         .byte   0xb1,0x64,0x77,0xe0,    0xbb,0x6b,0xae,0x84
1238         .byte   0xfe,0x81,0xa0,0x1c,    0xf9,0x08,0x2b,0x94
1239         .byte   0x70,0x48,0x68,0x58,    0x8f,0x45,0xfd,0x19
1240         .byte   0x94,0xde,0x6c,0x87,    0x52,0x7b,0xf8,0xb7
1241         .byte   0xab,0x73,0xd3,0x23,    0x72,0x4b,0x02,0xe2
1242         .byte   0xe3,0x1f,0x8f,0x57,    0x66,0x55,0xab,0x2a
1243         .byte   0xb2,0xeb,0x28,0x07,    0x2f,0xb5,0xc2,0x03
1244         .byte   0x86,0xc5,0x7b,0x9a,    0xd3,0x37,0x08,0xa5
1245         .byte   0x30,0x28,0x87,0xf2,    0x23,0xbf,0xa5,0xb2
1246         .byte   0x02,0x03,0x6a,0xba,    0xed,0x16,0x82,0x5c
1247         .byte   0x8a,0xcf,0x1c,0x2b,    0xa7,0x79,0xb4,0x92
1248         .byte   0xf3,0x07,0xf2,0xf0,    0x4e,0x69,0xe2,0xa1
1249         .byte   0x65,0xda,0xf4,0xcd,    0x06,0x05,0xbe,0xd5
1250         .byte   0xd1,0x34,0x62,0x1f,    0xc4,0xa6,0xfe,0x8a
1251         .byte   0x34,0x2e,0x53,0x9d,    0xa2,0xf3,0x55,0xa0
1252         .byte   0x05,0x8a,0xe1,0x32,    0xa4,0xf6,0xeb,0x75
1253         .byte   0x0b,0x83,0xec,0x39,    0x40,0x60,0xef,0xaa
1254         .byte   0x5e,0x71,0x9f,0x06,    0xbd,0x6e,0x10,0x51
1255         .byte   0x3e,0x21,0x8a,0xf9,    0x96,0xdd,0x06,0x3d
1256         .byte   0xdd,0x3e,0x05,0xae,    0x4d,0xe6,0xbd,0x46
1257         .byte   0x91,0x54,0x8d,0xb5,    0x71,0xc4,0x5d,0x05
1258         .byte   0x04,0x06,0xd4,0x6f,    0x60,0x50,0x15,0xff
1259         .byte   0x19,0x98,0xfb,0x24,    0xd6,0xbd,0xe9,0x97
1260         .byte   0x89,0x40,0x43,0xcc,    0x67,0xd9,0x9e,0x77
1261         .byte   0xb0,0xe8,0x42,0xbd,    0x07,0x89,0x8b,0x88
1262         .byte   0xe7,0x19,0x5b,0x38,    0x79,0xc8,0xee,0xdb
1263         .byte   0xa1,0x7c,0x0a,0x47,    0x7c,0x42,0x0f,0xe9
1264         .byte   0xf8,0x84,0x1e,0xc9,    0x00,0x00,0x00,0x00
1265         .byte   0x09,0x80,0x86,0x83,    0x32,0x2b,0xed,0x48
1266         .byte   0x1e,0x11,0x70,0xac,    0x6c,0x5a,0x72,0x4e
1267         .byte   0xfd,0x0e,0xff,0xfb,    0x0f,0x85,0x38,0x56
1268         .byte   0x3d,0xae,0xd5,0x1e,    0x36,0x2d,0x39,0x27
1269         .byte   0x0a,0x0f,0xd9,0x64,    0x68,0x5c,0xa6,0x21
1270         .byte   0x9b,0x5b,0x54,0xd1,    0x24,0x36,0x2e,0x3a
1271         .byte   0x0c,0x0a,0x67,0xb1,    0x93,0x57,0xe7,0x0f
1272         .byte   0xb4,0xee,0x96,0xd2,    0x1b,0x9b,0x91,0x9e
1273         .byte   0x80,0xc0,0xc5,0x4f,    0x61,0xdc,0x20,0xa2
1274         .byte   0x5a,0x77,0x4b,0x69,    0x1c,0x12,0x1a,0x16
1275         .byte   0xe2,0x93,0xba,0x0a,    0xc0,0xa0,0x2a,0xe5
1276         .byte   0x3c,0x22,0xe0,0x43,    0x12,0x1b,0x17,0x1d
1277         .byte   0x0e,0x09,0x0d,0x0b,    0xf2,0x8b,0xc7,0xad
1278         .byte   0x2d,0xb6,0xa8,0xb9,    0x14,0x1e,0xa9,0xc8
1279         .byte   0x57,0xf1,0x19,0x85,    0xaf,0x75,0x07,0x4c
1280         .byte   0xee,0x99,0xdd,0xbb,    0xa3,0x7f,0x60,0xfd
1281         .byte   0xf7,0x01,0x26,0x9f,    0x5c,0x72,0xf5,0xbc
1282         .byte   0x44,0x66,0x3b,0xc5,    0x5b,0xfb,0x7e,0x34
1283         .byte   0x8b,0x43,0x29,0x76,    0xcb,0x23,0xc6,0xdc
1284         .byte   0xb6,0xed,0xfc,0x68,    0xb8,0xe4,0xf1,0x63
1285         .byte   0xd7,0x31,0xdc,0xca,    0x42,0x63,0x85,0x10
1286         .byte   0x13,0x97,0x22,0x40,    0x84,0xc6,0x11,0x20
1287         .byte   0x85,0x4a,0x24,0x7d,    0xd2,0xbb,0x3d,0xf8
1288         .byte   0xae,0xf9,0x32,0x11,    0xc7,0x29,0xa1,0x6d
1289         .byte   0x1d,0x9e,0x2f,0x4b,    0xdc,0xb2,0x30,0xf3
1290         .byte   0x0d,0x86,0x52,0xec,    0x77,0xc1,0xe3,0xd0
1291         .byte   0x2b,0xb3,0x16,0x6c,    0xa9,0x70,0xb9,0x99
1292         .byte   0x11,0x94,0x48,0xfa,    0x47,0xe9,0x64,0x22
1293         .byte   0xa8,0xfc,0x8c,0xc4,    0xa0,0xf0,0x3f,0x1a
1294         .byte   0x56,0x7d,0x2c,0xd8,    0x22,0x33,0x90,0xef
1295         .byte   0x87,0x49,0x4e,0xc7,    0xd9,0x38,0xd1,0xc1
1296         .byte   0x8c,0xca,0xa2,0xfe,    0x98,0xd4,0x0b,0x36
1297         .byte   0xa6,0xf5,0x81,0xcf,    0xa5,0x7a,0xde,0x28
1298         .byte   0xda,0xb7,0x8e,0x26,    0x3f,0xad,0xbf,0xa4
1299         .byte   0x2c,0x3a,0x9d,0xe4,    0x50,0x78,0x92,0x0d
1300         .byte   0x6a,0x5f,0xcc,0x9b,    0x54,0x7e,0x46,0x62
1301         .byte   0xf6,0x8d,0x13,0xc2,    0x90,0xd8,0xb8,0xe8
1302         .byte   0x2e,0x39,0xf7,0x5e,    0x82,0xc3,0xaf,0xf5
1303         .byte   0x9f,0x5d,0x80,0xbe,    0x69,0xd0,0x93,0x7c
1304         .byte   0x6f,0xd5,0x2d,0xa9,    0xcf,0x25,0x12,0xb3
1305         .byte   0xc8,0xac,0x99,0x3b,    0x10,0x18,0x7d,0xa7
1306         .byte   0xe8,0x9c,0x63,0x6e,    0xdb,0x3b,0xbb,0x7b
1307         .byte   0xcd,0x26,0x78,0x09,    0x6e,0x59,0x18,0xf4
1308         .byte   0xec,0x9a,0xb7,0x01,    0x83,0x4f,0x9a,0xa8
1309         .byte   0xe6,0x95,0x6e,0x65,    0xaa,0xff,0xe6,0x7e
1310         .byte   0x21,0xbc,0xcf,0x08,    0xef,0x15,0xe8,0xe6
1311         .byte   0xba,0xe7,0x9b,0xd9,    0x4a,0x6f,0x36,0xce
1312         .byte   0xea,0x9f,0x09,0xd4,    0x29,0xb0,0x7c,0xd6
1313         .byte   0x31,0xa4,0xb2,0xaf,    0x2a,0x3f,0x23,0x31
1314         .byte   0xc6,0xa5,0x94,0x30,    0x35,0xa2,0x66,0xc0
1315         .byte   0x74,0x4e,0xbc,0x37,    0xfc,0x82,0xca,0xa6
1316         .byte   0xe0,0x90,0xd0,0xb0,    0x33,0xa7,0xd8,0x15
1317         .byte   0xf1,0x04,0x98,0x4a,    0x41,0xec,0xda,0xf7
1318         .byte   0x7f,0xcd,0x50,0x0e,    0x17,0x91,0xf6,0x2f
1319         .byte   0x76,0x4d,0xd6,0x8d,    0x43,0xef,0xb0,0x4d
1320         .byte   0xcc,0xaa,0x4d,0x54,    0xe4,0x96,0x04,0xdf
1321         .byte   0x9e,0xd1,0xb5,0xe3,    0x4c,0x6a,0x88,0x1b
1322         .byte   0xc1,0x2c,0x1f,0xb8,    0x46,0x65,0x51,0x7f
1323         .byte   0x9d,0x5e,0xea,0x04,    0x01,0x8c,0x35,0x5d
1324         .byte   0xfa,0x87,0x74,0x73,    0xfb,0x0b,0x41,0x2e
1325         .byte   0xb3,0x67,0x1d,0x5a,    0x92,0xdb,0xd2,0x52
1326         .byte   0xe9,0x10,0x56,0x33,    0x6d,0xd6,0x47,0x13
1327         .byte   0x9a,0xd7,0x61,0x8c,    0x37,0xa1,0x0c,0x7a
1328         .byte   0x59,0xf8,0x14,0x8e,    0xeb,0x13,0x3c,0x89
1329         .byte   0xce,0xa9,0x27,0xee,    0xb7,0x61,0xc9,0x35
1330         .byte   0xe1,0x1c,0xe5,0xed,    0x7a,0x47,0xb1,0x3c
1331         .byte   0x9c,0xd2,0xdf,0x59,    0x55,0xf2,0x73,0x3f
1332         .byte   0x18,0x14,0xce,0x79,    0x73,0xc7,0x37,0xbf
1333         .byte   0x53,0xf7,0xcd,0xea,    0x5f,0xfd,0xaa,0x5b
1334         .byte   0xdf,0x3d,0x6f,0x14,    0x78,0x44,0xdb,0x86
1335         .byte   0xca,0xaf,0xf3,0x81,    0xb9,0x68,0xc4,0x3e
1336         .byte   0x38,0x24,0x34,0x2c,    0xc2,0xa3,0x40,0x5f
1337         .byte   0x16,0x1d,0xc3,0x72,    0xbc,0xe2,0x25,0x0c
1338         .byte   0x28,0x3c,0x49,0x8b,    0xff,0x0d,0x95,0x41
1339         .byte   0x39,0xa8,0x01,0x71,    0x08,0x0c,0xb3,0xde
1340         .byte   0xd8,0xb4,0xe4,0x9c,    0x64,0x56,0xc1,0x90
1341         .byte   0x7b,0xcb,0x84,0x61,    0xd5,0x32,0xb6,0x70
1342         .byte   0x48,0x6c,0x5c,0x74,    0xd0,0xb8,0x57,0x42
1343 AES_Td4:
1344         .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
1345         .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1346         .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1347         .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1348         .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1349         .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1350         .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1351         .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1352         .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1353         .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1354         .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1355         .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1356         .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1357         .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1358         .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1359         .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1360         .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1361         .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1362         .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1363         .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1364         .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1365         .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1366         .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1367         .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1368         .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1369         .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1370         .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1371         .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1372         .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1373         .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1374         .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1375         .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1376         .cstring "AES for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
1377         .align  4
1378 ___
1379
1380 print $code;                                   # emit the accumulated assembly text on STDOUT
1381 close STDOUT or die "error closing STDOUT";    # abort if closing STDOUT reports an error