aes/asm/{aes-armv4|bsaes-armv7}.pl: make it work with binutils-2.29.
[openssl.git] / crypto / aes / asm / aes-c64xplus.pl
1 #! /usr/bin/env perl
2 # Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the OpenSSL license (the "License").  You may not use
5 # this file except in compliance with the License.  You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9 #
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16 #
17 # [Endian-neutral] AES for C64x+.
18 #
19 # Even though SPLOOPs are scheduled for 13 cycles, and thus expected
20 # performance is ~8.5 cycles per byte processed with 128-bit key,
21 # measured performance turned out to be ~10 cycles per byte. Discrepancy
22 # must be caused by limitations of L1D memory banking(*), see SPRU871
23 # TI publication for further details. If it's any consolation, it's still
24 # ~20% faster than TI's linear assembly module anyway... Compared to
25 # aes_core.c compiled with cl6x 6.0 with -mv6400+ -o2 options this
26 # code is 3.75x faster and almost 3x smaller (tables included).
27 #
28 # (*)   This means that there might be subtle correlation between data
29 #       and timing and one can wonder if it can be ... attacked:-(
30 #       On the other hand this also means that *if* one chooses to
31 #       implement *4* T-tables variant [instead of 1 T-table as in
32 #       this implementation, or in addition to], then one ought to
33 #       *interleave* them. Even though it complicates addressing,
34 #       references to interleaved tables would be guaranteed not to
35 #       clash. I reckon that it should be possible to break 8 cycles
36 #       per byte "barrier," i.e. improve by ~20%, naturally at the
37 #       cost of 8x increased pressure on L1D. 8x because you'd have
38 #       to interleave both Te and Td tables...
39
# Skip leading command-line arguments until one looks like a file name
# with an extension; that argument is taken as the output file.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
# Redirect STDOUT to the output file. Abort with a diagnostic instead of
# silently discarding the generated code when the file cannot be opened
# (which includes the case where no output name was supplied).
open STDOUT,">$output" or die "can't open $output: $!";

# Register maps interpolated into the assembly templates below.
($TEA,$TEB)=("A5","B5");        # lookup table base, A-side and B-side copy
($KPA,$KPB)=("A3","B1");        # key schedule pointers (KEY+0 and KEY+4)
@K=("A6","B6","A7","B7");       # round key words
@s=("A8","B8","A9","B9");       # cipher state words
# Destination registers for table lookups. The Td* arrays used by the
# decrypt template name exactly the same registers as Te*.
@Te0=@Td0=("A16","B16","A17","B17");
@Te1=@Td1=("A18","B18","A19","B19");
@Te2=@Td2=("A20","B20","A21","B21");
@Te3=@Td3=("A22","B22","A23","B23");
51
52 $code=<<___;
53         .text
54
;; For assembler versions below 7000000, __TI_EABI__ is defined as 0 here
;; so that the .if tests on it in this file take the non-EABI branch.
55         .if     .ASSEMBLER_VERSION<7000000
56         .asg    0,__TI_EABI__
57         .endif
;; Under EABI the underscore-prefixed entry point names used in this file
;; are substituted with the unprefixed names.
58         .if     __TI_EABI__
59         .nocmp
60         .asg    AES_encrypt,_AES_encrypt
61         .asg    AES_decrypt,_AES_decrypt
62         .asg    AES_set_encrypt_key,_AES_set_encrypt_key
63         .asg    AES_set_decrypt_key,_AES_set_decrypt_key
64         .asg    AES_ctr32_encrypt,_AES_ctr32_encrypt
65         .endif
66
;; Register aliases used throughout the file.
67         .asg    B3,RA
68         .asg    A4,INP
69         .asg    B4,OUT
70         .asg    A6,KEY
71         .asg    A4,RET
72         .asg    B15,SP
73
;; EXTn are the left-shift operands passed to EXTU (paired with a right
;; shift of 24) to isolate one byte of a state word. TBLn are the ROTL
;; amounts applied to looked-up table words.
74         .eval   24,EXT0
75         .eval   16,EXT1
76         .eval   8,EXT2
77         .eval   0,EXT3
78         .eval   8,TBL1
79         .eval   16,TBL2
80         .eval   24,TBL3
81
;; On big-endian targets both sets of constants are mirrored.
82         .if     .BIG_ENDIAN
83         .eval   24-EXT0,EXT0
84         .eval   24-EXT1,EXT1
85         .eval   24-EXT2,EXT2
86         .eval   24-EXT3,EXT3
87         .eval   32-TBL1,TBL1
88         .eval   32-TBL2,TBL2
89         .eval   32-TBL3,TBL3
90         .endif
91
;;====================================================================
;; AES_encrypt: encrypt one 16-byte block.
;;   INP (A4) - input block, read with two non-aligned LDNDW loads
;;   OUT (B4) - output block, written with two non-aligned STNDW stores
;;   KEY (A6) - key schedule; the round count is read from word index 60
;; Entering at _AES_encrypt sets B2=1, which enables the [B2]-predicated
;; input loads and output stores. The inner label __encrypt skips that
;; MVK, so B2 is whatever the caller left in it.
;; NOTE(review): __encrypt looks like an internal entry for a caller that
;; keeps the block in A9:A8/B9:B8 with B2=0 (AES_ctr32_encrypt?) - confirm.
;;====================================================================
92         .global _AES_encrypt
93 _AES_encrypt:
94         .asmfunc
95         MVK     1,B2                            ; public entry: enable [B2] loads/stores
96 __encrypt:
;; Load the input and form the address of AES_Te relative to __encrypt.
97         .if     __TI_EABI__
98    [B2] LDNDW   *INP++,A9:A8                    ; load input
99 ||      MVKL    \$PCR_OFFSET(AES_Te,__encrypt),$TEA
100 ||      ADDKPC  __encrypt,B0
101    [B2] LDNDW   *INP++,B9:B8
102 ||      MVKH    \$PCR_OFFSET(AES_Te,__encrypt),$TEA
103 ||      ADD     0,KEY,$KPA
104 ||      ADD     4,KEY,$KPB
105         .else
106    [B2] LDNDW   *INP++,A9:A8                    ; load input
107 ||      MVKL    (AES_Te-__encrypt),$TEA
108 ||      ADDKPC  __encrypt,B0
109    [B2] LDNDW   *INP++,B9:B8
110 ||      MVKH    (AES_Te-__encrypt),$TEA
111 ||      ADD     0,KEY,$KPA
112 ||      ADD     4,KEY,$KPB
113         .endif
114         LDW     *$KPA++[2],$Te0[0]              ; zero round key
115 ||      LDW     *$KPB++[2],$Te0[1]
116 ||      MVK     60,A0
117 ||      ADD     B0,$TEA,$TEA                    ; AES_Te
118         LDW     *KEY[A0],B0                     ; rounds
119 ||      MVK     1024,A0                         ; sizeof(AES_Te)
120         LDW     *$KPA++[2],$Te0[2]
121 ||      LDW     *$KPB++[2],$Te0[3]
122 ||      MV      $TEA,$TEB
123         NOP
;; Move the loaded words into the state registers; the word order within
;; each register pair depends on endianness.
124         .if     .BIG_ENDIAN
125         MV      A9,$s[0]
126 ||      MV      A8,$s[1]
127 ||      MV      B9,$s[2]
128 ||      MV      B8,$s[3]
129         .else
130         MV      A8,$s[0]
131 ||      MV      A9,$s[1]
132 ||      MV      B8,$s[2]
133 ||      MV      B9,$s[3]
134         .endif
135         XOR     $Te0[0],$s[0],$s[0]
136 ||      XOR     $Te0[1],$s[1],$s[1]
137 ||      LDW     *$KPA++[2],$K[0]                ; 1st round key
138 ||      LDW     *$KPB++[2],$K[1]
139         SUB     B0,2,B0                         ; loop count = rounds-2
140
141         SPLOOPD 13
142 ||      MVC     B0,ILC
143 ||      LDW     *$KPA++[2],$K[2]
144 ||      LDW     *$KPB++[2],$K[3]
145 ;;====================================================================
146         EXTU    $s[1],EXT1,24,$Te1[1]
147 ||      EXTU    $s[0],EXT3,24,$Te3[0]
148         LDW     *${TEB}[$Te1[1]],$Te1[1]        ; Te1[s1>>8],   t0
149 ||      LDW     *${TEA}[$Te3[0]],$Te3[0]        ; Te3[s0>>24],  t1
150 ||      XOR     $s[2],$Te0[2],$s[2]             ; modulo-scheduled
151 ||      XOR     $s[3],$Te0[3],$s[3]             ; modulo-scheduled
152 ||      EXTU    $s[1],EXT3,24,$Te3[1]
153 ||      EXTU    $s[0],EXT1,24,$Te1[0]
154         LDW     *${TEB}[$Te3[1]],$Te3[1]        ; Te3[s1>>24],  t2
155 ||      LDW     *${TEA}[$Te1[0]],$Te1[0]        ; Te1[s0>>8],   t3
156 ||      EXTU    $s[2],EXT2,24,$Te2[2]
157 ||      EXTU    $s[3],EXT2,24,$Te2[3]
158         LDW     *${TEA}[$Te2[2]],$Te2[2]        ; Te2[s2>>16],  t0
159 ||      LDW     *${TEB}[$Te2[3]],$Te2[3]        ; Te2[s3>>16],  t1
160 ||      EXTU    $s[3],EXT3,24,$Te3[3]
161 ||      EXTU    $s[2],EXT1,24,$Te1[2]
162         LDW     *${TEB}[$Te3[3]],$Te3[3]        ; Te3[s3>>24],  t0
163 ||      LDW     *${TEA}[$Te1[2]],$Te1[2]        ; Te1[s2>>8],   t1
164 ||      EXTU    $s[0],EXT2,24,$Te2[0]
165 ||      EXTU    $s[1],EXT2,24,$Te2[1]
166         LDW     *${TEA}[$Te2[0]],$Te2[0]        ; Te2[s0>>16],  t2
167 ||      LDW     *${TEB}[$Te2[1]],$Te2[1]        ; Te2[s1>>16],  t3
168 ||      EXTU    $s[3],EXT1,24,$Te1[3]
169 ||      EXTU    $s[2],EXT3,24,$Te3[2]
170         LDW     *${TEB}[$Te1[3]],$Te1[3]        ; Te1[s3>>8],   t2
171 ||      LDW     *${TEA}[$Te3[2]],$Te3[2]        ; Te3[s2>>24],  t3
172 ||      ROTL    $Te1[1],TBL1,$Te3[0]            ; t0
173 ||      ROTL    $Te3[0],TBL3,$Te1[1]            ; t1
174 ||      EXTU    $s[0],EXT0,24,$Te0[0]
175 ||      EXTU    $s[1],EXT0,24,$Te0[1]
176         LDW     *${TEA}[$Te0[0]],$Te0[0]        ; Te0[s0],      t0
177 ||      LDW     *${TEB}[$Te0[1]],$Te0[1]        ; Te0[s1],      t1
178 ||      ROTL    $Te3[1],TBL3,$Te1[0]            ; t2
179 ||      ROTL    $Te1[0],TBL1,$Te3[1]            ; t3
180 ||      EXTU    $s[2],EXT0,24,$Te0[2]
181 ||      EXTU    $s[3],EXT0,24,$Te0[3]
182         LDW     *${TEA}[$Te0[2]],$Te0[2]        ; Te0[s2],      t2
183 ||      LDW     *${TEB}[$Te0[3]],$Te0[3]        ; Te0[s3],      t3
184 ||      ROTL    $Te2[2],TBL2,$Te2[2]            ; t0
185 ||      ROTL    $Te2[3],TBL2,$Te2[3]            ; t1
186 ||      XOR     $K[0],$Te3[0],$s[0]
187 ||      XOR     $K[1],$Te1[1],$s[1]
188         ROTL    $Te3[3],TBL3,$Te1[2]            ; t0
189 ||      ROTL    $Te1[2],TBL1,$Te3[3]            ; t1
190 ||      XOR     $K[2],$Te1[0],$s[2]
191 ||      XOR     $K[3],$Te3[1],$s[3]
192 ||      LDW     *$KPA++[2],$K[0]                ; next round key
193 ||      LDW     *$KPB++[2],$K[1]
194         ROTL    $Te2[0],TBL2,$Te2[0]            ; t2
195 ||      ROTL    $Te2[1],TBL2,$Te2[1]            ; t3
196 ||      XOR     $s[0],$Te2[2],$s[0]
197 ||      XOR     $s[1],$Te2[3],$s[1]
198 ||      LDW     *$KPA++[2],$K[2]
199 ||      LDW     *$KPB++[2],$K[3]
200         ROTL    $Te1[3],TBL1,$Te3[2]            ; t2
201 ||      ROTL    $Te3[2],TBL3,$Te1[3]            ; t3
202 ||      XOR     $s[0],$Te1[2],$s[0]
203 ||      XOR     $s[1],$Te3[3],$s[1]
204         XOR     $s[2],$Te2[0],$s[2]
205 ||      XOR     $s[3],$Te2[1],$s[3]
206 ||      XOR     $s[0],$Te0[0],$s[0]
207 ||      XOR     $s[1],$Te0[1],$s[1]
208         SPKERNEL
209 ||      XOR.L   $s[2],$Te3[2],$s[2]
210 ||      XOR.L   $s[3],$Te1[3],$s[3]
211 ;;====================================================================
;; Last round: the table pointers are advanced by A0 (1024, set above) to
;; Te4 and the lookups become single-byte LDBU loads with no rotation.
212         ADD.D   ${TEA},A0,${TEA}                ; point to Te4
213 ||      ADD.D   ${TEB},A0,${TEB}
214 ||      EXTU    $s[1],EXT1,24,$Te1[1]
215 ||      EXTU    $s[0],EXT3,24,$Te3[0]
216         LDBU    *${TEB}[$Te1[1]],$Te1[1]        ; Te1[s1>>8],   t0
217 ||      LDBU    *${TEA}[$Te3[0]],$Te3[0]        ; Te3[s0>>24],  t1
218 ||      XOR     $s[2],$Te0[2],$s[2]             ; modulo-scheduled
219 ||      XOR     $s[3],$Te0[3],$s[3]             ; modulo-scheduled
220 ||      EXTU    $s[0],EXT0,24,$Te0[0]
221 ||      EXTU    $s[1],EXT0,24,$Te0[1]
222         LDBU    *${TEA}[$Te0[0]],$Te0[0]        ; Te0[s0],      t0
223 ||      LDBU    *${TEB}[$Te0[1]],$Te0[1]        ; Te0[s1],      t1
224 ||      EXTU    $s[3],EXT3,24,$Te3[3]
225 ||      EXTU    $s[2],EXT1,24,$Te1[2]
226         LDBU    *${TEB}[$Te3[3]],$Te3[3]        ; Te3[s3>>24],  t0
227 ||      LDBU    *${TEA}[$Te1[2]],$Te1[2]        ; Te1[s2>>8],   t1
228 ||      EXTU    $s[2],EXT2,24,$Te2[2]
229 ||      EXTU    $s[3],EXT2,24,$Te2[3]
230         LDBU    *${TEA}[$Te2[2]],$Te2[2]        ; Te2[s2>>16],  t0
231 ||      LDBU    *${TEB}[$Te2[3]],$Te2[3]        ; Te2[s3>>16],  t1
232 ||      EXTU    $s[1],EXT3,24,$Te3[1]
233 ||      EXTU    $s[0],EXT1,24,$Te1[0]
234         LDBU    *${TEB}[$Te3[1]],$Te3[1]        ; Te3[s1>>24],  t2
235 ||      LDBU    *${TEA}[$Te1[0]],$Te1[0]        ; Te1[s0>>8],   t3
236 ||      EXTU    $s[3],EXT1,24,$Te1[3]
237 ||      EXTU    $s[2],EXT3,24,$Te3[2]
238         LDBU    *${TEB}[$Te1[3]],$Te1[3]        ; Te1[s3>>8],   t2
239 ||      LDBU    *${TEA}[$Te3[2]],$Te3[2]        ; Te3[s2>>24],  t3
240 ||      EXTU    $s[2],EXT0,24,$Te0[2]
241 ||      EXTU    $s[3],EXT0,24,$Te0[3]
242         LDBU    *${TEA}[$Te0[2]],$Te0[2]        ; Te0[s2],      t2
243 ||      LDBU    *${TEB}[$Te0[3]],$Te0[3]        ; Te0[s3],      t3
244 ||      EXTU    $s[0],EXT2,24,$Te2[0]
245 ||      EXTU    $s[1],EXT2,24,$Te2[1]
246         LDBU    *${TEA}[$Te2[0]],$Te2[0]        ; Te2[s0>>16],  t2
247 ||      LDBU    *${TEB}[$Te2[1]],$Te2[1]        ; Te2[s1>>16],  t3
248
;; Pack the four looked-up bytes of each output word, XOR in the last
;; round key, move the result to A9:A8/B9:B8 and store it when B2 is set.
249         .if     .BIG_ENDIAN
250         PACK2   $Te0[0],$Te1[1],$Te0[0]
251 ||      PACK2   $Te0[1],$Te1[2],$Te0[1]
252         PACK2   $Te2[2],$Te3[3],$Te2[2]
253 ||      PACK2   $Te2[3],$Te3[0],$Te2[3]
254         PACKL4  $Te0[0],$Te2[2],$Te0[0]
255 ||      PACKL4  $Te0[1],$Te2[3],$Te0[1]
256         XOR     $K[0],$Te0[0],$Te0[0]           ; s[0]
257 ||      XOR     $K[1],$Te0[1],$Te0[1]           ; s[1]
258
259         PACK2   $Te0[2],$Te1[3],$Te0[2]
260 ||      PACK2   $Te0[3],$Te1[0],$Te0[3]
261         PACK2   $Te2[0],$Te3[1],$Te2[0]
262 ||      PACK2   $Te2[1],$Te3[2],$Te2[1]
263 ||      BNOP    RA                              ; return issued early; packets below fill its delay slots
264         PACKL4  $Te0[2],$Te2[0],$Te0[2]
265 ||      PACKL4  $Te0[3],$Te2[1],$Te0[3]
266         XOR     $K[2],$Te0[2],$Te0[2]           ; s[2]
267 ||      XOR     $K[3],$Te0[3],$Te0[3]           ; s[3]
268
269         MV      $Te0[0],A9
270 ||      MV      $Te0[1],A8
271         MV      $Te0[2],B9
272 ||      MV      $Te0[3],B8
273 || [B2] STNDW   A9:A8,*OUT++
274    [B2] STNDW   B9:B8,*OUT++
275         .else
276         PACK2   $Te1[1],$Te0[0],$Te1[1]
277 ||      PACK2   $Te1[2],$Te0[1],$Te1[2]
278         PACK2   $Te3[3],$Te2[2],$Te3[3]
279 ||      PACK2   $Te3[0],$Te2[3],$Te3[0]
280         PACKL4  $Te3[3],$Te1[1],$Te1[1]
281 ||      PACKL4  $Te3[0],$Te1[2],$Te1[2]
282         XOR     $K[0],$Te1[1],$Te1[1]           ; s[0]
283 ||      XOR     $K[1],$Te1[2],$Te1[2]           ; s[1]
284
285         PACK2   $Te1[3],$Te0[2],$Te1[3]
286 ||      PACK2   $Te1[0],$Te0[3],$Te1[0]
287         PACK2   $Te3[1],$Te2[0],$Te3[1]
288 ||      PACK2   $Te3[2],$Te2[1],$Te3[2]
289 ||      BNOP    RA                              ; return issued early; packets below fill its delay slots
290         PACKL4  $Te3[1],$Te1[3],$Te1[3]
291 ||      PACKL4  $Te3[2],$Te1[0],$Te1[0]
292         XOR     $K[2],$Te1[3],$Te1[3]           ; s[2]
293 ||      XOR     $K[3],$Te1[0],$Te1[0]           ; s[3]
294
295         MV      $Te1[1],A8
296 ||      MV      $Te1[2],A9
297         MV      $Te1[3],B8
298 ||      MV      $Te1[0],B9
299 || [B2] STNDW   A9:A8,*OUT++
300    [B2] STNDW   B9:B8,*OUT++
301         .endif
302         .endasmfunc
303
;;====================================================================
;; AES_decrypt: decrypt one 16-byte block.
;;   INP (A4) - input block, read with two non-aligned LDNDW loads
;;   OUT (B4) - output block, written with two non-aligned STNDW stores
;;   KEY (A6) - key schedule; the round count is read from word index 60
;; Entering at _AES_decrypt sets B2=1, which enables the [B2]-predicated
;; input loads and output stores. The inner label __decrypt skips that
;; MVK, so B2 is whatever the caller left in it.
;; The structure mirrors the encrypt routine, using the AES_Td table and
;; a different pairing of state bytes with table rotations.
;;====================================================================
304         .global _AES_decrypt
305 _AES_decrypt:
306         .asmfunc
307         MVK     1,B2                            ; public entry: enable [B2] loads/stores
308 __decrypt:
;; Load the input and form the address of AES_Td relative to __decrypt.
309         .if     __TI_EABI__
310    [B2] LDNDW   *INP++,A9:A8                    ; load input
311 ||      MVKL    \$PCR_OFFSET(AES_Td,__decrypt),$TEA
312 ||      ADDKPC  __decrypt,B0
313    [B2] LDNDW   *INP++,B9:B8
314 ||      MVKH    \$PCR_OFFSET(AES_Td,__decrypt),$TEA
315 ||      ADD     0,KEY,$KPA
316 ||      ADD     4,KEY,$KPB
317         .else
318    [B2] LDNDW   *INP++,A9:A8                    ; load input
319 ||      MVKL    (AES_Td-__decrypt),$TEA
320 ||      ADDKPC  __decrypt,B0
321    [B2] LDNDW   *INP++,B9:B8
322 ||      MVKH    (AES_Td-__decrypt),$TEA
323 ||      ADD     0,KEY,$KPA
324 ||      ADD     4,KEY,$KPB
325         .endif
326         LDW     *$KPA++[2],$Td0[0]              ; zero round key
327 ||      LDW     *$KPB++[2],$Td0[1]
328 ||      MVK     60,A0
329 ||      ADD     B0,$TEA,$TEA                    ; AES_Td
330         LDW     *KEY[A0],B0                     ; rounds
331 ||      MVK     1024,A0                         ; sizeof(AES_Td)
332         LDW     *$KPA++[2],$Td0[2]
333 ||      LDW     *$KPB++[2],$Td0[3]
334 ||      MV      $TEA,$TEB
335         NOP
;; Move the loaded words into the state registers; the word order within
;; each register pair depends on endianness.
336         .if     .BIG_ENDIAN
337         MV      A9,$s[0]
338 ||      MV      A8,$s[1]
339 ||      MV      B9,$s[2]
340 ||      MV      B8,$s[3]
341         .else
342         MV      A8,$s[0]
343 ||      MV      A9,$s[1]
344 ||      MV      B8,$s[2]
345 ||      MV      B9,$s[3]
346         .endif
347         XOR     $Td0[0],$s[0],$s[0]
348 ||      XOR     $Td0[1],$s[1],$s[1]
349 ||      LDW     *$KPA++[2],$K[0]                ; 1st round key
350 ||      LDW     *$KPB++[2],$K[1]
351         SUB     B0,2,B0                         ; loop count = rounds-2
352
353         SPLOOPD 13
354 ||      MVC     B0,ILC
355 ||      LDW     *$KPA++[2],$K[2]
356 ||      LDW     *$KPB++[2],$K[3]
357 ;;====================================================================
358         EXTU    $s[1],EXT3,24,$Td3[1]
359 ||      EXTU    $s[0],EXT1,24,$Td1[0]
360         LDW     *${TEB}[$Td3[1]],$Td3[1]        ; Td3[s1>>24],  t0
361 ||      LDW     *${TEA}[$Td1[0]],$Td1[0]        ; Td1[s0>>8],   t1
362 ||      XOR     $s[2],$Td0[2],$s[2]             ; modulo-scheduled
363 ||      XOR     $s[3],$Td0[3],$s[3]             ; modulo-scheduled
364 ||      EXTU    $s[1],EXT1,24,$Td1[1]
365 ||      EXTU    $s[0],EXT3,24,$Td3[0]
366         LDW     *${TEB}[$Td1[1]],$Td1[1]        ; Td1[s1>>8],   t2
367 ||      LDW     *${TEA}[$Td3[0]],$Td3[0]        ; Td3[s0>>24],  t3
368 ||      EXTU    $s[2],EXT2,24,$Td2[2]
369 ||      EXTU    $s[3],EXT2,24,$Td2[3]
370         LDW     *${TEA}[$Td2[2]],$Td2[2]        ; Td2[s2>>16],  t0
371 ||      LDW     *${TEB}[$Td2[3]],$Td2[3]        ; Td2[s3>>16],  t1
372 ||      EXTU    $s[3],EXT1,24,$Td1[3]
373 ||      EXTU    $s[2],EXT3,24,$Td3[2]
374         LDW     *${TEB}[$Td1[3]],$Td1[3]        ; Td1[s3>>8],   t0
375 ||      LDW     *${TEA}[$Td3[2]],$Td3[2]        ; Td3[s2>>24],  t1
376 ||      EXTU    $s[0],EXT2,24,$Td2[0]
377 ||      EXTU    $s[1],EXT2,24,$Td2[1]
378         LDW     *${TEA}[$Td2[0]],$Td2[0]        ; Td2[s0>>16],  t2
379 ||      LDW     *${TEB}[$Td2[1]],$Td2[1]        ; Td2[s1>>16],  t3
380 ||      EXTU    $s[3],EXT3,24,$Td3[3]
381 ||      EXTU    $s[2],EXT1,24,$Td1[2]
382         LDW     *${TEB}[$Td3[3]],$Td3[3]        ; Td3[s3>>24],  t2
383 ||      LDW     *${TEA}[$Td1[2]],$Td1[2]        ; Td1[s2>>8],   t3
384 ||      ROTL    $Td3[1],TBL3,$Td1[0]            ; t0
385 ||      ROTL    $Td1[0],TBL1,$Td3[1]            ; t1
386 ||      EXTU    $s[0],EXT0,24,$Td0[0]
387 ||      EXTU    $s[1],EXT0,24,$Td0[1]
388         LDW     *${TEA}[$Td0[0]],$Td0[0]        ; Td0[s0],      t0
389 ||      LDW     *${TEB}[$Td0[1]],$Td0[1]        ; Td0[s1],      t1
390 ||      ROTL    $Td1[1],TBL1,$Td3[0]            ; t2
391 ||      ROTL    $Td3[0],TBL3,$Td1[1]            ; t3
392 ||      EXTU    $s[2],EXT0,24,$Td0[2]
393 ||      EXTU    $s[3],EXT0,24,$Td0[3]
394         LDW     *${TEA}[$Td0[2]],$Td0[2]        ; Td0[s2],      t2
395 ||      LDW     *${TEB}[$Td0[3]],$Td0[3]        ; Td0[s3],      t3
396 ||      ROTL    $Td2[2],TBL2,$Td2[2]            ; t0
397 ||      ROTL    $Td2[3],TBL2,$Td2[3]            ; t1
398 ||      XOR     $K[0],$Td1[0],$s[0]
399 ||      XOR     $K[1],$Td3[1],$s[1]
400         ROTL    $Td1[3],TBL1,$Td3[2]            ; t0
401 ||      ROTL    $Td3[2],TBL3,$Td1[3]            ; t1
402 ||      XOR     $K[2],$Td3[0],$s[2]
403 ||      XOR     $K[3],$Td1[1],$s[3]
404 ||      LDW     *$KPA++[2],$K[0]                ; next round key
405 ||      LDW     *$KPB++[2],$K[1]
406         ROTL    $Td2[0],TBL2,$Td2[0]            ; t2
407 ||      ROTL    $Td2[1],TBL2,$Td2[1]            ; t3
408 ||      XOR     $s[0],$Td2[2],$s[0]
409 ||      XOR     $s[1],$Td2[3],$s[1]
410 ||      LDW     *$KPA++[2],$K[2]
411 ||      LDW     *$KPB++[2],$K[3]
412         ROTL    $Td3[3],TBL3,$Td1[2]            ; t2
413 ||      ROTL    $Td1[2],TBL1,$Td3[3]            ; t3
414 ||      XOR     $s[0],$Td3[2],$s[0]
415 ||      XOR     $s[1],$Td1[3],$s[1]
416         XOR     $s[2],$Td2[0],$s[2]
417 ||      XOR     $s[3],$Td2[1],$s[3]
418 ||      XOR     $s[0],$Td0[0],$s[0]
419 ||      XOR     $s[1],$Td0[1],$s[1]
420         SPKERNEL
421 ||      XOR.L   $s[2],$Td1[2],$s[2]
422 ||      XOR.L   $s[3],$Td3[3],$s[3]
423 ;;====================================================================
;; Last round: the table pointers are advanced by A0 (1024, set above) to
;; Td4 and the lookups become single-byte LDBU loads with no rotation.
424         ADD.D   ${TEA},A0,${TEA}                ; point to Td4
425 ||      ADD.D   ${TEB},A0,${TEB}
426 ||      EXTU    $s[1],EXT3,24,$Td3[1]
427 ||      EXTU    $s[0],EXT1,24,$Td1[0]
428         LDBU    *${TEB}[$Td3[1]],$Td3[1]        ; Td3[s1>>24],  t0
429 ||      LDBU    *${TEA}[$Td1[0]],$Td1[0]        ; Td1[s0>>8],   t1
430 ||      XOR     $s[2],$Td0[2],$s[2]             ; modulo-scheduled
431 ||      XOR     $s[3],$Td0[3],$s[3]             ; modulo-scheduled
432 ||      EXTU    $s[0],EXT0,24,$Td0[0]
433 ||      EXTU    $s[1],EXT0,24,$Td0[1]
434         LDBU    *${TEA}[$Td0[0]],$Td0[0]        ; Td0[s0],      t0
435 ||      LDBU    *${TEB}[$Td0[1]],$Td0[1]        ; Td0[s1],      t1
436 ||      EXTU    $s[2],EXT2,24,$Td2[2]
437 ||      EXTU    $s[3],EXT2,24,$Td2[3]
438         LDBU    *${TEA}[$Td2[2]],$Td2[2]        ; Td2[s2>>16],  t0
439 ||      LDBU    *${TEB}[$Td2[3]],$Td2[3]        ; Td2[s3>>16],  t1
440 ||      EXTU    $s[3],EXT1,24,$Td1[3]
441 ||      EXTU    $s[2],EXT3,24,$Td3[2]
442         LDBU    *${TEB}[$Td1[3]],$Td1[3]        ; Td1[s3>>8],   t0
443 ||      LDBU    *${TEA}[$Td3[2]],$Td3[2]        ; Td3[s2>>24],  t1
444 ||      EXTU    $s[1],EXT1,24,$Td1[1]
445 ||      EXTU    $s[0],EXT3,24,$Td3[0]
446         LDBU    *${TEB}[$Td1[1]],$Td1[1]        ; Td1[s1>>8],   t2
447 ||      LDBU    *${TEA}[$Td3[0]],$Td3[0]        ; Td3[s0>>24],  t3
448 ||      EXTU    $s[0],EXT2,24,$Td2[0]
449 ||      EXTU    $s[1],EXT2,24,$Td2[1]
450         LDBU    *${TEA}[$Td2[0]],$Td2[0]        ; Td2[s0>>16],  t2
451 ||      LDBU    *${TEB}[$Td2[1]],$Td2[1]        ; Td2[s1>>16],  t3
452 ||      EXTU    $s[3],EXT3,24,$Td3[3]
453 ||      EXTU    $s[2],EXT1,24,$Td1[2]
454         LDBU    *${TEB}[$Td3[3]],$Td3[3]        ; Td3[s3>>24],  t2
455 ||      LDBU    *${TEA}[$Td1[2]],$Td1[2]        ; Td1[s2>>8],   t3
456 ||      EXTU    $s[2],EXT0,24,$Td0[2]
457 ||      EXTU    $s[3],EXT0,24,$Td0[3]
458         LDBU    *${TEA}[$Td0[2]],$Td0[2]        ; Td0[s2],      t2
459 ||      LDBU    *${TEB}[$Td0[3]],$Td0[3]        ; Td0[s3],      t3
460
;; Pack the four looked-up bytes of each output word, XOR in the last
;; round key, move the result to A9:A8/B9:B8 and store it when B2 is set.
461         .if     .BIG_ENDIAN
462         PACK2   $Td0[0],$Td1[3],$Td0[0]
463 ||      PACK2   $Td0[1],$Td1[0],$Td0[1]
464         PACK2   $Td2[2],$Td3[1],$Td2[2]
465 ||      PACK2   $Td2[3],$Td3[2],$Td2[3]
466         PACKL4  $Td0[0],$Td2[2],$Td0[0]
467 ||      PACKL4  $Td0[1],$Td2[3],$Td0[1]
468         XOR     $K[0],$Td0[0],$Td0[0]           ; s[0]
469 ||      XOR     $K[1],$Td0[1],$Td0[1]           ; s[1]
470
471         PACK2   $Td0[2],$Td1[1],$Td0[2]
472 ||      PACK2   $Td0[3],$Td1[2],$Td0[3]
473         PACK2   $Td2[0],$Td3[3],$Td2[0]
474 ||      PACK2   $Td2[1],$Td3[0],$Td2[1]
475 ||      BNOP    RA                              ; return issued early; packets below fill its delay slots
476         PACKL4  $Td0[2],$Td2[0],$Td0[2]
477 ||      PACKL4  $Td0[3],$Td2[1],$Td0[3]
478         XOR     $K[2],$Td0[2],$Td0[2]           ; s[2]
479 ||      XOR     $K[3],$Td0[3],$Td0[3]           ; s[3]
480
481         MV      $Td0[0],A9
482 ||      MV      $Td0[1],A8
483         MV      $Td0[2],B9
484 ||      MV      $Td0[3],B8
485 || [B2] STNDW   A9:A8,*OUT++
486    [B2] STNDW   B9:B8,*OUT++
487         .else
488         PACK2   $Td1[3],$Td0[0],$Td1[3]
489 ||      PACK2   $Td1[0],$Td0[1],$Td1[0]
490         PACK2   $Td3[1],$Td2[2],$Td3[1]
491 ||      PACK2   $Td3[2],$Td2[3],$Td3[2]
492         PACKL4  $Td3[1],$Td1[3],$Td1[3]
493 ||      PACKL4  $Td3[2],$Td1[0],$Td1[0]
494         XOR     $K[0],$Td1[3],$Td1[3]           ; s[0]
495 ||      XOR     $K[1],$Td1[0],$Td1[0]           ; s[1]
496
497         PACK2   $Td1[1],$Td0[2],$Td1[1]
498 ||      PACK2   $Td1[2],$Td0[3],$Td1[2]
499         PACK2   $Td3[3],$Td2[0],$Td3[3]
500 ||      PACK2   $Td3[0],$Td2[1],$Td3[0]
501 ||      BNOP    RA                              ; return issued early; packets below fill its delay slots
502         PACKL4  $Td3[3],$Td1[1],$Td1[1]
503 ||      PACKL4  $Td3[0],$Td1[2],$Td1[2]
504         XOR     $K[2],$Td1[1],$Td1[1]           ; s[2]
505 ||      XOR     $K[3],$Td1[2],$Td1[2]           ; s[3]
506
507         MV      $Td1[3],A8
508 ||      MV      $Td1[0],A9
509         MV      $Td1[1],B8
510 ||      MV      $Td1[2],B9
511 || [B2] STNDW   A9:A8,*OUT++
512    [B2] STNDW   B9:B8,*OUT++
513         .endif
514         .endasmfunc
515 ___
516 {
517 my @K=(@K,@s);                  # extended key
518 my @Te4=map("B$_",(16..19));
519
520 my @Kx9=@Te0;                   # used in AES_set_decrypt_key
521 my @KxB=@Te1;
522 my @KxD=@Te2;
523 my @KxE=@Te3;
524
525 $code.=<<___;
526         .asg    OUT,BITS
527
528         .global _AES_set_encrypt_key
529 _AES_set_encrypt_key:
530 __set_encrypt_key:
531         .asmfunc
532         MV      INP,A0
533 ||      SHRU    BITS,5,BITS                     ; 128-192-256 -> 4-6-8
534 ||      MV      KEY,A1
535   [!A0] B       RA
536 ||[!A0] MVK     -1,RET
537 ||[!A0] MVK     1,A1                            ; only one B RA
538   [!A1] B       RA
539 ||[!A1] MVK     -1,RET
540 ||[!A1] MVK     0,A0
541 ||      MVK     0,B0
542 ||      MVK     0,A1
543    [A0] LDNDW   *INP++,A9:A8
544 || [A0] CMPEQ   4,BITS,B0
545 || [A0] CMPLT   3,BITS,A1
546    [B0] B       key128?
547 || [A1] LDNDW   *INP++,B9:B8
548 || [A0] CMPEQ   6,BITS,B0
549 || [A0] CMPLT   5,BITS,A1
550    [B0] B       key192?
551 || [A1] LDNDW   *INP++,B17:B16
552 || [A0] CMPEQ   8,BITS,B0
553 || [A0] CMPLT   7,BITS,A1
554    [B0] B       key256?
555 || [A1] LDNDW   *INP++,B19:B18
556
557         .if     __TI_EABI__
558    [A0] ADD     0,KEY,$KPA
559 || [A0] ADD     4,KEY,$KPB
560 || [A0] MVKL    \$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA
561 || [A0] ADDKPC  __set_encrypt_key,B6
562    [A0] MVKH    \$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA
563    [A0] ADD     B6,$TEA,$TEA                    ; AES_Te4
564         .else
565    [A0] ADD     0,KEY,$KPA
566 || [A0] ADD     4,KEY,$KPB
567 || [A0] MVKL    (AES_Te4-__set_encrypt_key),$TEA
568 || [A0] ADDKPC  __set_encrypt_key,B6
569    [A0] MVKH    (AES_Te4-__set_encrypt_key),$TEA
570    [A0] ADD     B6,$TEA,$TEA                    ; AES_Te4
571         .endif
572         NOP
573         NOP
574
575         BNOP    RA,5
576 ||      MVK     -2,RET                          ; unknown bit length
577 ||      MVK     0,B0                            ; redundant
578 ;;====================================================================
579 ;;====================================================================
580 key128?:
581         .if     .BIG_ENDIAN
582         MV      A9,$K[0]
583 ||      MV      A8,$K[1]
584 ||      MV      B9,$Te4[2]
585 ||      MV      B8,$K[3]
586         .else
587         MV      A8,$K[0]
588 ||      MV      A9,$K[1]
589 ||      MV      B8,$Te4[2]
590 ||      MV      B9,$K[3]
591         .endif
592
593         MVK     256,A0
594 ||      MVK     9,B0
595
596         SPLOOPD 14
597 ||      MVC     B0,ILC
598 ||      MV      $TEA,$TEB
599 ||      ADD     $TEA,A0,A30                     ; rcon
600 ;;====================================================================
601         LDW     *A30++[1],A31                   ; rcon[i]
602 ||      MV      $Te4[2],$K[2]
603 ||      EXTU    $K[3],EXT1,24,$Te4[0]
604         LDBU    *${TEB}[$Te4[0]],$Te4[0]
605 ||      MV      $K[3],A0
606 ||      EXTU    $K[3],EXT2,24,$Te4[1]
607         LDBU    *${TEB}[$Te4[1]],$Te4[1]
608 ||      EXTU    A0,EXT3,24,A0
609 ||      EXTU    $K[3],EXT0,24,$Te4[3]
610         .if     .BIG_ENDIAN
611         LDBU    *${TEA}[A0],$Te4[3]
612 ||      LDBU    *${TEB}[$Te4[3]],A0
613         .else
614         LDBU    *${TEA}[A0],A0
615 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
616         .endif
617
618         STW     $K[0],*$KPA++[2]
619 ||      STW     $K[1],*$KPB++[2]
620         STW     $K[2],*$KPA++[2]
621 ||      STW     $K[3],*$KPB++[2]
622
623         XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
624         .if     .BIG_ENDIAN
625         PACK2   $Te4[0],$Te4[1],$Te4[1]
626         PACK2   $Te4[3],A0,$Te4[3]
627         PACKL4  $Te4[1],$Te4[3],$Te4[3]
628         .else
629         PACK2   $Te4[1],$Te4[0],$Te4[1]
630         PACK2   $Te4[3],A0,$Te4[3]
631         PACKL4  $Te4[3],$Te4[1],$Te4[3]
632         .endif
633         XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
634         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
635         MV      $Te4[0],$K[0]
636 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
637         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
638         SPKERNEL
639 ;;====================================================================
640         BNOP    RA
641         MV      $Te4[2],$K[2]
642 ||      STW     $K[0],*$KPA++[2]
643 ||      STW     $K[1],*$KPB++[2]
644         STW     $K[2],*$KPA++[2]
645 ||      STW     $K[3],*$KPB++[2]
646         MVK     10,B0                           ; rounds
647         STW     B0,*++${KPB}[15]
648         MVK     0,RET
649 ;;====================================================================
650 ;;====================================================================
651 key192?:
652         .if     .BIG_ENDIAN
653         MV      A9,$K[0]
654 ||      MV      A8,$K[1]
655 ||      MV      B9,$K[2]
656 ||      MV      B8,$K[3]
657         MV      B17,$Te4[2]
658 ||      MV      B16,$K[5]
659         .else
660         MV      A8,$K[0]
661 ||      MV      A9,$K[1]
662 ||      MV      B8,$K[2]
663 ||      MV      B9,$K[3]
664         MV      B16,$Te4[2]
665 ||      MV      B17,$K[5]
666         .endif
667
668         MVK     256,A0
669 ||      MVK     6,B0
670         MV      $TEA,$TEB
671 ||      ADD     $TEA,A0,A30                     ; rcon
672 ;;====================================================================
673 loop192?:
674         LDW     *A30++[1],A31                   ; rcon[i]
675 ||      MV      $Te4[2],$K[4]
676 ||      EXTU    $K[5],EXT1,24,$Te4[0]
677         LDBU    *${TEB}[$Te4[0]],$Te4[0]
678 ||      MV      $K[5],A0
679 ||      EXTU    $K[5],EXT2,24,$Te4[1]
680         LDBU    *${TEB}[$Te4[1]],$Te4[1]
681 ||      EXTU    A0,EXT3,24,A0
682 ||      EXTU    $K[5],EXT0,24,$Te4[3]
683         .if     .BIG_ENDIAN
684         LDBU    *${TEA}[A0],$Te4[3]
685 ||      LDBU    *${TEB}[$Te4[3]],A0
686         .else
687         LDBU    *${TEA}[A0],A0
688 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
689         .endif
690
691         STW     $K[0],*$KPA++[2]
692 ||      STW     $K[1],*$KPB++[2]
693         STW     $K[2],*$KPA++[2]
694 ||      STW     $K[3],*$KPB++[2]
695         STW     $K[4],*$KPA++[2]
696 ||      STW     $K[5],*$KPB++[2]
697
698         XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
699         .if     .BIG_ENDIAN
700         PACK2   $Te4[0],$Te4[1],$Te4[1]
701 ||      PACK2   $Te4[3],A0,$Te4[3]
702         PACKL4  $Te4[1],$Te4[3],$Te4[3]
703         .else
704         PACK2   $Te4[1],$Te4[0],$Te4[1]
705 ||      PACK2   $Te4[3],A0,$Te4[3]
706         PACKL4  $Te4[3],$Te4[1],$Te4[3]
707         .endif
708         BDEC    loop192?,B0
709 ||      XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
710         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
711         MV      $Te4[0],$K[0]
712 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
713         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
714         MV      $Te4[2],$K[2]
715 ||      XOR     $K[3],$K[4],$Te4[2]             ; K[4]
716         XOR     $Te4[2],$K[5],$K[5]             ; K[5]
717 ;;====================================================================
718         BNOP    RA
719         STW     $K[0],*$KPA++[2]
720 ||      STW     $K[1],*$KPB++[2]
721         STW     $K[2],*$KPA++[2]
722 ||      STW     $K[3],*$KPB++[2]
723         MVK     12,B0                           ; rounds
724         STW     B0,*++${KPB}[7]
725         MVK     0,RET
726 ;;====================================================================
727 ;;====================================================================
728 key256?:
729         .if     .BIG_ENDIAN
730         MV      A9,$K[0]
731 ||      MV      A8,$K[1]
732 ||      MV      B9,$K[2]
733 ||      MV      B8,$K[3]
734         MV      B17,$K[4]
735 ||      MV      B16,$K[5]
736 ||      MV      B19,$Te4[2]
737 ||      MV      B18,$K[7]
738         .else
739         MV      A8,$K[0]
740 ||      MV      A9,$K[1]
741 ||      MV      B8,$K[2]
742 ||      MV      B9,$K[3]
743         MV      B16,$K[4]
744 ||      MV      B17,$K[5]
745 ||      MV      B18,$Te4[2]
746 ||      MV      B19,$K[7]
747         .endif
748
749         MVK     256,A0
750 ||      MVK     6,B0
751         MV      $TEA,$TEB
752 ||      ADD     $TEA,A0,A30                     ; rcon
753 ;;====================================================================
754 loop256?:
755         LDW     *A30++[1],A31                   ; rcon[i]
756 ||      MV      $Te4[2],$K[6]
757 ||      EXTU    $K[7],EXT1,24,$Te4[0]
758         LDBU    *${TEB}[$Te4[0]],$Te4[0]
759 ||      MV      $K[7],A0
760 ||      EXTU    $K[7],EXT2,24,$Te4[1]
761         LDBU    *${TEB}[$Te4[1]],$Te4[1]
762 ||      EXTU    A0,EXT3,24,A0
763 ||      EXTU    $K[7],EXT0,24,$Te4[3]
764         .if     .BIG_ENDIAN
765         LDBU    *${TEA}[A0],$Te4[3]
766 ||      LDBU    *${TEB}[$Te4[3]],A0
767         .else
768         LDBU    *${TEA}[A0],A0
769 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
770         .endif
771
772         STW     $K[0],*$KPA++[2]
773 ||      STW     $K[1],*$KPB++[2]
774         STW     $K[2],*$KPA++[2]
775 ||      STW     $K[3],*$KPB++[2]
776         STW     $K[4],*$KPA++[2]
777 ||      STW     $K[5],*$KPB++[2]
778         STW     $K[6],*$KPA++[2]
779 ||      STW     $K[7],*$KPB++[2]
780 ||      XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
781         .if     .BIG_ENDIAN
782         PACK2   $Te4[0],$Te4[1],$Te4[1]
783 ||      PACK2   $Te4[3],A0,$Te4[3]
784         PACKL4  $Te4[1],$Te4[3],$Te4[3]
785 ||[!B0] B       done256?
786         .else
787         PACK2   $Te4[1],$Te4[0],$Te4[1]
788 ||      PACK2   $Te4[3],A0,$Te4[3]
789         PACKL4  $Te4[3],$Te4[1],$Te4[3]
790 ||[!B0] B       done256?
791         .endif
792         XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
793         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
794         MV      $Te4[0],$K[0]
795 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
796         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
797
798         MV      $Te4[2],$K[2]
799 || [B0] EXTU    $K[3],EXT0,24,$Te4[0]
800 || [B0] SUB     B0,1,B0
801         LDBU    *${TEB}[$Te4[0]],$Te4[0]
802 ||      MV      $K[3],A0
803 ||      EXTU    $K[3],EXT1,24,$Te4[1]
804         LDBU    *${TEB}[$Te4[1]],$Te4[1]
805 ||      EXTU    A0,EXT2,24,A0
806 ||      EXTU    $K[3],EXT3,24,$Te4[3]
807
808         .if     .BIG_ENDIAN
809         LDBU    *${TEA}[A0],$Te4[3]
810 ||      LDBU    *${TEB}[$Te4[3]],A0
811         NOP     3
812         PACK2   $Te4[0],$Te4[1],$Te4[1]
813         PACK2   $Te4[3],A0,$Te4[3]
814 ||      B       loop256?
815         PACKL4  $Te4[1],$Te4[3],$Te4[3]
816         .else
817         LDBU    *${TEA}[A0],A0
818 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
819         NOP     3
820         PACK2   $Te4[1],$Te4[0],$Te4[1]
821         PACK2   $Te4[3],A0,$Te4[3]
822 ||      B       loop256?
823         PACKL4  $Te4[3],$Te4[1],$Te4[3]
824         .endif
825
826         XOR     $Te4[3],$K[4],$Te4[0]           ; K[4]
827         XOR     $Te4[0],$K[5],$K[5]             ; K[5]
828         MV      $Te4[0],$K[4]
829 ||      XOR     $K[5],$K[6],$Te4[2]             ; K[6]
830         XOR     $Te4[2],$K[7],$K[7]             ; K[7]
831 ;;====================================================================
832 done256?:
833         BNOP    RA
834         STW     $K[0],*$KPA++[2]
835 ||      STW     $K[1],*$KPB++[2]
836         STW     $K[2],*$KPA++[2]
837 ||      STW     $K[3],*$KPB++[2]
838         MVK     14,B0                           ; rounds
839         STW     B0,*--${KPB}[1]
840         MVK     0,RET
841         .endasmfunc
842
;; ----------------------------------------------------------------------
;; _AES_set_decrypt_key: build a decryption key schedule in place.
;;
;; Outline, as visible in the code below:
;;   1. Call __set_encrypt_key. On return B0 holds the round count, or
;;      zero, in which case we return to the caller at once (RET is left
;;      as the callee set it; presumably an error code, defined outside
;;      this excerpt).
;;   2. Swap the 16-byte round keys end for end: first with last, second
;;      with second-to-last, and so on towards the middle.
;;   3. Run every round key except the first and the last through
;;      InvMixColumns. Per 32-bit word w the loop computes
;;          0E.w ^ SWAP2(0D.w) ^ ROTL(0B.w ^ SWAP2(09.w), TBL3)
;;      where the products are GMPY4 Galois-field multiplies performed
;;      with GFPGFR temporarily set to 0x0700001B.
;;   4. Restore GFPGFR and return 0 in RET.
;;
;; KEY, RA, RET, TBL3 and the Perl-interpolated register names are
;; defined earlier in the file, outside this excerpt.
;; ----------------------------------------------------------------------
843         .global _AES_set_decrypt_key
844 _AES_set_decrypt_key:
845         .asmfunc
846         B       __set_encrypt_key               ; guarantee local call
847         MV      KEY,B30                         ; keep key schedule pointer; B30 is not modified
848         MV      RA, B31                         ; keep our own return address; B31 is not modified
849         ADDKPC  ret?,RA,2                       ; make the callee return to ret? below
850 ret?:                                           ; B0 holds rounds or zero
851   [!B0] BNOP    B31                             ; return if zero
852    [B0] SHL     B0,4,A0                         ; offset to last round key (rounds*16 bytes)
853    [B0] SHRU    B0,1,B1                         ; B1 = rounds/2 ...
854    [B0] SUB     B1,1,B1                         ; ... minus 1, loaded into ILC below
855    [B0] MVK     0x0000001B,B3                   ; AES polynomial
856    [B0] MVKH    0x07000000,B3                   ; B3 = 0x0700001B, written to GFPGFR below
857
858         SPLOOPD 9                               ; flip round keys
859 ||      MVC     B1,ILC
860 ||      MV      B30,$KPA                        ; low pointer: first round key
861 ||      ADD     B30,A0,$KPB                     ; high pointer: last round key (reads A0 before the MVK below lands)
862 ||      MVK     16,A0                           ; sizeof(round key)
863 ;;====================================================================
864         LDW     *${KPA}[0],A16                  ; A16..A19 = round key at the low pointer
865 ||      LDW     *${KPB}[0],B16                  ; B16..B19 = round key at the high pointer
866         LDW     *${KPA}[1],A17
867 ||      LDW     *${KPB}[1],B17
868         LDW     *${KPA}[2],A18
869 ||      LDW     *${KPB}[2],B18
870         LDW     *${KPA}[3],A19
871 ||      ADD     $KPA,A0,$KPA                    ; low pointer up by one round key
872 ||      LDW     *${KPB}[3],B19
873 ||      SUB     $KPB,A0,$KPB                    ; high pointer down by one round key
874         NOP
875         STW     B16,*${KPA}[-4]                 ; high-end words into the low slot just passed
876 ||      STW     A16,*${KPB}[4]                  ; low-end words into the high slot just passed
877         STW     B17,*${KPA}[-3]
878 ||      STW     A17,*${KPB}[5]
879         STW     B18,*${KPA}[-2]
880 ||      STW     A18,*${KPB}[6]
881         STW     B19,*${KPA}[-1]
882 ||      STW     A19,*${KPB}[7]
883         SPKERNEL
884 ;;====================================================================
885         SUB     B0,1,B0                         ; skip last round
886 ||      ADD     B30,A0,$KPA                     ; skip first round
887 ||      ADD     B30,A0,$KPB                     ; A0 is still 16: both pointers at the second round key
888 ||      MVC     GFPGFR,B30                      ; save GFPGFR
889         LDW     *${KPA}[0],$K[0]                ; preload the first round key to transform
890 ||      LDW     *${KPB}[1],$K[1]
891 ||      MVC     B3,GFPGFR                       ; install the AES polynomial for GMPY4
892         LDW     *${KPA}[2],$K[2]
893 ||      LDW     *${KPB}[3],$K[3]
894         MVK     0x00000909,A24
895 ||      MVK     0x00000B0B,B24
896         MVKH    0x09090000,A24                  ; A24 = 0x09090909
897 ||      MVKH    0x0B0B0000,B24                  ; B24 = 0x0B0B0B0B
898         MVC     B0,ILC                          ; loop count register = rounds-1
899 ||      SUB     B0,1,B0                         ; B0 now gates the look-ahead loads and multiplies
900
901         GMPY4   $K[0],A24,$Kx9[0]               ; ·0x09
902 ||      GMPY4   $K[1],A24,$Kx9[1]
903 ||      MVK     0x00000D0D,A25
904 ||      MVK     0x00000E0E,B25
905         GMPY4   $K[2],A24,$Kx9[2]
906 ||      GMPY4   $K[3],A24,$Kx9[3]
907 ||      MVKH    0x0D0D0000,A25                  ; A25 = 0x0D0D0D0D
908 ||      MVKH    0x0E0E0000,B25                  ; B25 = 0x0E0E0E0E
909
910         GMPY4   $K[0],B24,$KxB[0]               ; ·0x0B
911 ||      GMPY4   $K[1],B24,$KxB[1]
912         GMPY4   $K[2],B24,$KxB[2]
913 ||      GMPY4   $K[3],B24,$KxB[3]
914
915         SPLOOP  11                              ; InvMixColumns
916 ;;====================================================================
917         GMPY4   $K[0],A25,$KxD[0]               ; ·0x0D
918 ||      GMPY4   $K[1],A25,$KxD[1]
919 ||      SWAP2   $Kx9[0],$Kx9[0]                 ; rotate by 16
920 ||      SWAP2   $Kx9[1],$Kx9[1]
921 ||      MV      $K[0],$s[0]                     ; this or DINT
922 ||      MV      $K[1],$s[1]
923 || [B0] LDW     *${KPA}[4],$K[0]                ; fetch the next round key while B0 is non-zero
924 || [B0] LDW     *${KPB}[5],$K[1]
925         GMPY4   $K[2],A25,$KxD[2]
926 ||      GMPY4   $K[3],A25,$KxD[3]
927 ||      SWAP2   $Kx9[2],$Kx9[2]
928 ||      SWAP2   $Kx9[3],$Kx9[3]
929 ||      MV      $K[2],$s[2]
930 ||      MV      $K[3],$s[3]
931 || [B0] LDW     *${KPA}[6],$K[2]
932 || [B0] LDW     *${KPB}[7],$K[3]
933
934         GMPY4   $s[0],B25,$KxE[0]               ; ·0x0E
935 ||      GMPY4   $s[1],B25,$KxE[1]
936 ||      XOR     $Kx9[0],$KxB[0],$KxB[0]         ; fold the swapped ·0x09 term into the ·0x0B term
937 ||      XOR     $Kx9[1],$KxB[1],$KxB[1]
938         GMPY4   $s[2],B25,$KxE[2]
939 ||      GMPY4   $s[3],B25,$KxE[3]
940 ||      XOR     $Kx9[2],$KxB[2],$KxB[2]
941 ||      XOR     $Kx9[3],$KxB[3],$KxB[3]
942
943         ROTL    $KxB[0],TBL3,$KxB[0]            ; rotate the combined term by TBL3 (defined outside this excerpt)
944 ||      ROTL    $KxB[1],TBL3,$KxB[1]
945 ||      SWAP2   $KxD[0],$KxD[0]                 ; rotate by 16
946 ||      SWAP2   $KxD[1],$KxD[1]
947         ROTL    $KxB[2],TBL3,$KxB[2]
948 ||      ROTL    $KxB[3],TBL3,$KxB[3]
949 ||      SWAP2   $KxD[2],$KxD[2]
950 ||      SWAP2   $KxD[3],$KxD[3]
951
952         XOR     $KxE[0],$KxD[0],$KxE[0]         ; accumulate the swapped ·0x0D term
953 ||      XOR     $KxE[1],$KxD[1],$KxE[1]
954 || [B0] GMPY4   $K[0],A24,$Kx9[0]               ; ·0x09 (already for the next round key)
955 || [B0] GMPY4   $K[1],A24,$Kx9[1]
956 ||      ADDAW   $KPA,4,$KPA                     ; advance by 4 words = one round key
957         XOR     $KxE[2],$KxD[2],$KxE[2]
958 ||      XOR     $KxE[3],$KxD[3],$KxE[3]
959 || [B0] GMPY4   $K[2],A24,$Kx9[2]
960 || [B0] GMPY4   $K[3],A24,$Kx9[3]
961 ||      ADDAW   $KPB,4,$KPB
962
963         XOR     $KxB[0],$KxE[0],$KxE[0]         ; final word of the transformed round key
964 ||      XOR     $KxB[1],$KxE[1],$KxE[1]
965 || [B0] GMPY4   $K[0],B24,$KxB[0]               ; ·0x0B (already for the next round key)
966 || [B0] GMPY4   $K[1],B24,$KxB[1]
967         XOR     $KxB[2],$KxE[2],$KxE[2]
968 ||      XOR     $KxB[3],$KxE[3],$KxE[3]
969 || [B0] GMPY4   $K[2],B24,$KxB[2]
970 || [B0] GMPY4   $K[3],B24,$KxB[3]
971 ||      STW     $KxE[0],*${KPA}[-4]             ; negative offsets: the pointers were already advanced above
972 ||      STW     $KxE[1],*${KPB}[-3]
973         STW     $KxE[2],*${KPA}[-2]
974 ||      STW     $KxE[3],*${KPB}[-1]
975 || [B0] SUB     B0,1,B0
976         SPKERNEL
977 ;;====================================================================
978         BNOP    B31,3                           ; return to the address saved on entry
979         MVC     B30,GFPGFR                      ; restore GFPGFR(*)
980         MVK     0,RET                           ; return value 0
981         .endasmfunc
982 ___
983 # (*)   Even though ABI doesn't specify GFPGFR as non-volatile, there
984 #       are code samples out there that *assume* its default value.
985 }
986 {
987 my ($inp,$out,$blocks,$key,$ivp)=("A4","B4","A6","B6","A8");
988 $code.=<<___;
;; ----------------------------------------------------------------------
;; _AES_ctr32_encrypt: counter-mode encryption in which only one 32-bit
;; word of the 16-byte counter block is incremented.
;;
;; Register use visible below: block count arrives in A6 and is kept in
;; A2; key arrives in B6 and is kept in B26; the counter block is read
;; through A8 into A31:A30 and B31:B30; the return address is kept in
;; B27. Input and output are addressed through INP and OUT, which are
;; symbols defined outside this excerpt (presumably aliases for A4 and
;; B4; confirm against the head of the file).
;;
;; Each pass of ctr32_loop? branches to __encrypt with the counter block
;; in A9:A8 and B9:B8 and RA pointing back at ctr32_loop?. The XOR with
;; the input and the store of the result happen at the top of the
;; following pass, gated by A1, which is 0 on the first pass only. When
;; A2 reaches zero the last pass only XORs, stores and returns via B27.
;; ----------------------------------------------------------------------
989         .global _AES_ctr32_encrypt
990 _AES_ctr32_encrypt:
991         .asmfunc
992         LDNDW   *${ivp}[0],A31:A30      ; load counter value
993 ||      MV      $blocks,A2              ; reassign $blocks
994 ||      DMV     RA,$key,B27:B26         ; reassign RA and $key
995         LDNDW   *${ivp}[1],B31:B30      ; second half of the counter block
996 ||      MVK     0,B2                    ; don't let __encrypt load input
997 ||      MVK     0,A1                    ; and postpone writing output
998         .if     .BIG_ENDIAN
999         NOP
1000         .else
1001         NOP     4                       ; let the counter load complete before B31 is used
1002         SWAP2   B31,B31                 ; keep least significant 32 bits
1003         SWAP4   B31,B31                 ; in host byte order
1004         .endif
1005 ctr32_loop?:
1006    [A2] BNOP    __encrypt               ; blocks left: encrypt the next counter block
1007 || [A1] XOR     A29,A9,A9               ; input^Ek(counter)
1008 || [A1] XOR     A28,A8,A8
1009 || [A2] LDNDW   *INP++,A29:A28          ; load input
1010   [!A2] BNOP    B27                     ; return
1011 || [A1] XOR     B29,B9,B9
1012 || [A1] XOR     B28,B8,B8
1013 || [A2] LDNDW   *INP++,B29:B28
1014         .if     .BIG_ENDIAN
1015    [A1] STNDW   A9:A8,*OUT++            ; save output
1016 || [A2] DMV     A31,A30,A9:A8           ; pass counter value to __encrypt
1017    [A1] STNDW   B9:B8,*OUT++
1018 || [A2] DMV     B31,B30,B9:B8
1019 || [A2] ADD     B30,1,B30               ; counter++
1020         .else
1021    [A1] STNDW   A9:A8,*OUT++            ; save output
1022 || [A2] DMV     A31,A30,A9:A8           ; pass counter value to __encrypt
1023 || [A2] SWAP2   B31,B0                  ; SWAP2 here plus SWAP4 below undo the host-order swap
1024 || [A2] ADD     B31,1,B31               ; counter++
1025    [A1] STNDW   B9:B8,*OUT++
1026 || [A2] MV      B30,B8
1027 || [A2] SWAP4   B0,B9                   ; B9 = counter word back in its in-memory byte order
1028         .endif
1029    [A2] ADDKPC  ctr32_loop?,RA          ; return to ctr32_loop?
1030 || [A2] MV      B26,KEY                 ; pass $key
1031 || [A2] SUB     A2,1,A2                 ; $blocks--
1032 ||[!A1] MVK     1,A1                    ; from the second pass on, XOR and store results
1033         NOP
1034         NOP
1035         .endasmfunc
1036 ___
1037 }
1038 # Tables are kept in an endian-neutral manner: every entry is emitted
# with .byte, so the memory image is the same for either byte order.
1039 $code.=<<___;
1040         .if     __TI_EABI__
1041         .sect   ".text:aes_asm.const"    ; section name used when __TI_EABI__ is set
1042         .else
1043         .sect   ".const:aes_asm"         ; section name used otherwise
1044         .endif
1045         .align  128                      ; tables start on a 128-byte boundary
1046 AES_Te:                                  ; 256 entries of 4 bytes: (02*S, S, S, 03*S) in GF(2^8), S = the matching AES_Te4 byte
1047         .byte   0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84
1048         .byte   0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
1049         .byte   0xff,0xf2,0xf2,0x0d,    0xd6,0x6b,0x6b,0xbd
1050         .byte   0xde,0x6f,0x6f,0xb1,    0x91,0xc5,0xc5,0x54
1051         .byte   0x60,0x30,0x30,0x50,    0x02,0x01,0x01,0x03
1052         .byte   0xce,0x67,0x67,0xa9,    0x56,0x2b,0x2b,0x7d
1053         .byte   0xe7,0xfe,0xfe,0x19,    0xb5,0xd7,0xd7,0x62
1054         .byte   0x4d,0xab,0xab,0xe6,    0xec,0x76,0x76,0x9a
1055         .byte   0x8f,0xca,0xca,0x45,    0x1f,0x82,0x82,0x9d
1056         .byte   0x89,0xc9,0xc9,0x40,    0xfa,0x7d,0x7d,0x87
1057         .byte   0xef,0xfa,0xfa,0x15,    0xb2,0x59,0x59,0xeb
1058         .byte   0x8e,0x47,0x47,0xc9,    0xfb,0xf0,0xf0,0x0b
1059         .byte   0x41,0xad,0xad,0xec,    0xb3,0xd4,0xd4,0x67
1060         .byte   0x5f,0xa2,0xa2,0xfd,    0x45,0xaf,0xaf,0xea
1061         .byte   0x23,0x9c,0x9c,0xbf,    0x53,0xa4,0xa4,0xf7
1062         .byte   0xe4,0x72,0x72,0x96,    0x9b,0xc0,0xc0,0x5b
1063         .byte   0x75,0xb7,0xb7,0xc2,    0xe1,0xfd,0xfd,0x1c
1064         .byte   0x3d,0x93,0x93,0xae,    0x4c,0x26,0x26,0x6a
1065         .byte   0x6c,0x36,0x36,0x5a,    0x7e,0x3f,0x3f,0x41
1066         .byte   0xf5,0xf7,0xf7,0x02,    0x83,0xcc,0xcc,0x4f
1067         .byte   0x68,0x34,0x34,0x5c,    0x51,0xa5,0xa5,0xf4
1068         .byte   0xd1,0xe5,0xe5,0x34,    0xf9,0xf1,0xf1,0x08
1069         .byte   0xe2,0x71,0x71,0x93,    0xab,0xd8,0xd8,0x73
1070         .byte   0x62,0x31,0x31,0x53,    0x2a,0x15,0x15,0x3f
1071         .byte   0x08,0x04,0x04,0x0c,    0x95,0xc7,0xc7,0x52
1072         .byte   0x46,0x23,0x23,0x65,    0x9d,0xc3,0xc3,0x5e
1073         .byte   0x30,0x18,0x18,0x28,    0x37,0x96,0x96,0xa1
1074         .byte   0x0a,0x05,0x05,0x0f,    0x2f,0x9a,0x9a,0xb5
1075         .byte   0x0e,0x07,0x07,0x09,    0x24,0x12,0x12,0x36
1076         .byte   0x1b,0x80,0x80,0x9b,    0xdf,0xe2,0xe2,0x3d
1077         .byte   0xcd,0xeb,0xeb,0x26,    0x4e,0x27,0x27,0x69
1078         .byte   0x7f,0xb2,0xb2,0xcd,    0xea,0x75,0x75,0x9f
1079         .byte   0x12,0x09,0x09,0x1b,    0x1d,0x83,0x83,0x9e
1080         .byte   0x58,0x2c,0x2c,0x74,    0x34,0x1a,0x1a,0x2e
1081         .byte   0x36,0x1b,0x1b,0x2d,    0xdc,0x6e,0x6e,0xb2
1082         .byte   0xb4,0x5a,0x5a,0xee,    0x5b,0xa0,0xa0,0xfb
1083         .byte   0xa4,0x52,0x52,0xf6,    0x76,0x3b,0x3b,0x4d
1084         .byte   0xb7,0xd6,0xd6,0x61,    0x7d,0xb3,0xb3,0xce
1085         .byte   0x52,0x29,0x29,0x7b,    0xdd,0xe3,0xe3,0x3e
1086         .byte   0x5e,0x2f,0x2f,0x71,    0x13,0x84,0x84,0x97
1087         .byte   0xa6,0x53,0x53,0xf5,    0xb9,0xd1,0xd1,0x68
1088         .byte   0x00,0x00,0x00,0x00,    0xc1,0xed,0xed,0x2c
1089         .byte   0x40,0x20,0x20,0x60,    0xe3,0xfc,0xfc,0x1f
1090         .byte   0x79,0xb1,0xb1,0xc8,    0xb6,0x5b,0x5b,0xed
1091         .byte   0xd4,0x6a,0x6a,0xbe,    0x8d,0xcb,0xcb,0x46
1092         .byte   0x67,0xbe,0xbe,0xd9,    0x72,0x39,0x39,0x4b
1093         .byte   0x94,0x4a,0x4a,0xde,    0x98,0x4c,0x4c,0xd4
1094         .byte   0xb0,0x58,0x58,0xe8,    0x85,0xcf,0xcf,0x4a
1095         .byte   0xbb,0xd0,0xd0,0x6b,    0xc5,0xef,0xef,0x2a
1096         .byte   0x4f,0xaa,0xaa,0xe5,    0xed,0xfb,0xfb,0x16
1097         .byte   0x86,0x43,0x43,0xc5,    0x9a,0x4d,0x4d,0xd7
1098         .byte   0x66,0x33,0x33,0x55,    0x11,0x85,0x85,0x94
1099         .byte   0x8a,0x45,0x45,0xcf,    0xe9,0xf9,0xf9,0x10
1100         .byte   0x04,0x02,0x02,0x06,    0xfe,0x7f,0x7f,0x81
1101         .byte   0xa0,0x50,0x50,0xf0,    0x78,0x3c,0x3c,0x44
1102         .byte   0x25,0x9f,0x9f,0xba,    0x4b,0xa8,0xa8,0xe3
1103         .byte   0xa2,0x51,0x51,0xf3,    0x5d,0xa3,0xa3,0xfe
1104         .byte   0x80,0x40,0x40,0xc0,    0x05,0x8f,0x8f,0x8a
1105         .byte   0x3f,0x92,0x92,0xad,    0x21,0x9d,0x9d,0xbc
1106         .byte   0x70,0x38,0x38,0x48,    0xf1,0xf5,0xf5,0x04
1107         .byte   0x63,0xbc,0xbc,0xdf,    0x77,0xb6,0xb6,0xc1
1108         .byte   0xaf,0xda,0xda,0x75,    0x42,0x21,0x21,0x63
1109         .byte   0x20,0x10,0x10,0x30,    0xe5,0xff,0xff,0x1a
1110         .byte   0xfd,0xf3,0xf3,0x0e,    0xbf,0xd2,0xd2,0x6d
1111         .byte   0x81,0xcd,0xcd,0x4c,    0x18,0x0c,0x0c,0x14
1112         .byte   0x26,0x13,0x13,0x35,    0xc3,0xec,0xec,0x2f
1113         .byte   0xbe,0x5f,0x5f,0xe1,    0x35,0x97,0x97,0xa2
1114         .byte   0x88,0x44,0x44,0xcc,    0x2e,0x17,0x17,0x39
1115         .byte   0x93,0xc4,0xc4,0x57,    0x55,0xa7,0xa7,0xf2
1116         .byte   0xfc,0x7e,0x7e,0x82,    0x7a,0x3d,0x3d,0x47
1117         .byte   0xc8,0x64,0x64,0xac,    0xba,0x5d,0x5d,0xe7
1118         .byte   0x32,0x19,0x19,0x2b,    0xe6,0x73,0x73,0x95
1119         .byte   0xc0,0x60,0x60,0xa0,    0x19,0x81,0x81,0x98
1120         .byte   0x9e,0x4f,0x4f,0xd1,    0xa3,0xdc,0xdc,0x7f
1121         .byte   0x44,0x22,0x22,0x66,    0x54,0x2a,0x2a,0x7e
1122         .byte   0x3b,0x90,0x90,0xab,    0x0b,0x88,0x88,0x83
1123         .byte   0x8c,0x46,0x46,0xca,    0xc7,0xee,0xee,0x29
1124         .byte   0x6b,0xb8,0xb8,0xd3,    0x28,0x14,0x14,0x3c
1125         .byte   0xa7,0xde,0xde,0x79,    0xbc,0x5e,0x5e,0xe2
1126         .byte   0x16,0x0b,0x0b,0x1d,    0xad,0xdb,0xdb,0x76
1127         .byte   0xdb,0xe0,0xe0,0x3b,    0x64,0x32,0x32,0x56
1128         .byte   0x74,0x3a,0x3a,0x4e,    0x14,0x0a,0x0a,0x1e
1129         .byte   0x92,0x49,0x49,0xdb,    0x0c,0x06,0x06,0x0a
1130         .byte   0x48,0x24,0x24,0x6c,    0xb8,0x5c,0x5c,0xe4
1131         .byte   0x9f,0xc2,0xc2,0x5d,    0xbd,0xd3,0xd3,0x6e
1132         .byte   0x43,0xac,0xac,0xef,    0xc4,0x62,0x62,0xa6
1133         .byte   0x39,0x91,0x91,0xa8,    0x31,0x95,0x95,0xa4
1134         .byte   0xd3,0xe4,0xe4,0x37,    0xf2,0x79,0x79,0x8b
1135         .byte   0xd5,0xe7,0xe7,0x32,    0x8b,0xc8,0xc8,0x43
1136         .byte   0x6e,0x37,0x37,0x59,    0xda,0x6d,0x6d,0xb7
1137         .byte   0x01,0x8d,0x8d,0x8c,    0xb1,0xd5,0xd5,0x64
1138         .byte   0x9c,0x4e,0x4e,0xd2,    0x49,0xa9,0xa9,0xe0
1139         .byte   0xd8,0x6c,0x6c,0xb4,    0xac,0x56,0x56,0xfa
1140         .byte   0xf3,0xf4,0xf4,0x07,    0xcf,0xea,0xea,0x25
1141         .byte   0xca,0x65,0x65,0xaf,    0xf4,0x7a,0x7a,0x8e
1142         .byte   0x47,0xae,0xae,0xe9,    0x10,0x08,0x08,0x18
1143         .byte   0x6f,0xba,0xba,0xd5,    0xf0,0x78,0x78,0x88
1144         .byte   0x4a,0x25,0x25,0x6f,    0x5c,0x2e,0x2e,0x72
1145         .byte   0x38,0x1c,0x1c,0x24,    0x57,0xa6,0xa6,0xf1
1146         .byte   0x73,0xb4,0xb4,0xc7,    0x97,0xc6,0xc6,0x51
1147         .byte   0xcb,0xe8,0xe8,0x23,    0xa1,0xdd,0xdd,0x7c
1148         .byte   0xe8,0x74,0x74,0x9c,    0x3e,0x1f,0x1f,0x21
1149         .byte   0x96,0x4b,0x4b,0xdd,    0x61,0xbd,0xbd,0xdc
1150         .byte   0x0d,0x8b,0x8b,0x86,    0x0f,0x8a,0x8a,0x85
1151         .byte   0xe0,0x70,0x70,0x90,    0x7c,0x3e,0x3e,0x42
1152         .byte   0x71,0xb5,0xb5,0xc4,    0xcc,0x66,0x66,0xaa
1153         .byte   0x90,0x48,0x48,0xd8,    0x06,0x03,0x03,0x05
1154         .byte   0xf7,0xf6,0xf6,0x01,    0x1c,0x0e,0x0e,0x12
1155         .byte   0xc2,0x61,0x61,0xa3,    0x6a,0x35,0x35,0x5f
1156         .byte   0xae,0x57,0x57,0xf9,    0x69,0xb9,0xb9,0xd0
1157         .byte   0x17,0x86,0x86,0x91,    0x99,0xc1,0xc1,0x58
1158         .byte   0x3a,0x1d,0x1d,0x27,    0x27,0x9e,0x9e,0xb9
1159         .byte   0xd9,0xe1,0xe1,0x38,    0xeb,0xf8,0xf8,0x13
1160         .byte   0x2b,0x98,0x98,0xb3,    0x22,0x11,0x11,0x33
1161         .byte   0xd2,0x69,0x69,0xbb,    0xa9,0xd9,0xd9,0x70
1162         .byte   0x07,0x8e,0x8e,0x89,    0x33,0x94,0x94,0xa7
1163         .byte   0x2d,0x9b,0x9b,0xb6,    0x3c,0x1e,0x1e,0x22
1164         .byte   0x15,0x87,0x87,0x92,    0xc9,0xe9,0xe9,0x20
1165         .byte   0x87,0xce,0xce,0x49,    0xaa,0x55,0x55,0xff
1166         .byte   0x50,0x28,0x28,0x78,    0xa5,0xdf,0xdf,0x7a
1167         .byte   0x03,0x8c,0x8c,0x8f,    0x59,0xa1,0xa1,0xf8
1168         .byte   0x09,0x89,0x89,0x80,    0x1a,0x0d,0x0d,0x17
1169         .byte   0x65,0xbf,0xbf,0xda,    0xd7,0xe6,0xe6,0x31
1170         .byte   0x84,0x42,0x42,0xc6,    0xd0,0x68,0x68,0xb8
1171         .byte   0x82,0x41,0x41,0xc3,    0x29,0x99,0x99,0xb0
1172         .byte   0x5a,0x2d,0x2d,0x77,    0x1e,0x0f,0x0f,0x11
1173         .byte   0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
1174         .byte   0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
1175 AES_Te4:                                 ; 256 single-byte entries: the AES S-box
1176         .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
1177         .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
1178         .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
1179         .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
1180         .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
1181         .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
1182         .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
1183         .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
1184         .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
1185         .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
1186         .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
1187         .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
1188         .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
1189         .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
1190         .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
1191         .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
1192         .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
1193         .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
1194         .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
1195         .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
1196         .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
1197         .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
1198         .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
1199         .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
1200         .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
1201         .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
1202         .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
1203         .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
1204         .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
1205         .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
1206         .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
1207         .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
;; Round constants for key expansion: 10 entries of 4 bytes, the constant
;; in the first byte and the other three zero. The table must stay
;; directly behind the 256-byte AES_Te4: the 256-bit key setup forms its
;; address by adding 256 to a table pointer (presumably the AES_Te4
;; address; that pointer is loaded outside this excerpt).
1208 rcon:
1209         .byte   0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00
1210         .byte   0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
1211         .byte   0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
1212         .byte   0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
1213         .byte   0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
1214         .align  128                      ; decryption tables start on a 128-byte boundary
1215 AES_Td:                                  ; 256 entries of 4 bytes: (0E*Si, 09*Si, 0D*Si, 0B*Si) in GF(2^8), Si = the matching AES_Td4 byte
1216         .byte   0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53
1217         .byte   0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
1218         .byte   0x3b,0xab,0x6b,0xcb,    0x1f,0x9d,0x45,0xf1
1219         .byte   0xac,0xfa,0x58,0xab,    0x4b,0xe3,0x03,0x93
1220         .byte   0x20,0x30,0xfa,0x55,    0xad,0x76,0x6d,0xf6
1221         .byte   0x88,0xcc,0x76,0x91,    0xf5,0x02,0x4c,0x25
1222         .byte   0x4f,0xe5,0xd7,0xfc,    0xc5,0x2a,0xcb,0xd7
1223         .byte   0x26,0x35,0x44,0x80,    0xb5,0x62,0xa3,0x8f
1224         .byte   0xde,0xb1,0x5a,0x49,    0x25,0xba,0x1b,0x67
1225         .byte   0x45,0xea,0x0e,0x98,    0x5d,0xfe,0xc0,0xe1
1226         .byte   0xc3,0x2f,0x75,0x02,    0x81,0x4c,0xf0,0x12
1227         .byte   0x8d,0x46,0x97,0xa3,    0x6b,0xd3,0xf9,0xc6
1228         .byte   0x03,0x8f,0x5f,0xe7,    0x15,0x92,0x9c,0x95
1229         .byte   0xbf,0x6d,0x7a,0xeb,    0x95,0x52,0x59,0xda
1230         .byte   0xd4,0xbe,0x83,0x2d,    0x58,0x74,0x21,0xd3
1231         .byte   0x49,0xe0,0x69,0x29,    0x8e,0xc9,0xc8,0x44
1232         .byte   0x75,0xc2,0x89,0x6a,    0xf4,0x8e,0x79,0x78
1233         .byte   0x99,0x58,0x3e,0x6b,    0x27,0xb9,0x71,0xdd
1234         .byte   0xbe,0xe1,0x4f,0xb6,    0xf0,0x88,0xad,0x17
1235         .byte   0xc9,0x20,0xac,0x66,    0x7d,0xce,0x3a,0xb4
1236         .byte   0x63,0xdf,0x4a,0x18,    0xe5,0x1a,0x31,0x82
1237         .byte   0x97,0x51,0x33,0x60,    0x62,0x53,0x7f,0x45
1238         .byte   0xb1,0x64,0x77,0xe0,    0xbb,0x6b,0xae,0x84
1239         .byte   0xfe,0x81,0xa0,0x1c,    0xf9,0x08,0x2b,0x94
1240         .byte   0x70,0x48,0x68,0x58,    0x8f,0x45,0xfd,0x19
1241         .byte   0x94,0xde,0x6c,0x87,    0x52,0x7b,0xf8,0xb7
1242         .byte   0xab,0x73,0xd3,0x23,    0x72,0x4b,0x02,0xe2
1243         .byte   0xe3,0x1f,0x8f,0x57,    0x66,0x55,0xab,0x2a
1244         .byte   0xb2,0xeb,0x28,0x07,    0x2f,0xb5,0xc2,0x03
1245         .byte   0x86,0xc5,0x7b,0x9a,    0xd3,0x37,0x08,0xa5
1246         .byte   0x30,0x28,0x87,0xf2,    0x23,0xbf,0xa5,0xb2
1247         .byte   0x02,0x03,0x6a,0xba,    0xed,0x16,0x82,0x5c
1248         .byte   0x8a,0xcf,0x1c,0x2b,    0xa7,0x79,0xb4,0x92
1249         .byte   0xf3,0x07,0xf2,0xf0,    0x4e,0x69,0xe2,0xa1
1250         .byte   0x65,0xda,0xf4,0xcd,    0x06,0x05,0xbe,0xd5
1251         .byte   0xd1,0x34,0x62,0x1f,    0xc4,0xa6,0xfe,0x8a
1252         .byte   0x34,0x2e,0x53,0x9d,    0xa2,0xf3,0x55,0xa0
1253         .byte   0x05,0x8a,0xe1,0x32,    0xa4,0xf6,0xeb,0x75
1254         .byte   0x0b,0x83,0xec,0x39,    0x40,0x60,0xef,0xaa
1255         .byte   0x5e,0x71,0x9f,0x06,    0xbd,0x6e,0x10,0x51
1256         .byte   0x3e,0x21,0x8a,0xf9,    0x96,0xdd,0x06,0x3d
1257         .byte   0xdd,0x3e,0x05,0xae,    0x4d,0xe6,0xbd,0x46
1258         .byte   0x91,0x54,0x8d,0xb5,    0x71,0xc4,0x5d,0x05
1259         .byte   0x04,0x06,0xd4,0x6f,    0x60,0x50,0x15,0xff
1260         .byte   0x19,0x98,0xfb,0x24,    0xd6,0xbd,0xe9,0x97
1261         .byte   0x89,0x40,0x43,0xcc,    0x67,0xd9,0x9e,0x77
1262         .byte   0xb0,0xe8,0x42,0xbd,    0x07,0x89,0x8b,0x88
1263         .byte   0xe7,0x19,0x5b,0x38,    0x79,0xc8,0xee,0xdb
1264         .byte   0xa1,0x7c,0x0a,0x47,    0x7c,0x42,0x0f,0xe9
1265         .byte   0xf8,0x84,0x1e,0xc9,    0x00,0x00,0x00,0x00
1266         .byte   0x09,0x80,0x86,0x83,    0x32,0x2b,0xed,0x48
1267         .byte   0x1e,0x11,0x70,0xac,    0x6c,0x5a,0x72,0x4e
1268         .byte   0xfd,0x0e,0xff,0xfb,    0x0f,0x85,0x38,0x56
1269         .byte   0x3d,0xae,0xd5,0x1e,    0x36,0x2d,0x39,0x27
1270         .byte   0x0a,0x0f,0xd9,0x64,    0x68,0x5c,0xa6,0x21
1271         .byte   0x9b,0x5b,0x54,0xd1,    0x24,0x36,0x2e,0x3a
1272         .byte   0x0c,0x0a,0x67,0xb1,    0x93,0x57,0xe7,0x0f
1273         .byte   0xb4,0xee,0x96,0xd2,    0x1b,0x9b,0x91,0x9e
1274         .byte   0x80,0xc0,0xc5,0x4f,    0x61,0xdc,0x20,0xa2
1275         .byte   0x5a,0x77,0x4b,0x69,    0x1c,0x12,0x1a,0x16
1276         .byte   0xe2,0x93,0xba,0x0a,    0xc0,0xa0,0x2a,0xe5
1277         .byte   0x3c,0x22,0xe0,0x43,    0x12,0x1b,0x17,0x1d
1278         .byte   0x0e,0x09,0x0d,0x0b,    0xf2,0x8b,0xc7,0xad
1279         .byte   0x2d,0xb6,0xa8,0xb9,    0x14,0x1e,0xa9,0xc8
1280         .byte   0x57,0xf1,0x19,0x85,    0xaf,0x75,0x07,0x4c
1281         .byte   0xee,0x99,0xdd,0xbb,    0xa3,0x7f,0x60,0xfd
1282         .byte   0xf7,0x01,0x26,0x9f,    0x5c,0x72,0xf5,0xbc
1283         .byte   0x44,0x66,0x3b,0xc5,    0x5b,0xfb,0x7e,0x34
1284         .byte   0x8b,0x43,0x29,0x76,    0xcb,0x23,0xc6,0xdc
1285         .byte   0xb6,0xed,0xfc,0x68,    0xb8,0xe4,0xf1,0x63
1286         .byte   0xd7,0x31,0xdc,0xca,    0x42,0x63,0x85,0x10
1287         .byte   0x13,0x97,0x22,0x40,    0x84,0xc6,0x11,0x20
1288         .byte   0x85,0x4a,0x24,0x7d,    0xd2,0xbb,0x3d,0xf8
1289         .byte   0xae,0xf9,0x32,0x11,    0xc7,0x29,0xa1,0x6d
1290         .byte   0x1d,0x9e,0x2f,0x4b,    0xdc,0xb2,0x30,0xf3
1291         .byte   0x0d,0x86,0x52,0xec,    0x77,0xc1,0xe3,0xd0
1292         .byte   0x2b,0xb3,0x16,0x6c,    0xa9,0x70,0xb9,0x99
1293         .byte   0x11,0x94,0x48,0xfa,    0x47,0xe9,0x64,0x22
1294         .byte   0xa8,0xfc,0x8c,0xc4,    0xa0,0xf0,0x3f,0x1a
1295         .byte   0x56,0x7d,0x2c,0xd8,    0x22,0x33,0x90,0xef
1296         .byte   0x87,0x49,0x4e,0xc7,    0xd9,0x38,0xd1,0xc1
1297         .byte   0x8c,0xca,0xa2,0xfe,    0x98,0xd4,0x0b,0x36
1298         .byte   0xa6,0xf5,0x81,0xcf,    0xa5,0x7a,0xde,0x28
1299         .byte   0xda,0xb7,0x8e,0x26,    0x3f,0xad,0xbf,0xa4
1300         .byte   0x2c,0x3a,0x9d,0xe4,    0x50,0x78,0x92,0x0d
1301         .byte   0x6a,0x5f,0xcc,0x9b,    0x54,0x7e,0x46,0x62
1302         .byte   0xf6,0x8d,0x13,0xc2,    0x90,0xd8,0xb8,0xe8
1303         .byte   0x2e,0x39,0xf7,0x5e,    0x82,0xc3,0xaf,0xf5
1304         .byte   0x9f,0x5d,0x80,0xbe,    0x69,0xd0,0x93,0x7c
1305         .byte   0x6f,0xd5,0x2d,0xa9,    0xcf,0x25,0x12,0xb3
1306         .byte   0xc8,0xac,0x99,0x3b,    0x10,0x18,0x7d,0xa7
1307         .byte   0xe8,0x9c,0x63,0x6e,    0xdb,0x3b,0xbb,0x7b
1308         .byte   0xcd,0x26,0x78,0x09,    0x6e,0x59,0x18,0xf4
1309         .byte   0xec,0x9a,0xb7,0x01,    0x83,0x4f,0x9a,0xa8
1310         .byte   0xe6,0x95,0x6e,0x65,    0xaa,0xff,0xe6,0x7e
1311         .byte   0x21,0xbc,0xcf,0x08,    0xef,0x15,0xe8,0xe6
1312         .byte   0xba,0xe7,0x9b,0xd9,    0x4a,0x6f,0x36,0xce
1313         .byte   0xea,0x9f,0x09,0xd4,    0x29,0xb0,0x7c,0xd6
1314         .byte   0x31,0xa4,0xb2,0xaf,    0x2a,0x3f,0x23,0x31
1315         .byte   0xc6,0xa5,0x94,0x30,    0x35,0xa2,0x66,0xc0
1316         .byte   0x74,0x4e,0xbc,0x37,    0xfc,0x82,0xca,0xa6
1317         .byte   0xe0,0x90,0xd0,0xb0,    0x33,0xa7,0xd8,0x15
1318         .byte   0xf1,0x04,0x98,0x4a,    0x41,0xec,0xda,0xf7
1319         .byte   0x7f,0xcd,0x50,0x0e,    0x17,0x91,0xf6,0x2f
1320         .byte   0x76,0x4d,0xd6,0x8d,    0x43,0xef,0xb0,0x4d
1321         .byte   0xcc,0xaa,0x4d,0x54,    0xe4,0x96,0x04,0xdf
1322         .byte   0x9e,0xd1,0xb5,0xe3,    0x4c,0x6a,0x88,0x1b
1323         .byte   0xc1,0x2c,0x1f,0xb8,    0x46,0x65,0x51,0x7f
1324         .byte   0x9d,0x5e,0xea,0x04,    0x01,0x8c,0x35,0x5d
1325         .byte   0xfa,0x87,0x74,0x73,    0xfb,0x0b,0x41,0x2e
1326         .byte   0xb3,0x67,0x1d,0x5a,    0x92,0xdb,0xd2,0x52
1327         .byte   0xe9,0x10,0x56,0x33,    0x6d,0xd6,0x47,0x13
1328         .byte   0x9a,0xd7,0x61,0x8c,    0x37,0xa1,0x0c,0x7a
1329         .byte   0x59,0xf8,0x14,0x8e,    0xeb,0x13,0x3c,0x89
1330         .byte   0xce,0xa9,0x27,0xee,    0xb7,0x61,0xc9,0x35
1331         .byte   0xe1,0x1c,0xe5,0xed,    0x7a,0x47,0xb1,0x3c
1332         .byte   0x9c,0xd2,0xdf,0x59,    0x55,0xf2,0x73,0x3f
1333         .byte   0x18,0x14,0xce,0x79,    0x73,0xc7,0x37,0xbf
1334         .byte   0x53,0xf7,0xcd,0xea,    0x5f,0xfd,0xaa,0x5b
1335         .byte   0xdf,0x3d,0x6f,0x14,    0x78,0x44,0xdb,0x86
1336         .byte   0xca,0xaf,0xf3,0x81,    0xb9,0x68,0xc4,0x3e
1337         .byte   0x38,0x24,0x34,0x2c,    0xc2,0xa3,0x40,0x5f
1338         .byte   0x16,0x1d,0xc3,0x72,    0xbc,0xe2,0x25,0x0c
1339         .byte   0x28,0x3c,0x49,0x8b,    0xff,0x0d,0x95,0x41
1340         .byte   0x39,0xa8,0x01,0x71,    0x08,0x0c,0xb3,0xde
1341         .byte   0xd8,0xb4,0xe4,0x9c,    0x64,0x56,0xc1,0x90
1342         .byte   0x7b,0xcb,0x84,0x61,    0xd5,0x32,0xb6,0x70
1343         .byte   0x48,0x6c,0x5c,0x74,    0xd0,0xb8,0x57,0x42
1344 AES_Td4:                                 ; 256 single-byte entries: the AES inverse S-box
1345         .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
1346         .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1347         .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1348         .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1349         .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1350         .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1351         .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1352         .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1353         .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1354         .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1355         .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1356         .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1357         .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1358         .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1359         .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1360         .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1361         .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1362         .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1363         .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1364         .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1365         .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1366         .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1367         .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1368         .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1369         .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1370         .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1371         .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1372         .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1373         .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1374         .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1375         .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1376         .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1377         .cstring "AES for C64x+, CRYPTOGAMS by <appro\@openssl.org>"   ; identification string placed after the tables
1378         .align  4                        ; pad the end of the section to a 4-byte boundary
1379 ___
1380
# Emit the accumulated assembly text on STDOUT. The close is checked
# because buffered output is flushed there: a write failure (for example
# a full disk or a closed pipe) would otherwise leave a truncated
# assembly file while the script still exits with status 0.
1381 print $code;
1382 close STDOUT or die "error closing STDOUT: $!";