5bbc2accff5af2ad6f749d431db4d57b090a571f
[openssl.git] / crypto / aes / asm / aes-c64xplus.pl
1 #!/usr/bin/env perl
2 #
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9 #
10 # [Endian-neutral] AES for C64x+.
11 #
12 # Even though SPLOOPs are scheduled for 13 cycles, and thus expected
13 # performance is ~8.5 cycles per byte processed with 128-bit key,
14 # measured performance turned out to be ~10 cycles per byte. Discrepancy
15 # must be caused by limitations of L1D memory banking(*), see SPRU871
16 # TI publication for further details. If it's any consolation, it's still
17 # ~20% faster than TI's linear assembly module anyway... Compared to
18 # aes_core.c compiled with cl6x 6.0 with -mv6400+ -o2 options this
19 # code is 3.75x faster and almost 3x smaller (tables included).
20 #
21 # (*)   This means that there might be subtle correlation between data
22 #       and timing and one can wonder if it can be ... attacked:-(
23 #       On the other hand this also means that *if* one chooses to
24 #       implement *4* T-tables variant [instead of 1 T-table as in
25 #       this implementation, or in addition to], then one ought to
26 #       *interleave* them. Even though it complicates addressing,
27 #       references to interleaved tables would be guaranteed not to
28 #       clash. I reckon that it should be possible to break 8 cycles
29 #       per byte "barrier," i.e. improve by ~20%, naturally at the
30 #       cost of 8x increased pressure on L1D. 8x because you'd have
31 #       to interleave both Te and Td tables...
32
33 while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
34 open STDOUT,">$output";
35
36 ($TEA,$TEB)=("A5","B5");
37 ($KPA,$KPB)=("A3","B1");
38 @K=("A6","B6","A7","B7");
39 @s=("A8","B8","A9","B9");
40 @Te0=@Td0=("A16","B16","A17","B17");
41 @Te1=@Td1=("A18","B18","A19","B19");
42 @Te2=@Td2=("A20","B20","A21","B21");
43 @Te3=@Td3=("A22","B22","A23","B23");
44
45 $code=<<___;
46         .text
47
48         .if     .ASSEMBLER_VERSION<7000000
49         .asg    0,__TI_EABI__
50         .endif
51         .if     __TI_EABI__
52         .nocmp
53         .asg    AES_encrypt,_AES_encrypt
54         .asg    AES_decrypt,_AES_decrypt
55         .asg    AES_set_encrypt_key,_AES_set_encrypt_key
56         .asg    AES_set_decrypt_key,_AES_set_decrypt_key
57         .asg    AES_ctr32_encrypt,_AES_ctr32_encrypt
58         .endif
59
60         .asg    B3,RA
61         .asg    A4,INP
62         .asg    B4,OUT
63         .asg    A6,KEY
64         .asg    A4,RET
65         .asg    B15,SP
66
67         .eval   24,EXT0
68         .eval   16,EXT1
69         .eval   8,EXT2
70         .eval   0,EXT3
71         .eval   8,TBL1
72         .eval   16,TBL2
73         .eval   24,TBL3
74
75         .if     .BIG_ENDIAN
76         .eval   24-EXT0,EXT0
77         .eval   24-EXT1,EXT1
78         .eval   24-EXT2,EXT2
79         .eval   24-EXT3,EXT3
80         .eval   32-TBL1,TBL1
81         .eval   32-TBL2,TBL2
82         .eval   32-TBL3,TBL3
83         .endif
84
85         .global _AES_encrypt
86 _AES_encrypt:
87         .asmfunc
88         MVK     1,B2
89 __encrypt:
90         .if     __TI_EABI__
91    [B2] LDNDW   *INP++,A9:A8                    ; load input
92 ||      MVKL    \$PCR_OFFSET(AES_Te,__encrypt),$TEA
93 ||      ADDKPC  __encrypt,B0
94    [B2] LDNDW   *INP++,B9:B8
95 ||      MVKH    \$PCR_OFFSET(AES_Te,__encrypt),$TEA
96 ||      ADD     0,KEY,$KPA
97 ||      ADD     4,KEY,$KPB
98         .else
99    [B2] LDNDW   *INP++,A9:A8                    ; load input
100 ||      MVKL    (AES_Te-__encrypt),$TEA
101 ||      ADDKPC  __encrypt,B0
102    [B2] LDNDW   *INP++,B9:B8
103 ||      MVKH    (AES_Te-__encrypt),$TEA
104 ||      ADD     0,KEY,$KPA
105 ||      ADD     4,KEY,$KPB
106         .endif
107         LDW     *$KPA++[2],$Te0[0]              ; zero round key
108 ||      LDW     *$KPB++[2],$Te0[1]
109 ||      MVK     60,A0
110 ||      ADD     B0,$TEA,$TEA                    ; AES_Te
111         LDW     *KEY[A0],B0                     ; rounds
112 ||      MVK     1024,A0                         ; sizeof(AES_Te)
113         LDW     *$KPA++[2],$Te0[2]
114 ||      LDW     *$KPB++[2],$Te0[3]
115 ||      MV      $TEA,$TEB
116         NOP
117         .if     .BIG_ENDIAN
118         MV      A9,$s[0]
119 ||      MV      A8,$s[1]
120 ||      MV      B9,$s[2]
121 ||      MV      B8,$s[3]
122         .else
123         MV      A8,$s[0]
124 ||      MV      A9,$s[1]
125 ||      MV      B8,$s[2]
126 ||      MV      B9,$s[3]
127         .endif
128         XOR     $Te0[0],$s[0],$s[0]
129 ||      XOR     $Te0[1],$s[1],$s[1]
130 ||      LDW     *$KPA++[2],$K[0]                ; 1st round key
131 ||      LDW     *$KPB++[2],$K[1]
132         SUB     B0,2,B0
133
134         SPLOOPD 13
135 ||      MVC     B0,ILC
136 ||      LDW     *$KPA++[2],$K[2]
137 ||      LDW     *$KPB++[2],$K[3]
138 ;;====================================================================
139         EXTU    $s[1],EXT1,24,$Te1[1]
140 ||      EXTU    $s[0],EXT3,24,$Te3[0]
141         LDW     *${TEB}[$Te1[1]],$Te1[1]        ; Te1[s1>>8],   t0
142 ||      LDW     *${TEA}[$Te3[0]],$Te3[0]        ; Te3[s0>>24],  t1
143 ||      XOR     $s[2],$Te0[2],$s[2]             ; modulo-scheduled
144 ||      XOR     $s[3],$Te0[3],$s[3]             ; modulo-scheduled
145 ||      EXTU    $s[1],EXT3,24,$Te3[1]
146 ||      EXTU    $s[0],EXT1,24,$Te1[0]
147         LDW     *${TEB}[$Te3[1]],$Te3[1]        ; Te3[s1>>24],  t2
148 ||      LDW     *${TEA}[$Te1[0]],$Te1[0]        ; Te1[s0>>8],   t3
149 ||      EXTU    $s[2],EXT2,24,$Te2[2]
150 ||      EXTU    $s[3],EXT2,24,$Te2[3]
151         LDW     *${TEA}[$Te2[2]],$Te2[2]        ; Te2[s2>>16],  t0
152 ||      LDW     *${TEB}[$Te2[3]],$Te2[3]        ; Te2[s3>>16],  t1
153 ||      EXTU    $s[3],EXT3,24,$Te3[3]
154 ||      EXTU    $s[2],EXT1,24,$Te1[2]
155         LDW     *${TEB}[$Te3[3]],$Te3[3]        ; Te3[s3>>24],  t0
156 ||      LDW     *${TEA}[$Te1[2]],$Te1[2]        ; Te1[s2>>8],   t1
157 ||      EXTU    $s[0],EXT2,24,$Te2[0]
158 ||      EXTU    $s[1],EXT2,24,$Te2[1]
159         LDW     *${TEA}[$Te2[0]],$Te2[0]        ; Te2[s0>>16],  t2
160 ||      LDW     *${TEB}[$Te2[1]],$Te2[1]        ; Te2[s1>>16],  t3
161 ||      EXTU    $s[3],EXT1,24,$Te1[3]
162 ||      EXTU    $s[2],EXT3,24,$Te3[2]
163         LDW     *${TEB}[$Te1[3]],$Te1[3]        ; Te1[s3>>8],   t2
164 ||      LDW     *${TEA}[$Te3[2]],$Te3[2]        ; Te3[s2>>24],  t3
165 ||      ROTL    $Te1[1],TBL1,$Te3[0]            ; t0
166 ||      ROTL    $Te3[0],TBL3,$Te1[1]            ; t1
167 ||      EXTU    $s[0],EXT0,24,$Te0[0]
168 ||      EXTU    $s[1],EXT0,24,$Te0[1]
169         LDW     *${TEA}[$Te0[0]],$Te0[0]        ; Te0[s0],      t0
170 ||      LDW     *${TEB}[$Te0[1]],$Te0[1]        ; Te0[s1],      t1
171 ||      ROTL    $Te3[1],TBL3,$Te1[0]            ; t2
172 ||      ROTL    $Te1[0],TBL1,$Te3[1]            ; t3
173 ||      EXTU    $s[2],EXT0,24,$Te0[2]
174 ||      EXTU    $s[3],EXT0,24,$Te0[3]
175         LDW     *${TEA}[$Te0[2]],$Te0[2]        ; Te0[s2],      t2
176 ||      LDW     *${TEB}[$Te0[3]],$Te0[3]        ; Te0[s3],      t3
177 ||      ROTL    $Te2[2],TBL2,$Te2[2]            ; t0
178 ||      ROTL    $Te2[3],TBL2,$Te2[3]            ; t1
179 ||      XOR     $K[0],$Te3[0],$s[0]
180 ||      XOR     $K[1],$Te1[1],$s[1]
181         ROTL    $Te3[3],TBL3,$Te1[2]            ; t0
182 ||      ROTL    $Te1[2],TBL1,$Te3[3]            ; t1
183 ||      XOR     $K[2],$Te1[0],$s[2]
184 ||      XOR     $K[3],$Te3[1],$s[3]
185 ||      LDW     *$KPA++[2],$K[0]                ; next round key
186 ||      LDW     *$KPB++[2],$K[1]
187         ROTL    $Te2[0],TBL2,$Te2[0]            ; t2
188 ||      ROTL    $Te2[1],TBL2,$Te2[1]            ; t3
189 ||      XOR     $s[0],$Te2[2],$s[0]
190 ||      XOR     $s[1],$Te2[3],$s[1]
191 ||      LDW     *$KPA++[2],$K[2]
192 ||      LDW     *$KPB++[2],$K[3]
193         ROTL    $Te1[3],TBL1,$Te3[2]            ; t2
194 ||      ROTL    $Te3[2],TBL3,$Te1[3]            ; t3
195 ||      XOR     $s[0],$Te1[2],$s[0]
196 ||      XOR     $s[1],$Te3[3],$s[1]
197         XOR     $s[2],$Te2[0],$s[2]
198 ||      XOR     $s[3],$Te2[1],$s[3]
199 ||      XOR     $s[0],$Te0[0],$s[0]
200 ||      XOR     $s[1],$Te0[1],$s[1]
201         SPKERNEL
202 ||      XOR.L   $s[2],$Te3[2],$s[2]
203 ||      XOR.L   $s[3],$Te1[3],$s[3]
204 ;;====================================================================
205         ADD.D   ${TEA},A0,${TEA}                ; point to Te4
206 ||      ADD.D   ${TEB},A0,${TEB}
207 ||      EXTU    $s[1],EXT1,24,$Te1[1]
208 ||      EXTU    $s[0],EXT3,24,$Te3[0]
209         LDBU    *${TEB}[$Te1[1]],$Te1[1]        ; Te1[s1>>8],   t0
210 ||      LDBU    *${TEA}[$Te3[0]],$Te3[0]        ; Te3[s0>>24],  t1
211 ||      XOR     $s[2],$Te0[2],$s[2]             ; modulo-scheduled
212 ||      XOR     $s[3],$Te0[3],$s[3]             ; modulo-scheduled
213 ||      EXTU    $s[0],EXT0,24,$Te0[0]
214 ||      EXTU    $s[1],EXT0,24,$Te0[1]
215         LDBU    *${TEA}[$Te0[0]],$Te0[0]        ; Te0[s0],      t0
216 ||      LDBU    *${TEB}[$Te0[1]],$Te0[1]        ; Te0[s1],      t1
217 ||      EXTU    $s[3],EXT3,24,$Te3[3]
218 ||      EXTU    $s[2],EXT1,24,$Te1[2]
219         LDBU    *${TEB}[$Te3[3]],$Te3[3]        ; Te3[s3>>24],  t0
220 ||      LDBU    *${TEA}[$Te1[2]],$Te1[2]        ; Te1[s2>>8],   t1
221 ||      EXTU    $s[2],EXT2,24,$Te2[2]
222 ||      EXTU    $s[3],EXT2,24,$Te2[3]
223         LDBU    *${TEA}[$Te2[2]],$Te2[2]        ; Te2[s2>>16],  t0
224 ||      LDBU    *${TEB}[$Te2[3]],$Te2[3]        ; Te2[s3>>16],  t1
225 ||      EXTU    $s[1],EXT3,24,$Te3[1]
226 ||      EXTU    $s[0],EXT1,24,$Te1[0]
227         LDBU    *${TEB}[$Te3[1]],$Te3[1]        ; Te3[s1>>24],  t2
228 ||      LDBU    *${TEA}[$Te1[0]],$Te1[0]        ; Te1[s0>>8],   t3
229 ||      EXTU    $s[3],EXT1,24,$Te1[3]
230 ||      EXTU    $s[2],EXT3,24,$Te3[2]
231         LDBU    *${TEB}[$Te1[3]],$Te1[3]        ; Te1[s3>>8],   t2
232 ||      LDBU    *${TEA}[$Te3[2]],$Te3[2]        ; Te3[s2>>24],  t3
233 ||      EXTU    $s[2],EXT0,24,$Te0[2]
234 ||      EXTU    $s[3],EXT0,24,$Te0[3]
235         LDBU    *${TEA}[$Te0[2]],$Te0[2]        ; Te0[s2],      t2
236 ||      LDBU    *${TEB}[$Te0[3]],$Te0[3]        ; Te0[s3],      t3
237 ||      EXTU    $s[0],EXT2,24,$Te2[0]
238 ||      EXTU    $s[1],EXT2,24,$Te2[1]
239         LDBU    *${TEA}[$Te2[0]],$Te2[0]        ; Te2[s0>>16],  t2
240 ||      LDBU    *${TEB}[$Te2[1]],$Te2[1]        ; Te2[s1>>16],  t3
241
242         .if     .BIG_ENDIAN
243         PACK2   $Te0[0],$Te1[1],$Te0[0]
244 ||      PACK2   $Te0[1],$Te1[2],$Te0[1]
245         PACK2   $Te2[2],$Te3[3],$Te2[2]
246 ||      PACK2   $Te2[3],$Te3[0],$Te2[3]
247         PACKL4  $Te0[0],$Te2[2],$Te0[0]
248 ||      PACKL4  $Te0[1],$Te2[3],$Te0[1]
249         XOR     $K[0],$Te0[0],$Te0[0]           ; s[0]
250 ||      XOR     $K[1],$Te0[1],$Te0[1]           ; s[1]
251
252         PACK2   $Te0[2],$Te1[3],$Te0[2]
253 ||      PACK2   $Te0[3],$Te1[0],$Te0[3]
254         PACK2   $Te2[0],$Te3[1],$Te2[0]
255 ||      PACK2   $Te2[1],$Te3[2],$Te2[1]
256 ||      BNOP    RA
257         PACKL4  $Te0[2],$Te2[0],$Te0[2]
258 ||      PACKL4  $Te0[3],$Te2[1],$Te0[3]
259         XOR     $K[2],$Te0[2],$Te0[2]           ; s[2]
260 ||      XOR     $K[3],$Te0[3],$Te0[3]           ; s[3]
261
262         MV      $Te0[0],A9
263 ||      MV      $Te0[1],A8
264         MV      $Te0[2],B9
265 ||      MV      $Te0[3],B8
266 || [B2] STNDW   A9:A8,*OUT++
267    [B2] STNDW   B9:B8,*OUT++
268         .else
269         PACK2   $Te1[1],$Te0[0],$Te1[1]
270 ||      PACK2   $Te1[2],$Te0[1],$Te1[2]
271         PACK2   $Te3[3],$Te2[2],$Te3[3]
272 ||      PACK2   $Te3[0],$Te2[3],$Te3[0]
273         PACKL4  $Te3[3],$Te1[1],$Te1[1]
274 ||      PACKL4  $Te3[0],$Te1[2],$Te1[2]
275         XOR     $K[0],$Te1[1],$Te1[1]           ; s[0]
276 ||      XOR     $K[1],$Te1[2],$Te1[2]           ; s[1]
277
278         PACK2   $Te1[3],$Te0[2],$Te1[3]
279 ||      PACK2   $Te1[0],$Te0[3],$Te1[0]
280         PACK2   $Te3[1],$Te2[0],$Te3[1]
281 ||      PACK2   $Te3[2],$Te2[1],$Te3[2]
282 ||      BNOP    RA
283         PACKL4  $Te3[1],$Te1[3],$Te1[3]
284 ||      PACKL4  $Te3[2],$Te1[0],$Te1[0]
285         XOR     $K[2],$Te1[3],$Te1[3]           ; s[2]
286 ||      XOR     $K[3],$Te1[0],$Te1[0]           ; s[3]
287
288         MV      $Te1[1],A8
289 ||      MV      $Te1[2],A9
290         MV      $Te1[3],B8
291 ||      MV      $Te1[0],B9
292 || [B2] STNDW   A9:A8,*OUT++
293    [B2] STNDW   B9:B8,*OUT++
294         .endif
295         .endasmfunc
296
297         .global _AES_decrypt
298 _AES_decrypt:
299         .asmfunc
300         MVK     1,B2
301 __decrypt:
302         .if     __TI_EABI__
303    [B2] LDNDW   *INP++,A9:A8                    ; load input
304 ||      MVKL    \$PCR_OFFSET(AES_Td,__decrypt),$TEA
305 ||      ADDKPC  __decrypt,B0
306    [B2] LDNDW   *INP++,B9:B8
307 ||      MVKH    \$PCR_OFFSET(AES_Td,__decrypt),$TEA
308 ||      ADD     0,KEY,$KPA
309 ||      ADD     4,KEY,$KPB
310         .else
311    [B2] LDNDW   *INP++,A9:A8                    ; load input
312 ||      MVKL    (AES_Td-__decrypt),$TEA
313 ||      ADDKPC  __decrypt,B0
314    [B2] LDNDW   *INP++,B9:B8
315 ||      MVKH    (AES_Td-__decrypt),$TEA
316 ||      ADD     0,KEY,$KPA
317 ||      ADD     4,KEY,$KPB
318         .endif
319         LDW     *$KPA++[2],$Td0[0]              ; zero round key
320 ||      LDW     *$KPB++[2],$Td0[1]
321 ||      MVK     60,A0
322 ||      ADD     B0,$TEA,$TEA                    ; AES_Td
323         LDW     *KEY[A0],B0                     ; rounds
324 ||      MVK     1024,A0                         ; sizeof(AES_Td)
325         LDW     *$KPA++[2],$Td0[2]
326 ||      LDW     *$KPB++[2],$Td0[3]
327 ||      MV      $TEA,$TEB
328         NOP
329         .if     .BIG_ENDIAN
330         MV      A9,$s[0]
331 ||      MV      A8,$s[1]
332 ||      MV      B9,$s[2]
333 ||      MV      B8,$s[3]
334         .else
335         MV      A8,$s[0]
336 ||      MV      A9,$s[1]
337 ||      MV      B8,$s[2]
338 ||      MV      B9,$s[3]
339         .endif
340         XOR     $Td0[0],$s[0],$s[0]
341 ||      XOR     $Td0[1],$s[1],$s[1]
342 ||      LDW     *$KPA++[2],$K[0]                ; 1st round key
343 ||      LDW     *$KPB++[2],$K[1]
344         SUB     B0,2,B0
345
346         SPLOOPD 13
347 ||      MVC     B0,ILC
348 ||      LDW     *$KPA++[2],$K[2]
349 ||      LDW     *$KPB++[2],$K[3]
350 ;;====================================================================
351         EXTU    $s[1],EXT3,24,$Td3[1]
352 ||      EXTU    $s[0],EXT1,24,$Td1[0]
353         LDW     *${TEB}[$Td3[1]],$Td3[1]        ; Td3[s1>>24],  t0
354 ||      LDW     *${TEA}[$Td1[0]],$Td1[0]        ; Td1[s0>>8],   t1
355 ||      XOR     $s[2],$Td0[2],$s[2]             ; modulo-scheduled
356 ||      XOR     $s[3],$Td0[3],$s[3]             ; modulo-scheduled
357 ||      EXTU    $s[1],EXT1,24,$Td1[1]
358 ||      EXTU    $s[0],EXT3,24,$Td3[0]
359         LDW     *${TEB}[$Td1[1]],$Td1[1]        ; Td1[s1>>8],   t2
360 ||      LDW     *${TEA}[$Td3[0]],$Td3[0]        ; Td3[s0>>24],  t3
361 ||      EXTU    $s[2],EXT2,24,$Td2[2]
362 ||      EXTU    $s[3],EXT2,24,$Td2[3]
363         LDW     *${TEA}[$Td2[2]],$Td2[2]        ; Td2[s2>>16],  t0
364 ||      LDW     *${TEB}[$Td2[3]],$Td2[3]        ; Td2[s3>>16],  t1
365 ||      EXTU    $s[3],EXT1,24,$Td1[3]
366 ||      EXTU    $s[2],EXT3,24,$Td3[2]
367         LDW     *${TEB}[$Td1[3]],$Td1[3]        ; Td1[s3>>8],   t0
368 ||      LDW     *${TEA}[$Td3[2]],$Td3[2]        ; Td3[s2>>24],  t1
369 ||      EXTU    $s[0],EXT2,24,$Td2[0]
370 ||      EXTU    $s[1],EXT2,24,$Td2[1]
371         LDW     *${TEA}[$Td2[0]],$Td2[0]        ; Td2[s0>>16],  t2
372 ||      LDW     *${TEB}[$Td2[1]],$Td2[1]        ; Td2[s1>>16],  t3
373 ||      EXTU    $s[3],EXT3,24,$Td3[3]
374 ||      EXTU    $s[2],EXT1,24,$Td1[2]
375         LDW     *${TEB}[$Td3[3]],$Td3[3]        ; Td3[s3>>24],  t2
376 ||      LDW     *${TEA}[$Td1[2]],$Td1[2]        ; Td1[s2>>8],   t3
377 ||      ROTL    $Td3[1],TBL3,$Td1[0]            ; t0
378 ||      ROTL    $Td1[0],TBL1,$Td3[1]            ; t1
379 ||      EXTU    $s[0],EXT0,24,$Td0[0]
380 ||      EXTU    $s[1],EXT0,24,$Td0[1]
381         LDW     *${TEA}[$Td0[0]],$Td0[0]        ; Td0[s0],      t0
382 ||      LDW     *${TEB}[$Td0[1]],$Td0[1]        ; Td0[s1],      t1
383 ||      ROTL    $Td1[1],TBL1,$Td3[0]            ; t2
384 ||      ROTL    $Td3[0],TBL3,$Td1[1]            ; t3
385 ||      EXTU    $s[2],EXT0,24,$Td0[2]
386 ||      EXTU    $s[3],EXT0,24,$Td0[3]
387         LDW     *${TEA}[$Td0[2]],$Td0[2]        ; Td0[s2],      t2
388 ||      LDW     *${TEB}[$Td0[3]],$Td0[3]        ; Td0[s3],      t3
389 ||      ROTL    $Td2[2],TBL2,$Td2[2]            ; t0
390 ||      ROTL    $Td2[3],TBL2,$Td2[3]            ; t1
391 ||      XOR     $K[0],$Td1[0],$s[0]
392 ||      XOR     $K[1],$Td3[1],$s[1]
393         ROTL    $Td1[3],TBL1,$Td3[2]            ; t0
394 ||      ROTL    $Td3[2],TBL3,$Td1[3]            ; t1
395 ||      XOR     $K[2],$Td3[0],$s[2]
396 ||      XOR     $K[3],$Td1[1],$s[3]
397 ||      LDW     *$KPA++[2],$K[0]                ; next round key
398 ||      LDW     *$KPB++[2],$K[1]
399         ROTL    $Td2[0],TBL2,$Td2[0]            ; t2
400 ||      ROTL    $Td2[1],TBL2,$Td2[1]            ; t3
401 ||      XOR     $s[0],$Td2[2],$s[0]
402 ||      XOR     $s[1],$Td2[3],$s[1]
403 ||      LDW     *$KPA++[2],$K[2]
404 ||      LDW     *$KPB++[2],$K[3]
405         ROTL    $Td3[3],TBL3,$Td1[2]            ; t2
406 ||      ROTL    $Td1[2],TBL1,$Td3[3]            ; t3
407 ||      XOR     $s[0],$Td3[2],$s[0]
408 ||      XOR     $s[1],$Td1[3],$s[1]
409         XOR     $s[2],$Td2[0],$s[2]
410 ||      XOR     $s[3],$Td2[1],$s[3]
411 ||      XOR     $s[0],$Td0[0],$s[0]
412 ||      XOR     $s[1],$Td0[1],$s[1]
413         SPKERNEL
414 ||      XOR.L   $s[2],$Td1[2],$s[2]
415 ||      XOR.L   $s[3],$Td3[3],$s[3]
416 ;;====================================================================
417         ADD.D   ${TEA},A0,${TEA}                ; point to Td4
418 ||      ADD.D   ${TEB},A0,${TEB}
419 ||      EXTU    $s[1],EXT3,24,$Td3[1]
420 ||      EXTU    $s[0],EXT1,24,$Td1[0]
421         LDBU    *${TEB}[$Td3[1]],$Td3[1]        ; Td3[s1>>24],  t0
422 ||      LDBU    *${TEA}[$Td1[0]],$Td1[0]        ; Td1[s0>>8],   t1
423 ||      XOR     $s[2],$Td0[2],$s[2]             ; modulo-scheduled
424 ||      XOR     $s[3],$Td0[3],$s[3]             ; modulo-scheduled
425 ||      EXTU    $s[0],EXT0,24,$Td0[0]
426 ||      EXTU    $s[1],EXT0,24,$Td0[1]
427         LDBU    *${TEA}[$Td0[0]],$Td0[0]        ; Td0[s0],      t0
428 ||      LDBU    *${TEB}[$Td0[1]],$Td0[1]        ; Td0[s1],      t1
429 ||      EXTU    $s[2],EXT2,24,$Td2[2]
430 ||      EXTU    $s[3],EXT2,24,$Td2[3]
431         LDBU    *${TEA}[$Td2[2]],$Td2[2]        ; Td2[s2>>16],  t0
432 ||      LDBU    *${TEB}[$Td2[3]],$Td2[3]        ; Td2[s3>>16],  t1
433 ||      EXTU    $s[3],EXT1,24,$Td1[3]
434 ||      EXTU    $s[2],EXT3,24,$Td3[2]
435         LDBU    *${TEB}[$Td1[3]],$Td1[3]        ; Td1[s3>>8],   t0
436 ||      LDBU    *${TEA}[$Td3[2]],$Td3[2]        ; Td3[s2>>24],  t1
437 ||      EXTU    $s[1],EXT1,24,$Td1[1]
438 ||      EXTU    $s[0],EXT3,24,$Td3[0]
439         LDBU    *${TEB}[$Td1[1]],$Td1[1]        ; Td1[s1>>8],   t2
440 ||      LDBU    *${TEA}[$Td3[0]],$Td3[0]        ; Td3[s0>>24],  t3
441 ||      EXTU    $s[0],EXT2,24,$Td2[0]
442 ||      EXTU    $s[1],EXT2,24,$Td2[1]
443         LDBU    *${TEA}[$Td2[0]],$Td2[0]        ; Td2[s0>>16],  t2
444 ||      LDBU    *${TEB}[$Td2[1]],$Td2[1]        ; Td2[s1>>16],  t3
445 ||      EXTU    $s[3],EXT3,24,$Td3[3]
446 ||      EXTU    $s[2],EXT1,24,$Td1[2]
447         LDBU    *${TEB}[$Td3[3]],$Td3[3]        ; Td3[s3>>24],  t2
448 ||      LDBU    *${TEA}[$Td1[2]],$Td1[2]        ; Td1[s2>>8],   t3
449 ||      EXTU    $s[2],EXT0,24,$Td0[2]
450 ||      EXTU    $s[3],EXT0,24,$Td0[3]
451         LDBU    *${TEA}[$Td0[2]],$Td0[2]        ; Td0[s2],      t2
452 ||      LDBU    *${TEB}[$Td0[3]],$Td0[3]        ; Td0[s3],      t3
453
454         .if     .BIG_ENDIAN
455         PACK2   $Td0[0],$Td1[3],$Td0[0]
456 ||      PACK2   $Td0[1],$Td1[0],$Td0[1]
457         PACK2   $Td2[2],$Td3[1],$Td2[2]
458 ||      PACK2   $Td2[3],$Td3[2],$Td2[3]
459         PACKL4  $Td0[0],$Td2[2],$Td0[0]
460 ||      PACKL4  $Td0[1],$Td2[3],$Td0[1]
461         XOR     $K[0],$Td0[0],$Td0[0]           ; s[0]
462 ||      XOR     $K[1],$Td0[1],$Td0[1]           ; s[1]
463
464         PACK2   $Td0[2],$Td1[1],$Td0[2]
465 ||      PACK2   $Td0[3],$Td1[2],$Td0[3]
466         PACK2   $Td2[0],$Td3[3],$Td2[0]
467 ||      PACK2   $Td2[1],$Td3[0],$Td2[1]
468 ||      BNOP    RA
469         PACKL4  $Td0[2],$Td2[0],$Td0[2]
470 ||      PACKL4  $Td0[3],$Td2[1],$Td0[3]
471         XOR     $K[2],$Td0[2],$Td0[2]           ; s[2]
472 ||      XOR     $K[3],$Td0[3],$Td0[3]           ; s[3]
473
474         MV      $Td0[0],A9
475 ||      MV      $Td0[1],A8
476         MV      $Td0[2],B9
477 ||      MV      $Td0[3],B8
478 || [B2] STNDW   A9:A8,*OUT++
479    [B2] STNDW   B9:B8,*OUT++
480         .else
481         PACK2   $Td1[3],$Td0[0],$Td1[3]
482 ||      PACK2   $Td1[0],$Td0[1],$Td1[0]
483         PACK2   $Td3[1],$Td2[2],$Td3[1]
484 ||      PACK2   $Td3[2],$Td2[3],$Td3[2]
485         PACKL4  $Td3[1],$Td1[3],$Td1[3]
486 ||      PACKL4  $Td3[2],$Td1[0],$Td1[0]
487         XOR     $K[0],$Td1[3],$Td1[3]           ; s[0]
488 ||      XOR     $K[1],$Td1[0],$Td1[0]           ; s[1]
489
490         PACK2   $Td1[1],$Td0[2],$Td1[1]
491 ||      PACK2   $Td1[2],$Td0[3],$Td1[2]
492         PACK2   $Td3[3],$Td2[0],$Td3[3]
493 ||      PACK2   $Td3[0],$Td2[1],$Td3[0]
494 ||      BNOP    RA
495         PACKL4  $Td3[3],$Td1[1],$Td1[1]
496 ||      PACKL4  $Td3[0],$Td1[2],$Td1[2]
497         XOR     $K[2],$Td1[1],$Td1[1]           ; s[2]
498 ||      XOR     $K[3],$Td1[2],$Td1[2]           ; s[3]
499
500         MV      $Td1[3],A8
501 ||      MV      $Td1[0],A9
502         MV      $Td1[1],B8
503 ||      MV      $Td1[2],B9
504 || [B2] STNDW   A9:A8,*OUT++
505    [B2] STNDW   B9:B8,*OUT++
506         .endif
507         .endasmfunc
508 ___
509 {
510 my @K=(@K,@s);                  # extended key
511 my @Te4=map("B$_",(16..19));
512
513 my @Kx9=@Te0;                   # used in AES_set_decrypt_key
514 my @KxB=@Te1;
515 my @KxD=@Te2;
516 my @KxE=@Te3;
517
518 $code.=<<___;
519         .asg    OUT,BITS
520
521         .global _AES_set_encrypt_key
522 _AES_set_encrypt_key:
523 __set_encrypt_key:
524         .asmfunc
525         MV      INP,A0
526 ||      SHRU    BITS,5,BITS                     ; 128-192-256 -> 4-6-8
527 ||      MV      KEY,A1
528   [!A0] B       RA
529 ||[!A0] MVK     -1,RET
530 ||[!A0] MVK     1,A1                            ; only one B RA
531   [!A1] B       RA
532 ||[!A1] MVK     -1,RET
533 ||[!A1] MVK     0,A0
534 ||      MVK     0,B0
535 ||      MVK     0,A1
536    [A0] LDNDW   *INP++,A9:A8
537 || [A0] CMPEQ   4,BITS,B0
538 || [A0] CMPLT   3,BITS,A1
539    [B0] B       key128?
540 || [A1] LDNDW   *INP++,B9:B8
541 || [A0] CMPEQ   6,BITS,B0
542 || [A0] CMPLT   5,BITS,A1
543    [B0] B       key192?
544 || [A1] LDNDW   *INP++,B17:B16
545 || [A0] CMPEQ   8,BITS,B0
546 || [A0] CMPLT   7,BITS,A1
547    [B0] B       key256?
548 || [A1] LDNDW   *INP++,B19:B18
549
550         .if     __TI_EABI__
551    [A0] ADD     0,KEY,$KPA
552 || [A0] ADD     4,KEY,$KPB
553 || [A0] MVKL    \$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA
554 || [A0] ADDKPC  __set_encrypt_key,B6
555    [A0] MVKH    \$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA
556    [A0] ADD     B6,$TEA,$TEA                    ; AES_Te4
557         .else
558    [A0] ADD     0,KEY,$KPA
559 || [A0] ADD     4,KEY,$KPB
560 || [A0] MVKL    (AES_Te4-__set_encrypt_key),$TEA
561 || [A0] ADDKPC  __set_encrypt_key,B6
562    [A0] MVKH    (AES_Te4-__set_encrypt_key),$TEA
563    [A0] ADD     B6,$TEA,$TEA                    ; AES_Te4
564         .endif
565         NOP
566         NOP
567
568         BNOP    RA,5
569 ||      MVK     -2,RET                          ; unknown bit lenght
570 ||      MVK     0,B0                            ; redundant
571 ;;====================================================================
572 ;;====================================================================
573 key128?:
574         .if     .BIG_ENDIAN
575         MV      A9,$K[0]
576 ||      MV      A8,$K[1]
577 ||      MV      B9,$Te4[2]
578 ||      MV      B8,$K[3]
579         .else
580         MV      A8,$K[0]
581 ||      MV      A9,$K[1]
582 ||      MV      B8,$Te4[2]
583 ||      MV      B9,$K[3]
584         .endif
585
586         MVK     256,A0
587 ||      MVK     9,B0
588
589         SPLOOPD 14
590 ||      MVC     B0,ILC
591 ||      MV      $TEA,$TEB
592 ||      ADD     $TEA,A0,A30                     ; rcon
593 ;;====================================================================
594         LDW     *A30++[1],A31                   ; rcon[i]
595 ||      MV      $Te4[2],$K[2]
596 ||      EXTU    $K[3],EXT1,24,$Te4[0]
597         LDBU    *${TEB}[$Te4[0]],$Te4[0]
598 ||      MV      $K[3],A0
599 ||      EXTU    $K[3],EXT2,24,$Te4[1]
600         LDBU    *${TEB}[$Te4[1]],$Te4[1]
601 ||      EXTU    A0,EXT3,24,A0
602 ||      EXTU    $K[3],EXT0,24,$Te4[3]
603         .if     .BIG_ENDIAN
604         LDBU    *${TEA}[A0],$Te4[3]
605 ||      LDBU    *${TEB}[$Te4[3]],A0
606         .else
607         LDBU    *${TEA}[A0],A0
608 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
609         .endif
610
611         STW     $K[0],*$KPA++[2]
612 ||      STW     $K[1],*$KPB++[2]
613         STW     $K[2],*$KPA++[2]
614 ||      STW     $K[3],*$KPB++[2]
615
616         XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
617         .if     .BIG_ENDIAN
618         PACK2   $Te4[0],$Te4[1],$Te4[1]
619         PACK2   $Te4[3],A0,$Te4[3]
620         PACKL4  $Te4[1],$Te4[3],$Te4[3]
621         .else
622         PACK2   $Te4[1],$Te4[0],$Te4[1]
623         PACK2   $Te4[3],A0,$Te4[3]
624         PACKL4  $Te4[3],$Te4[1],$Te4[3]
625         .endif
626         XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
627         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
628         MV      $Te4[0],$K[0]
629 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
630         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
631         SPKERNEL
632 ;;====================================================================
633         BNOP    RA
634         MV      $Te4[2],$K[2]
635 ||      STW     $K[0],*$KPA++[2]
636 ||      STW     $K[1],*$KPB++[2]
637         STW     $K[2],*$KPA++[2]
638 ||      STW     $K[3],*$KPB++[2]
639         MVK     10,B0                           ; rounds
640         STW     B0,*++${KPB}[15]
641         MVK     0,RET
642 ;;====================================================================
643 ;;====================================================================
644 key192?:
645         .if     .BIG_ENDIAN
646         MV      A9,$K[0]
647 ||      MV      A8,$K[1]
648 ||      MV      B9,$K[2]
649 ||      MV      B8,$K[3]
650         MV      B17,$Te4[2]
651 ||      MV      B16,$K[5]
652         .else
653         MV      A8,$K[0]
654 ||      MV      A9,$K[1]
655 ||      MV      B8,$K[2]
656 ||      MV      B9,$K[3]
657         MV      B16,$Te4[2]
658 ||      MV      B17,$K[5]
659         .endif
660
661         MVK     256,A0
662 ||      MVK     6,B0
663         MV      $TEA,$TEB
664 ||      ADD     $TEA,A0,A30                     ; rcon
665 ;;====================================================================
666 loop192?:
667         LDW     *A30++[1],A31                   ; rcon[i]
668 ||      MV      $Te4[2],$K[4]
669 ||      EXTU    $K[5],EXT1,24,$Te4[0]
670         LDBU    *${TEB}[$Te4[0]],$Te4[0]
671 ||      MV      $K[5],A0
672 ||      EXTU    $K[5],EXT2,24,$Te4[1]
673         LDBU    *${TEB}[$Te4[1]],$Te4[1]
674 ||      EXTU    A0,EXT3,24,A0
675 ||      EXTU    $K[5],EXT0,24,$Te4[3]
676         .if     .BIG_ENDIAN
677         LDBU    *${TEA}[A0],$Te4[3]
678 ||      LDBU    *${TEB}[$Te4[3]],A0
679         .else
680         LDBU    *${TEA}[A0],A0
681 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
682         .endif
683
684         STW     $K[0],*$KPA++[2]
685 ||      STW     $K[1],*$KPB++[2]
686         STW     $K[2],*$KPA++[2]
687 ||      STW     $K[3],*$KPB++[2]
688         STW     $K[4],*$KPA++[2]
689 ||      STW     $K[5],*$KPB++[2]
690
691         XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
692         .if     .BIG_ENDIAN
693         PACK2   $Te4[0],$Te4[1],$Te4[1]
694 ||      PACK2   $Te4[3],A0,$Te4[3]
695         PACKL4  $Te4[1],$Te4[3],$Te4[3]
696         .else
697         PACK2   $Te4[1],$Te4[0],$Te4[1]
698 ||      PACK2   $Te4[3],A0,$Te4[3]
699         PACKL4  $Te4[3],$Te4[1],$Te4[3]
700         .endif
701         BDEC    loop192?,B0
702 ||      XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
703         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
704         MV      $Te4[0],$K[0]
705 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
706         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
707         MV      $Te4[2],$K[2]
708 ||      XOR     $K[3],$K[4],$Te4[2]             ; K[4]
709         XOR     $Te4[2],$K[5],$K[5]             ; K[5]
710 ;;====================================================================
711         BNOP    RA
712         STW     $K[0],*$KPA++[2]
713 ||      STW     $K[1],*$KPB++[2]
714         STW     $K[2],*$KPA++[2]
715 ||      STW     $K[3],*$KPB++[2]
716         MVK     12,B0                           ; rounds
717         STW     B0,*++${KPB}[7]
718         MVK     0,RET
719 ;;====================================================================
720 ;;====================================================================
721 key256?:
722         .if     .BIG_ENDIAN
723         MV      A9,$K[0]
724 ||      MV      A8,$K[1]
725 ||      MV      B9,$K[2]
726 ||      MV      B8,$K[3]
727         MV      B17,$K[4]
728 ||      MV      B16,$K[5]
729 ||      MV      B19,$Te4[2]
730 ||      MV      B18,$K[7]
731         .else
732         MV      A8,$K[0]
733 ||      MV      A9,$K[1]
734 ||      MV      B8,$K[2]
735 ||      MV      B9,$K[3]
736         MV      B16,$K[4]
737 ||      MV      B17,$K[5]
738 ||      MV      B18,$Te4[2]
739 ||      MV      B19,$K[7]
740         .endif
741
742         MVK     256,A0
743 ||      MVK     6,B0
744         MV      $TEA,$TEB
745 ||      ADD     $TEA,A0,A30                     ; rcon
746 ;;====================================================================
747 loop256?:
748         LDW     *A30++[1],A31                   ; rcon[i]
749 ||      MV      $Te4[2],$K[6]
750 ||      EXTU    $K[7],EXT1,24,$Te4[0]
751         LDBU    *${TEB}[$Te4[0]],$Te4[0]
752 ||      MV      $K[7],A0
753 ||      EXTU    $K[7],EXT2,24,$Te4[1]
754         LDBU    *${TEB}[$Te4[1]],$Te4[1]
755 ||      EXTU    A0,EXT3,24,A0
756 ||      EXTU    $K[7],EXT0,24,$Te4[3]
757         .if     .BIG_ENDIAN
758         LDBU    *${TEA}[A0],$Te4[3]
759 ||      LDBU    *${TEB}[$Te4[3]],A0
760         .else
761         LDBU    *${TEA}[A0],A0
762 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
763         .endif
764
765         STW     $K[0],*$KPA++[2]
766 ||      STW     $K[1],*$KPB++[2]
767         STW     $K[2],*$KPA++[2]
768 ||      STW     $K[3],*$KPB++[2]
769         STW     $K[4],*$KPA++[2]
770 ||      STW     $K[5],*$KPB++[2]
771         STW     $K[6],*$KPA++[2]
772 ||      STW     $K[7],*$KPB++[2]
773 ||      XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
774         .if     .BIG_ENDIAN
775         PACK2   $Te4[0],$Te4[1],$Te4[1]
776 ||      PACK2   $Te4[3],A0,$Te4[3]
777         PACKL4  $Te4[1],$Te4[3],$Te4[3]
778 ||[!B0] B       done256?
779         .else
780         PACK2   $Te4[1],$Te4[0],$Te4[1]
781 ||      PACK2   $Te4[3],A0,$Te4[3]
782         PACKL4  $Te4[3],$Te4[1],$Te4[3]
783 ||[!B0] B       done256?
784         .endif
785         XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
786         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
787         MV      $Te4[0],$K[0]
788 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
789         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
790
791         MV      $Te4[2],$K[2]
792 || [B0] EXTU    $K[3],EXT0,24,$Te4[0]
793 || [B0] SUB     B0,1,B0
794         LDBU    *${TEB}[$Te4[0]],$Te4[0]
795 ||      MV      $K[3],A0
796 ||      EXTU    $K[3],EXT1,24,$Te4[1]
797         LDBU    *${TEB}[$Te4[1]],$Te4[1]
798 ||      EXTU    A0,EXT2,24,A0
799 ||      EXTU    $K[3],EXT3,24,$Te4[3]
800
801         .if     .BIG_ENDIAN
802         LDBU    *${TEA}[A0],$Te4[3]
803 ||      LDBU    *${TEB}[$Te4[3]],A0
804         NOP     3
805         PACK2   $Te4[0],$Te4[1],$Te4[1]
806         PACK2   $Te4[3],A0,$Te4[3]
807 ||      B       loop256?
808         PACKL4  $Te4[1],$Te4[3],$Te4[3]
809         .else
810         LDBU    *${TEA}[A0],A0
811 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
812         NOP     3
813         PACK2   $Te4[1],$Te4[0],$Te4[1]
814         PACK2   $Te4[3],A0,$Te4[3]
815 ||      B       loop256?
816         PACKL4  $Te4[3],$Te4[1],$Te4[3]
817         .endif
818
819         XOR     $Te4[3],$K[4],$Te4[0]           ; K[4]
820         XOR     $Te4[0],$K[5],$K[5]             ; K[5]
821         MV      $Te4[0],$K[4]
822 ||      XOR     $K[5],$K[6],$Te4[2]             ; K[6]
823         XOR     $Te4[2],$K[7],$K[7]             ; K[7]
824 ;;====================================================================
825 done256?:
826         BNOP    RA
827         STW     $K[0],*$KPA++[2]
828 ||      STW     $K[1],*$KPB++[2]
829         STW     $K[2],*$KPA++[2]
830 ||      STW     $K[3],*$KPB++[2]
831         MVK     14,B0                           ; rounds
832         STW     B0,*--${KPB}[1]
833         MVK     0,RET
834         .endasmfunc
835
836         .global _AES_set_decrypt_key
837 _AES_set_decrypt_key:                           ; build the encryption key schedule, then convert it in place into a decryption schedule
838         .asmfunc
839         B       __set_encrypt_key               ; guarantee local call
840         MV      KEY,B30                         ; B30 is not modified
841         MV      RA, B31                         ; B31 is not modified
842         ADDKPC  ret?,RA,2                       ; make __set_encrypt_key return to ret? below
843 ret?:                                           ; B0 holds rounds or zero
844   [!B0] BNOP    B31                             ; return if zero
845    [B0] SHL     B0,4,A0                         ; offset to last round key
846    [B0] SHRU    B0,1,B1                         ; B1 = rounds/2
847    [B0] SUB     B1,1,B1                         ; B1 = rounds/2 - 1, loaded into ILC below
848    [B0] MVK     0x0000001B,B3                   ; AES polynomial
849    [B0] MVKH    0x07000000,B3                   ; B3 = 0x0700001B, written to GFPGFR further down
850                                                 ; pass 1: swap round keys end-for-end (first with last, and so on)
851         SPLOOPD 9                               ; flip round keys
852 ||      MVC     B1,ILC
853 ||      MV      B30,$KPA                        ; low pointer = start of key schedule
854 ||      ADD     B30,A0,$KPB                     ; high pointer = last round key (uses A0 before it is overwritten)
855 ||      MVK     16,A0                           ; sizeof(round key)
856 ;;====================================================================
857         LDW     *${KPA}[0],A16                  ; A16..A19 = round key at low pointer
858 ||      LDW     *${KPB}[0],B16                  ; B16..B19 = round key at high pointer
859         LDW     *${KPA}[1],A17
860 ||      LDW     *${KPB}[1],B17
861         LDW     *${KPA}[2],A18
862 ||      LDW     *${KPB}[2],B18
863         LDW     *${KPA}[3],A19
864 ||      ADD     $KPA,A0,$KPA                    ; low pointer moves up one round key
865 ||      LDW     *${KPB}[3],B19
866 ||      SUB     $KPB,A0,$KPB                    ; high pointer moves down one round key
867         NOP
868         STW     B16,*${KPA}[-4]                 ; negative/positive indices compensate for the pointer updates above
869 ||      STW     A16,*${KPB}[4]
870         STW     B17,*${KPA}[-3]
871 ||      STW     A17,*${KPB}[5]
872         STW     B18,*${KPA}[-2]
873 ||      STW     A18,*${KPB}[6]
874         STW     B19,*${KPA}[-1]
875 ||      STW     A19,*${KPB}[7]
876         SPKERNEL
877 ;;====================================================================
878         SUB     B0,1,B0                         ; skip last round
879 ||      ADD     B30,A0,$KPA                     ; skip first round
880 ||      ADD     B30,A0,$KPB
881 ||      MVC     GFPGFR,B30                      ; save GFPGFR
882         LDW     *${KPA}[0],$K[0]                ; preload first round key to be transformed
883 ||      LDW     *${KPB}[1],$K[1]
884 ||      MVC     B3,GFPGFR                       ; select AES polynomial for GMPY4
885         LDW     *${KPA}[2],$K[2]
886 ||      LDW     *${KPB}[3],$K[3]
887         MVK     0x00000909,A24
888 ||      MVK     0x00000B0B,B24
889         MVKH    0x09090000,A24                  ; A24 = 0x09090909
890 ||      MVKH    0x0B0B0000,B24                  ; B24 = 0x0B0B0B0B
891         MVC     B0,ILC                          ; one loop iteration per inner round key (rounds-1)
892 ||      SUB     B0,1,B0                         ; B0 now predicates the look-ahead loads/multiplies in the loop
893
894         GMPY4   $K[0],A24,$Kx9[0]               ; *0x09 in GF(2^8), per byte
895 ||      GMPY4   $K[1],A24,$Kx9[1]
896 ||      MVK     0x00000D0D,A25
897 ||      MVK     0x00000E0E,B25
898         GMPY4   $K[2],A24,$Kx9[2]
899 ||      GMPY4   $K[3],A24,$Kx9[3]
900 ||      MVKH    0x0D0D0000,A25                  ; A25 = 0x0D0D0D0D
901 ||      MVKH    0x0E0E0000,B25                  ; B25 = 0x0E0E0E0E
902
903         GMPY4   $K[0],B24,$KxB[0]               ; *0x0B in GF(2^8), per byte
904 ||      GMPY4   $K[1],B24,$KxB[1]
905         GMPY4   $K[2],B24,$KxB[2]
906 ||      GMPY4   $K[3],B24,$KxB[3]
907                                                 ; pass 2: each stored word becomes 0E*k ^ rot16(0D*k) ^ rot(0B*k ^ rot16(09*k), TBL3)
908         SPLOOP  11                              ; InvMixColumns
909 ;;====================================================================
910         GMPY4   $K[0],A25,$KxD[0]               ; *0x0D in GF(2^8), per byte
911 ||      GMPY4   $K[1],A25,$KxD[1]
912 ||      SWAP2   $Kx9[0],$Kx9[0]                 ; rotate by 16
913 ||      SWAP2   $Kx9[1],$Kx9[1]
914 ||      MV      $K[0],$s[0]                     ; keep a copy, K is reloaded in this packet (this or DINT)
915 ||      MV      $K[1],$s[1]
916 || [B0] LDW     *${KPA}[4],$K[0]                ; look ahead: fetch next round key while B0 is non-zero
917 || [B0] LDW     *${KPB}[5],$K[1]
918         GMPY4   $K[2],A25,$KxD[2]
919 ||      GMPY4   $K[3],A25,$KxD[3]
920 ||      SWAP2   $Kx9[2],$Kx9[2]
921 ||      SWAP2   $Kx9[3],$Kx9[3]
922 ||      MV      $K[2],$s[2]
923 ||      MV      $K[3],$s[3]
924 || [B0] LDW     *${KPA}[6],$K[2]
925 || [B0] LDW     *${KPB}[7],$K[3]
926
927         GMPY4   $s[0],B25,$KxE[0]               ; *0x0E in GF(2^8), per byte
928 ||      GMPY4   $s[1],B25,$KxE[1]
929 ||      XOR     $Kx9[0],$KxB[0],$KxB[0]         ; fold rotated *0x09 term into *0x0B term
930 ||      XOR     $Kx9[1],$KxB[1],$KxB[1]
931         GMPY4   $s[2],B25,$KxE[2]
932 ||      GMPY4   $s[3],B25,$KxE[3]
933 ||      XOR     $Kx9[2],$KxB[2],$KxB[2]
934 ||      XOR     $Kx9[3],$KxB[3],$KxB[3]
935
936         ROTL    $KxB[0],TBL3,$KxB[0]            ; rotate by TBL3 (symbol defined outside this section)
937 ||      ROTL    $KxB[1],TBL3,$KxB[1]
938 ||      SWAP2   $KxD[0],$KxD[0]                 ; rotate by 16
939 ||      SWAP2   $KxD[1],$KxD[1]
940         ROTL    $KxB[2],TBL3,$KxB[2]
941 ||      ROTL    $KxB[3],TBL3,$KxB[3]
942 ||      SWAP2   $KxD[2],$KxD[2]
943 ||      SWAP2   $KxD[3],$KxD[3]
944
945         XOR     $KxE[0],$KxD[0],$KxE[0]         ; accumulate rotated *0x0D term
946 ||      XOR     $KxE[1],$KxD[1],$KxE[1]
947 || [B0] GMPY4   $K[0],A24,$Kx9[0]               ; *0x09 for the next round key
948 || [B0] GMPY4   $K[1],A24,$Kx9[1]
949 ||      ADDAW   $KPA,4,$KPA                     ; advance by four words (one round key)
950         XOR     $KxE[2],$KxD[2],$KxE[2]
951 ||      XOR     $KxE[3],$KxD[3],$KxE[3]
952 || [B0] GMPY4   $K[2],A24,$Kx9[2]
953 || [B0] GMPY4   $K[3],A24,$Kx9[3]
954 ||      ADDAW   $KPB,4,$KPB
955
956         XOR     $KxB[0],$KxE[0],$KxE[0]         ; final word = all four terms combined
957 ||      XOR     $KxB[1],$KxE[1],$KxE[1]
958 || [B0] GMPY4   $K[0],B24,$KxB[0]               ; *0x0B for the next round key
959 || [B0] GMPY4   $K[1],B24,$KxB[1]
960         XOR     $KxB[2],$KxE[2],$KxE[2]
961 ||      XOR     $KxB[3],$KxE[3],$KxE[3]
962 || [B0] GMPY4   $K[2],B24,$KxB[2]
963 || [B0] GMPY4   $K[3],B24,$KxB[3]
964 ||      STW     $KxE[0],*${KPA}[-4]             ; write back over the original round key (pointers already advanced)
965 ||      STW     $KxE[1],*${KPB}[-3]
966         STW     $KxE[2],*${KPA}[-2]
967 ||      STW     $KxE[3],*${KPB}[-1]
968 || [B0] SUB     B0,1,B0                         ; count down the look-ahead predicate, stopping at zero
969         SPKERNEL
970 ;;====================================================================
971         BNOP    B31,3                           ; return to the original caller (RA was saved in B31)
972         MVC     B30,GFPGFR                      ; restore GFPGFR(*)
973         MVK     0,RET                           ; return value 0
974         .endasmfunc
975 ___
976 # (*)   Even though ABI doesn't specify GFPGFR as non-volatile, there
977 #       are code samples out there that *assume* its default value.
978 }
979 {
980 my ($inp,$out,$blocks,$key,$ivp)=("A4","B4","A6","B6","A8");
981 $code.=<<___;
982         .global _AES_ctr32_encrypt
983 _AES_ctr32_encrypt:                     ; CTR mode with a 32-bit counter: output = input ^ Ek(counter), one 16-byte block per __encrypt call
984         .asmfunc
985         LDNDW   *${ivp}[0],A31:A30      ; load counter value
986 ||      MV      $blocks,A2              ; reassign $blocks
987 ||      DMV     RA,$key,B27:B26         ; reassign RA and $key
988         LDNDW   *${ivp}[1],B31:B30      ; second half of the 16-byte counter block
989 ||      MVK     0,B2                    ; don't let __encrypt load input
990 ||      MVK     0,A1                    ; and postpone writing output
991         .if     .BIG_ENDIAN
992         NOP
993         .else
994         NOP     4                       ; wait for the loads above to land
995         SWAP2   B31,B31                 ; keep least significant 32 bits
996         SWAP4   B31,B31                 ; in host byte order
997         .endif
998 ctr32_loop?:                            ; A2 = blocks left, A1 = 0 only on the first pass (no keystream yet)
999    [A2] BNOP    __encrypt               ; blocks left: call __encrypt, it returns to ctr32_loop?
1000 || [A1] XOR     A29,A9,A9               ; input^Ek(counter)
1001 || [A1] XOR     A28,A8,A8
1002 || [A2] LDNDW   *INP++,A29:A28          ; load input
1003   [!A2] BNOP    B27                     ; return
1004 || [A1] XOR     B29,B9,B9
1005 || [A1] XOR     B28,B8,B8
1006 || [A2] LDNDW   *INP++,B29:B28
1007         .if     .BIG_ENDIAN
1008    [A1] STNDW   A9:A8,*OUT++            ; save output
1009 || [A2] DMV     A31,A30,A9:A8           ; pass counter value to __encrypt
1010    [A1] STNDW   B9:B8,*OUT++
1011 || [A2] DMV     B31,B30,B9:B8
1012 || [A2] ADD     B30,1,B30               ; counter++
1013         .else
1014    [A1] STNDW   A9:A8,*OUT++            ; save output
1015 || [A2] DMV     A31,A30,A9:A8           ; pass counter value to __encrypt
1016 || [A2] SWAP2   B31,B0                  ; undo the host-order conversion of the counter word (with SWAP4 below)
1017 || [A2] ADD     B31,1,B31               ; counter++
1018    [A1] STNDW   B9:B8,*OUT++
1019 || [A2] MV      B30,B8
1020 || [A2] SWAP4   B0,B9                   ; B9 = counter word in its original byte order
1021         .endif
1022    [A2] ADDKPC  ctr32_loop?,RA          ; return to ctr32_loop?
1023 || [A2] MV      B26,KEY                 ; pass $key
1024 || [A2] SUB     A2,1,A2                 ; $blocks--
1025 ||[!A1] MVK     1,A1                    ; from now on XOR and store the output
1026         NOP                             ; remaining delay slots of the branches above
1027         NOP
1028         .endasmfunc
1029 ___
1030 }
1031 # Tables are kept in endian-neutral manner
1032 $code.=<<___;
1033         .if     __TI_EABI__             ; section name depends on the ABI in use
1034         .sect   ".text:aes_asm.const"
1035         .else
1036         .sect   ".const:aes_asm"
1037         .endif
1038         .align  128
1039 AES_Te:                                 ; encryption T-table: 256 entries of 4 bytes, each 2*S, S, S, 3*S in GF(2^8) for S-box value S
1040         .byte   0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84
1041         .byte   0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
1042         .byte   0xff,0xf2,0xf2,0x0d,    0xd6,0x6b,0x6b,0xbd
1043         .byte   0xde,0x6f,0x6f,0xb1,    0x91,0xc5,0xc5,0x54
1044         .byte   0x60,0x30,0x30,0x50,    0x02,0x01,0x01,0x03
1045         .byte   0xce,0x67,0x67,0xa9,    0x56,0x2b,0x2b,0x7d
1046         .byte   0xe7,0xfe,0xfe,0x19,    0xb5,0xd7,0xd7,0x62
1047         .byte   0x4d,0xab,0xab,0xe6,    0xec,0x76,0x76,0x9a
1048         .byte   0x8f,0xca,0xca,0x45,    0x1f,0x82,0x82,0x9d
1049         .byte   0x89,0xc9,0xc9,0x40,    0xfa,0x7d,0x7d,0x87
1050         .byte   0xef,0xfa,0xfa,0x15,    0xb2,0x59,0x59,0xeb
1051         .byte   0x8e,0x47,0x47,0xc9,    0xfb,0xf0,0xf0,0x0b
1052         .byte   0x41,0xad,0xad,0xec,    0xb3,0xd4,0xd4,0x67
1053         .byte   0x5f,0xa2,0xa2,0xfd,    0x45,0xaf,0xaf,0xea
1054         .byte   0x23,0x9c,0x9c,0xbf,    0x53,0xa4,0xa4,0xf7
1055         .byte   0xe4,0x72,0x72,0x96,    0x9b,0xc0,0xc0,0x5b
1056         .byte   0x75,0xb7,0xb7,0xc2,    0xe1,0xfd,0xfd,0x1c
1057         .byte   0x3d,0x93,0x93,0xae,    0x4c,0x26,0x26,0x6a
1058         .byte   0x6c,0x36,0x36,0x5a,    0x7e,0x3f,0x3f,0x41
1059         .byte   0xf5,0xf7,0xf7,0x02,    0x83,0xcc,0xcc,0x4f
1060         .byte   0x68,0x34,0x34,0x5c,    0x51,0xa5,0xa5,0xf4
1061         .byte   0xd1,0xe5,0xe5,0x34,    0xf9,0xf1,0xf1,0x08
1062         .byte   0xe2,0x71,0x71,0x93,    0xab,0xd8,0xd8,0x73
1063         .byte   0x62,0x31,0x31,0x53,    0x2a,0x15,0x15,0x3f
1064         .byte   0x08,0x04,0x04,0x0c,    0x95,0xc7,0xc7,0x52
1065         .byte   0x46,0x23,0x23,0x65,    0x9d,0xc3,0xc3,0x5e
1066         .byte   0x30,0x18,0x18,0x28,    0x37,0x96,0x96,0xa1
1067         .byte   0x0a,0x05,0x05,0x0f,    0x2f,0x9a,0x9a,0xb5
1068         .byte   0x0e,0x07,0x07,0x09,    0x24,0x12,0x12,0x36
1069         .byte   0x1b,0x80,0x80,0x9b,    0xdf,0xe2,0xe2,0x3d
1070         .byte   0xcd,0xeb,0xeb,0x26,    0x4e,0x27,0x27,0x69
1071         .byte   0x7f,0xb2,0xb2,0xcd,    0xea,0x75,0x75,0x9f
1072         .byte   0x12,0x09,0x09,0x1b,    0x1d,0x83,0x83,0x9e
1073         .byte   0x58,0x2c,0x2c,0x74,    0x34,0x1a,0x1a,0x2e
1074         .byte   0x36,0x1b,0x1b,0x2d,    0xdc,0x6e,0x6e,0xb2
1075         .byte   0xb4,0x5a,0x5a,0xee,    0x5b,0xa0,0xa0,0xfb
1076         .byte   0xa4,0x52,0x52,0xf6,    0x76,0x3b,0x3b,0x4d
1077         .byte   0xb7,0xd6,0xd6,0x61,    0x7d,0xb3,0xb3,0xce
1078         .byte   0x52,0x29,0x29,0x7b,    0xdd,0xe3,0xe3,0x3e
1079         .byte   0x5e,0x2f,0x2f,0x71,    0x13,0x84,0x84,0x97
1080         .byte   0xa6,0x53,0x53,0xf5,    0xb9,0xd1,0xd1,0x68
1081         .byte   0x00,0x00,0x00,0x00,    0xc1,0xed,0xed,0x2c
1082         .byte   0x40,0x20,0x20,0x60,    0xe3,0xfc,0xfc,0x1f
1083         .byte   0x79,0xb1,0xb1,0xc8,    0xb6,0x5b,0x5b,0xed
1084         .byte   0xd4,0x6a,0x6a,0xbe,    0x8d,0xcb,0xcb,0x46
1085         .byte   0x67,0xbe,0xbe,0xd9,    0x72,0x39,0x39,0x4b
1086         .byte   0x94,0x4a,0x4a,0xde,    0x98,0x4c,0x4c,0xd4
1087         .byte   0xb0,0x58,0x58,0xe8,    0x85,0xcf,0xcf,0x4a
1088         .byte   0xbb,0xd0,0xd0,0x6b,    0xc5,0xef,0xef,0x2a
1089         .byte   0x4f,0xaa,0xaa,0xe5,    0xed,0xfb,0xfb,0x16
1090         .byte   0x86,0x43,0x43,0xc5,    0x9a,0x4d,0x4d,0xd7
1091         .byte   0x66,0x33,0x33,0x55,    0x11,0x85,0x85,0x94
1092         .byte   0x8a,0x45,0x45,0xcf,    0xe9,0xf9,0xf9,0x10
1093         .byte   0x04,0x02,0x02,0x06,    0xfe,0x7f,0x7f,0x81
1094         .byte   0xa0,0x50,0x50,0xf0,    0x78,0x3c,0x3c,0x44
1095         .byte   0x25,0x9f,0x9f,0xba,    0x4b,0xa8,0xa8,0xe3
1096         .byte   0xa2,0x51,0x51,0xf3,    0x5d,0xa3,0xa3,0xfe
1097         .byte   0x80,0x40,0x40,0xc0,    0x05,0x8f,0x8f,0x8a
1098         .byte   0x3f,0x92,0x92,0xad,    0x21,0x9d,0x9d,0xbc
1099         .byte   0x70,0x38,0x38,0x48,    0xf1,0xf5,0xf5,0x04
1100         .byte   0x63,0xbc,0xbc,0xdf,    0x77,0xb6,0xb6,0xc1
1101         .byte   0xaf,0xda,0xda,0x75,    0x42,0x21,0x21,0x63
1102         .byte   0x20,0x10,0x10,0x30,    0xe5,0xff,0xff,0x1a
1103         .byte   0xfd,0xf3,0xf3,0x0e,    0xbf,0xd2,0xd2,0x6d
1104         .byte   0x81,0xcd,0xcd,0x4c,    0x18,0x0c,0x0c,0x14
1105         .byte   0x26,0x13,0x13,0x35,    0xc3,0xec,0xec,0x2f
1106         .byte   0xbe,0x5f,0x5f,0xe1,    0x35,0x97,0x97,0xa2
1107         .byte   0x88,0x44,0x44,0xcc,    0x2e,0x17,0x17,0x39
1108         .byte   0x93,0xc4,0xc4,0x57,    0x55,0xa7,0xa7,0xf2
1109         .byte   0xfc,0x7e,0x7e,0x82,    0x7a,0x3d,0x3d,0x47
1110         .byte   0xc8,0x64,0x64,0xac,    0xba,0x5d,0x5d,0xe7
1111         .byte   0x32,0x19,0x19,0x2b,    0xe6,0x73,0x73,0x95
1112         .byte   0xc0,0x60,0x60,0xa0,    0x19,0x81,0x81,0x98
1113         .byte   0x9e,0x4f,0x4f,0xd1,    0xa3,0xdc,0xdc,0x7f
1114         .byte   0x44,0x22,0x22,0x66,    0x54,0x2a,0x2a,0x7e
1115         .byte   0x3b,0x90,0x90,0xab,    0x0b,0x88,0x88,0x83
1116         .byte   0x8c,0x46,0x46,0xca,    0xc7,0xee,0xee,0x29
1117         .byte   0x6b,0xb8,0xb8,0xd3,    0x28,0x14,0x14,0x3c
1118         .byte   0xa7,0xde,0xde,0x79,    0xbc,0x5e,0x5e,0xe2
1119         .byte   0x16,0x0b,0x0b,0x1d,    0xad,0xdb,0xdb,0x76
1120         .byte   0xdb,0xe0,0xe0,0x3b,    0x64,0x32,0x32,0x56
1121         .byte   0x74,0x3a,0x3a,0x4e,    0x14,0x0a,0x0a,0x1e
1122         .byte   0x92,0x49,0x49,0xdb,    0x0c,0x06,0x06,0x0a
1123         .byte   0x48,0x24,0x24,0x6c,    0xb8,0x5c,0x5c,0xe4
1124         .byte   0x9f,0xc2,0xc2,0x5d,    0xbd,0xd3,0xd3,0x6e
1125         .byte   0x43,0xac,0xac,0xef,    0xc4,0x62,0x62,0xa6
1126         .byte   0x39,0x91,0x91,0xa8,    0x31,0x95,0x95,0xa4
1127         .byte   0xd3,0xe4,0xe4,0x37,    0xf2,0x79,0x79,0x8b
1128         .byte   0xd5,0xe7,0xe7,0x32,    0x8b,0xc8,0xc8,0x43
1129         .byte   0x6e,0x37,0x37,0x59,    0xda,0x6d,0x6d,0xb7
1130         .byte   0x01,0x8d,0x8d,0x8c,    0xb1,0xd5,0xd5,0x64
1131         .byte   0x9c,0x4e,0x4e,0xd2,    0x49,0xa9,0xa9,0xe0
1132         .byte   0xd8,0x6c,0x6c,0xb4,    0xac,0x56,0x56,0xfa
1133         .byte   0xf3,0xf4,0xf4,0x07,    0xcf,0xea,0xea,0x25
1134         .byte   0xca,0x65,0x65,0xaf,    0xf4,0x7a,0x7a,0x8e
1135         .byte   0x47,0xae,0xae,0xe9,    0x10,0x08,0x08,0x18
1136         .byte   0x6f,0xba,0xba,0xd5,    0xf0,0x78,0x78,0x88
1137         .byte   0x4a,0x25,0x25,0x6f,    0x5c,0x2e,0x2e,0x72
1138         .byte   0x38,0x1c,0x1c,0x24,    0x57,0xa6,0xa6,0xf1
1139         .byte   0x73,0xb4,0xb4,0xc7,    0x97,0xc6,0xc6,0x51
1140         .byte   0xcb,0xe8,0xe8,0x23,    0xa1,0xdd,0xdd,0x7c
1141         .byte   0xe8,0x74,0x74,0x9c,    0x3e,0x1f,0x1f,0x21
1142         .byte   0x96,0x4b,0x4b,0xdd,    0x61,0xbd,0xbd,0xdc
1143         .byte   0x0d,0x8b,0x8b,0x86,    0x0f,0x8a,0x8a,0x85
1144         .byte   0xe0,0x70,0x70,0x90,    0x7c,0x3e,0x3e,0x42
1145         .byte   0x71,0xb5,0xb5,0xc4,    0xcc,0x66,0x66,0xaa
1146         .byte   0x90,0x48,0x48,0xd8,    0x06,0x03,0x03,0x05
1147         .byte   0xf7,0xf6,0xf6,0x01,    0x1c,0x0e,0x0e,0x12
1148         .byte   0xc2,0x61,0x61,0xa3,    0x6a,0x35,0x35,0x5f
1149         .byte   0xae,0x57,0x57,0xf9,    0x69,0xb9,0xb9,0xd0
1150         .byte   0x17,0x86,0x86,0x91,    0x99,0xc1,0xc1,0x58
1151         .byte   0x3a,0x1d,0x1d,0x27,    0x27,0x9e,0x9e,0xb9
1152         .byte   0xd9,0xe1,0xe1,0x38,    0xeb,0xf8,0xf8,0x13
1153         .byte   0x2b,0x98,0x98,0xb3,    0x22,0x11,0x11,0x33
1154         .byte   0xd2,0x69,0x69,0xbb,    0xa9,0xd9,0xd9,0x70
1155         .byte   0x07,0x8e,0x8e,0x89,    0x33,0x94,0x94,0xa7
1156         .byte   0x2d,0x9b,0x9b,0xb6,    0x3c,0x1e,0x1e,0x22
1157         .byte   0x15,0x87,0x87,0x92,    0xc9,0xe9,0xe9,0x20
1158         .byte   0x87,0xce,0xce,0x49,    0xaa,0x55,0x55,0xff
1159         .byte   0x50,0x28,0x28,0x78,    0xa5,0xdf,0xdf,0x7a
1160         .byte   0x03,0x8c,0x8c,0x8f,    0x59,0xa1,0xa1,0xf8
1161         .byte   0x09,0x89,0x89,0x80,    0x1a,0x0d,0x0d,0x17
1162         .byte   0x65,0xbf,0xbf,0xda,    0xd7,0xe6,0xe6,0x31
1163         .byte   0x84,0x42,0x42,0xc6,    0xd0,0x68,0x68,0xb8
1164         .byte   0x82,0x41,0x41,0xc3,    0x29,0x99,0x99,0xb0
1165         .byte   0x5a,0x2d,0x2d,0x77,    0x1e,0x0f,0x0f,0x11
1166         .byte   0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
1167         .byte   0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
1168 AES_Te4:                                ; AES S-box, 256 single-byte entries; must directly follow AES_Te and precede rcon
1169         .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
1170         .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
1171         .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
1172         .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
1173         .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
1174         .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
1175         .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
1176         .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
1177         .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
1178         .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
1179         .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
1180         .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
1181         .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
1182         .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
1183         .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
1184         .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
1185         .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
1186         .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
1187         .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
1188         .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
1189         .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
1190         .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
1191         .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
1192         .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
1193         .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
1194         .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
1195         .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
1196         .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
1197         .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
1198         .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
1199         .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
1200         .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
1201 rcon:                                   ; key-schedule round constants, ten 4-byte entries; the key setup code addresses this as table base + 256 (presumably the AES_Te4 base)
1202         .byte   0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00
1203         .byte   0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
1204         .byte   0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
1205         .byte   0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
1206         .byte   0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
1207         .align  128
1208 AES_Td:                                 ; decryption T-table: 256 entries of 4 bytes, each 0E*s, 09*s, 0D*s, 0B*s in GF(2^8) for inverse S-box value s
1209         .byte   0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53
1210         .byte   0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
1211         .byte   0x3b,0xab,0x6b,0xcb,    0x1f,0x9d,0x45,0xf1
1212         .byte   0xac,0xfa,0x58,0xab,    0x4b,0xe3,0x03,0x93
1213         .byte   0x20,0x30,0xfa,0x55,    0xad,0x76,0x6d,0xf6
1214         .byte   0x88,0xcc,0x76,0x91,    0xf5,0x02,0x4c,0x25
1215         .byte   0x4f,0xe5,0xd7,0xfc,    0xc5,0x2a,0xcb,0xd7
1216         .byte   0x26,0x35,0x44,0x80,    0xb5,0x62,0xa3,0x8f
1217         .byte   0xde,0xb1,0x5a,0x49,    0x25,0xba,0x1b,0x67
1218         .byte   0x45,0xea,0x0e,0x98,    0x5d,0xfe,0xc0,0xe1
1219         .byte   0xc3,0x2f,0x75,0x02,    0x81,0x4c,0xf0,0x12
1220         .byte   0x8d,0x46,0x97,0xa3,    0x6b,0xd3,0xf9,0xc6
1221         .byte   0x03,0x8f,0x5f,0xe7,    0x15,0x92,0x9c,0x95
1222         .byte   0xbf,0x6d,0x7a,0xeb,    0x95,0x52,0x59,0xda
1223         .byte   0xd4,0xbe,0x83,0x2d,    0x58,0x74,0x21,0xd3
1224         .byte   0x49,0xe0,0x69,0x29,    0x8e,0xc9,0xc8,0x44
1225         .byte   0x75,0xc2,0x89,0x6a,    0xf4,0x8e,0x79,0x78
1226         .byte   0x99,0x58,0x3e,0x6b,    0x27,0xb9,0x71,0xdd
1227         .byte   0xbe,0xe1,0x4f,0xb6,    0xf0,0x88,0xad,0x17
1228         .byte   0xc9,0x20,0xac,0x66,    0x7d,0xce,0x3a,0xb4
1229         .byte   0x63,0xdf,0x4a,0x18,    0xe5,0x1a,0x31,0x82
1230         .byte   0x97,0x51,0x33,0x60,    0x62,0x53,0x7f,0x45
1231         .byte   0xb1,0x64,0x77,0xe0,    0xbb,0x6b,0xae,0x84
1232         .byte   0xfe,0x81,0xa0,0x1c,    0xf9,0x08,0x2b,0x94
1233         .byte   0x70,0x48,0x68,0x58,    0x8f,0x45,0xfd,0x19
1234         .byte   0x94,0xde,0x6c,0x87,    0x52,0x7b,0xf8,0xb7
1235         .byte   0xab,0x73,0xd3,0x23,    0x72,0x4b,0x02,0xe2
1236         .byte   0xe3,0x1f,0x8f,0x57,    0x66,0x55,0xab,0x2a
1237         .byte   0xb2,0xeb,0x28,0x07,    0x2f,0xb5,0xc2,0x03
1238         .byte   0x86,0xc5,0x7b,0x9a,    0xd3,0x37,0x08,0xa5
1239         .byte   0x30,0x28,0x87,0xf2,    0x23,0xbf,0xa5,0xb2
1240         .byte   0x02,0x03,0x6a,0xba,    0xed,0x16,0x82,0x5c
1241         .byte   0x8a,0xcf,0x1c,0x2b,    0xa7,0x79,0xb4,0x92
1242         .byte   0xf3,0x07,0xf2,0xf0,    0x4e,0x69,0xe2,0xa1
1243         .byte   0x65,0xda,0xf4,0xcd,    0x06,0x05,0xbe,0xd5
1244         .byte   0xd1,0x34,0x62,0x1f,    0xc4,0xa6,0xfe,0x8a
1245         .byte   0x34,0x2e,0x53,0x9d,    0xa2,0xf3,0x55,0xa0
1246         .byte   0x05,0x8a,0xe1,0x32,    0xa4,0xf6,0xeb,0x75
1247         .byte   0x0b,0x83,0xec,0x39,    0x40,0x60,0xef,0xaa
1248         .byte   0x5e,0x71,0x9f,0x06,    0xbd,0x6e,0x10,0x51
1249         .byte   0x3e,0x21,0x8a,0xf9,    0x96,0xdd,0x06,0x3d
1250         .byte   0xdd,0x3e,0x05,0xae,    0x4d,0xe6,0xbd,0x46
1251         .byte   0x91,0x54,0x8d,0xb5,    0x71,0xc4,0x5d,0x05
1252         .byte   0x04,0x06,0xd4,0x6f,    0x60,0x50,0x15,0xff
1253         .byte   0x19,0x98,0xfb,0x24,    0xd6,0xbd,0xe9,0x97
1254         .byte   0x89,0x40,0x43,0xcc,    0x67,0xd9,0x9e,0x77
1255         .byte   0xb0,0xe8,0x42,0xbd,    0x07,0x89,0x8b,0x88
1256         .byte   0xe7,0x19,0x5b,0x38,    0x79,0xc8,0xee,0xdb
1257         .byte   0xa1,0x7c,0x0a,0x47,    0x7c,0x42,0x0f,0xe9
1258         .byte   0xf8,0x84,0x1e,0xc9,    0x00,0x00,0x00,0x00
1259         .byte   0x09,0x80,0x86,0x83,    0x32,0x2b,0xed,0x48
1260         .byte   0x1e,0x11,0x70,0xac,    0x6c,0x5a,0x72,0x4e
1261         .byte   0xfd,0x0e,0xff,0xfb,    0x0f,0x85,0x38,0x56
1262         .byte   0x3d,0xae,0xd5,0x1e,    0x36,0x2d,0x39,0x27
1263         .byte   0x0a,0x0f,0xd9,0x64,    0x68,0x5c,0xa6,0x21
1264         .byte   0x9b,0x5b,0x54,0xd1,    0x24,0x36,0x2e,0x3a
1265         .byte   0x0c,0x0a,0x67,0xb1,    0x93,0x57,0xe7,0x0f
1266         .byte   0xb4,0xee,0x96,0xd2,    0x1b,0x9b,0x91,0x9e
1267         .byte   0x80,0xc0,0xc5,0x4f,    0x61,0xdc,0x20,0xa2
1268         .byte   0x5a,0x77,0x4b,0x69,    0x1c,0x12,0x1a,0x16
1269         .byte   0xe2,0x93,0xba,0x0a,    0xc0,0xa0,0x2a,0xe5
1270         .byte   0x3c,0x22,0xe0,0x43,    0x12,0x1b,0x17,0x1d
1271         .byte   0x0e,0x09,0x0d,0x0b,    0xf2,0x8b,0xc7,0xad
1272         .byte   0x2d,0xb6,0xa8,0xb9,    0x14,0x1e,0xa9,0xc8
1273         .byte   0x57,0xf1,0x19,0x85,    0xaf,0x75,0x07,0x4c
1274         .byte   0xee,0x99,0xdd,0xbb,    0xa3,0x7f,0x60,0xfd
1275         .byte   0xf7,0x01,0x26,0x9f,    0x5c,0x72,0xf5,0xbc
1276         .byte   0x44,0x66,0x3b,0xc5,    0x5b,0xfb,0x7e,0x34
1277         .byte   0x8b,0x43,0x29,0x76,    0xcb,0x23,0xc6,0xdc
1278         .byte   0xb6,0xed,0xfc,0x68,    0xb8,0xe4,0xf1,0x63
1279         .byte   0xd7,0x31,0xdc,0xca,    0x42,0x63,0x85,0x10
1280         .byte   0x13,0x97,0x22,0x40,    0x84,0xc6,0x11,0x20
1281         .byte   0x85,0x4a,0x24,0x7d,    0xd2,0xbb,0x3d,0xf8
1282         .byte   0xae,0xf9,0x32,0x11,    0xc7,0x29,0xa1,0x6d
1283         .byte   0x1d,0x9e,0x2f,0x4b,    0xdc,0xb2,0x30,0xf3
1284         .byte   0x0d,0x86,0x52,0xec,    0x77,0xc1,0xe3,0xd0
1285         .byte   0x2b,0xb3,0x16,0x6c,    0xa9,0x70,0xb9,0x99
1286         .byte   0x11,0x94,0x48,0xfa,    0x47,0xe9,0x64,0x22
1287         .byte   0xa8,0xfc,0x8c,0xc4,    0xa0,0xf0,0x3f,0x1a
1288         .byte   0x56,0x7d,0x2c,0xd8,    0x22,0x33,0x90,0xef
1289         .byte   0x87,0x49,0x4e,0xc7,    0xd9,0x38,0xd1,0xc1
1290         .byte   0x8c,0xca,0xa2,0xfe,    0x98,0xd4,0x0b,0x36
1291         .byte   0xa6,0xf5,0x81,0xcf,    0xa5,0x7a,0xde,0x28
1292         .byte   0xda,0xb7,0x8e,0x26,    0x3f,0xad,0xbf,0xa4
1293         .byte   0x2c,0x3a,0x9d,0xe4,    0x50,0x78,0x92,0x0d
1294         .byte   0x6a,0x5f,0xcc,0x9b,    0x54,0x7e,0x46,0x62
1295         .byte   0xf6,0x8d,0x13,0xc2,    0x90,0xd8,0xb8,0xe8
1296         .byte   0x2e,0x39,0xf7,0x5e,    0x82,0xc3,0xaf,0xf5
1297         .byte   0x9f,0x5d,0x80,0xbe,    0x69,0xd0,0x93,0x7c
1298         .byte   0x6f,0xd5,0x2d,0xa9,    0xcf,0x25,0x12,0xb3
1299         .byte   0xc8,0xac,0x99,0x3b,    0x10,0x18,0x7d,0xa7
1300         .byte   0xe8,0x9c,0x63,0x6e,    0xdb,0x3b,0xbb,0x7b
1301         .byte   0xcd,0x26,0x78,0x09,    0x6e,0x59,0x18,0xf4
1302         .byte   0xec,0x9a,0xb7,0x01,    0x83,0x4f,0x9a,0xa8
1303         .byte   0xe6,0x95,0x6e,0x65,    0xaa,0xff,0xe6,0x7e
1304         .byte   0x21,0xbc,0xcf,0x08,    0xef,0x15,0xe8,0xe6
1305         .byte   0xba,0xe7,0x9b,0xd9,    0x4a,0x6f,0x36,0xce
1306         .byte   0xea,0x9f,0x09,0xd4,    0x29,0xb0,0x7c,0xd6
1307         .byte   0x31,0xa4,0xb2,0xaf,    0x2a,0x3f,0x23,0x31
1308         .byte   0xc6,0xa5,0x94,0x30,    0x35,0xa2,0x66,0xc0
1309         .byte   0x74,0x4e,0xbc,0x37,    0xfc,0x82,0xca,0xa6
1310         .byte   0xe0,0x90,0xd0,0xb0,    0x33,0xa7,0xd8,0x15
1311         .byte   0xf1,0x04,0x98,0x4a,    0x41,0xec,0xda,0xf7
1312         .byte   0x7f,0xcd,0x50,0x0e,    0x17,0x91,0xf6,0x2f
1313         .byte   0x76,0x4d,0xd6,0x8d,    0x43,0xef,0xb0,0x4d
1314         .byte   0xcc,0xaa,0x4d,0x54,    0xe4,0x96,0x04,0xdf
1315         .byte   0x9e,0xd1,0xb5,0xe3,    0x4c,0x6a,0x88,0x1b
1316         .byte   0xc1,0x2c,0x1f,0xb8,    0x46,0x65,0x51,0x7f
1317         .byte   0x9d,0x5e,0xea,0x04,    0x01,0x8c,0x35,0x5d
1318         .byte   0xfa,0x87,0x74,0x73,    0xfb,0x0b,0x41,0x2e
1319         .byte   0xb3,0x67,0x1d,0x5a,    0x92,0xdb,0xd2,0x52
1320         .byte   0xe9,0x10,0x56,0x33,    0x6d,0xd6,0x47,0x13
1321         .byte   0x9a,0xd7,0x61,0x8c,    0x37,0xa1,0x0c,0x7a
1322         .byte   0x59,0xf8,0x14,0x8e,    0xeb,0x13,0x3c,0x89
1323         .byte   0xce,0xa9,0x27,0xee,    0xb7,0x61,0xc9,0x35
1324         .byte   0xe1,0x1c,0xe5,0xed,    0x7a,0x47,0xb1,0x3c
1325         .byte   0x9c,0xd2,0xdf,0x59,    0x55,0xf2,0x73,0x3f
1326         .byte   0x18,0x14,0xce,0x79,    0x73,0xc7,0x37,0xbf
1327         .byte   0x53,0xf7,0xcd,0xea,    0x5f,0xfd,0xaa,0x5b
1328         .byte   0xdf,0x3d,0x6f,0x14,    0x78,0x44,0xdb,0x86
1329         .byte   0xca,0xaf,0xf3,0x81,    0xb9,0x68,0xc4,0x3e
1330         .byte   0x38,0x24,0x34,0x2c,    0xc2,0xa3,0x40,0x5f
1331         .byte   0x16,0x1d,0xc3,0x72,    0xbc,0xe2,0x25,0x0c
1332         .byte   0x28,0x3c,0x49,0x8b,    0xff,0x0d,0x95,0x41
1333         .byte   0x39,0xa8,0x01,0x71,    0x08,0x0c,0xb3,0xde
1334         .byte   0xd8,0xb4,0xe4,0x9c,    0x64,0x56,0xc1,0x90
1335         .byte   0x7b,0xcb,0x84,0x61,    0xd5,0x32,0xb6,0x70
1336         .byte   0x48,0x6c,0x5c,0x74,    0xd0,0xb8,0x57,0x42
1337 AES_Td4:                                ; AES inverse S-box, 256 single-byte entries, placed directly after AES_Td
1338         .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
1339         .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1340         .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1341         .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1342         .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1343         .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1344         .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1345         .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1346         .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1347         .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1348         .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1349         .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1350         .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1351         .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1352         .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1353         .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1354         .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1355         .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1356         .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1357         .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1358         .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1359         .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1360         .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1361         .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1362         .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1363         .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1364         .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1365         .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1366         .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1367         .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1368         .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1369         .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1370         .cstring "AES for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
1371         .align  4                       ; re-align after the variable-length ident string
1372 ___
1373
1374 print $code;
1375 close STDOUT;