PA-RISC assembler pack: switch to bve in 64-bit builds.
[openssl.git] / crypto / aes / asm / aes-c64xplus.pl
1 #!/usr/bin/env perl
2 #
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9 #
10 # [Endian-neutral] AES for C64x+.
11 #
12 # Even though SPLOOPs are scheduled for 13 cycles, and thus expected
13 # performance is ~8.5 cycles per byte processed with 128-bit key,
14 # measured performance turned out to be ~10 cycles per byte. Discrepancy
15 # must be caused by limitations of L1D memory banking(*), see SPRU871
16 # TI publication for further details. If it's any consolation, it's still
17 # ~20% faster than TI's linear assembly module anyway... Compared to
18 # aes_core.c compiled with cl6x 6.0 with -mv6400+ -o2 options this
19 # code is 3.75x faster and almost 3x smaller (tables included).
20 #
21 # (*)   This means that there might be subtle correlation between data
22 #       and timing and one can wonder if it can be ... attacked:-(
23 #       On the other hand this also means that *if* one chooses to
24 #       implement *4* T-tables variant [instead of 1 T-table as in
25 #       this implementation, or in addition to], then one ought to
26 #       *interleave* them. Even though it complicates addressing,
27 #       references to interleaved tables would be guaranteed not to
28 #       clash. I reckon that it should be possible to break 8 cycles
29 #       per byte "barrier," i.e. improve by ~20%, naturally at the
30 #       cost of 8x increased pressure on L1D. 8x because you'd have
31 #       to interleave both Te and Td tables...
32
33 until (!($output=shift) || $output=~/\w[\w\-]*\.\w+$/) {}   # drop arguments until one looks like "name.ext" (or none are left)
34 open(STDOUT,">$output");                                    # everything printed to STDOUT goes to that file
35
36 ($TEA,$TEB)=qw(A5 B5);                  # lookup-table base pointers, A- and B-side
37 ($KPA,$KPB)=qw(A3 B1);                  # key-schedule pointers, A- and B-side
38 @K=qw(A6 B6 A7 B7);                     # current round-key words
39 @s=qw(A8 B8 A9 B9);                     # cipher state words
40 @Te0=@Td0=qw(A16 B16 A17 B17);          # per-table temporaries; the Td names
41 @Te1=@Td1=qw(A18 B18 A19 B19);          # alias the same registers as the
42 @Te2=@Td2=qw(A20 B20 A21 B21);          # Te names
43 @Te3=@Td3=qw(A22 B22 A23 B23);
44
45 $code=<<___;
46         .text
;; When __TI_EABI__ is set, every underscore-prefixed _AES_xxx name used
;; below is substituted with the unprefixed AES_xxx name.
47         .if     __TI_EABI__
48         .nocmp
49         .asg    AES_encrypt,_AES_encrypt
50         .asg    AES_decrypt,_AES_decrypt
51         .asg    AES_set_encrypt_key,_AES_set_encrypt_key
52         .asg    AES_set_decrypt_key,_AES_set_decrypt_key
53         .asg    AES_ctr32_encrypt,_AES_ctr32_encrypt
54         .endif
55
;; Symbolic names for the registers that carry arguments, the return
;; value, the return address and the stack pointer.
56         .asg    B3,RA
57         .asg    A4,INP
58         .asg    B4,OUT
59         .asg    A6,KEY
60         .asg    A4,RET
61         .asg    B15,SP
62
;; EXT0..EXT3 are the left-shift operands of the EXTU byte extractions
;; used below (the right shift is always 24), so EXT0 selects the least
;; significant byte and EXT3 the most significant one. TBL1..TBL3 are
;; the ROTL rotate counts applied to table words.
63         .eval   24,EXT0
64         .eval   16,EXT1
65         .eval   8,EXT2
66         .eval   0,EXT3
67         .eval   8,TBL1
68         .eval   16,TBL2
69         .eval   24,TBL3
70
;; Big-endian builds mirror both sets of constants.
71         .if     .BIG_ENDIAN
72         .eval   24-EXT0,EXT0
73         .eval   24-EXT1,EXT1
74         .eval   24-EXT2,EXT2
75         .eval   24-EXT3,EXT3
76         .eval   32-TBL1,TBL1
77         .eval   32-TBL2,TBL2
78         .eval   32-TBL3,TBL3
79         .endif
80
81         .global _AES_encrypt
;; _AES_encrypt: encrypt one 16-byte block.
;;   INP (A4) -> input block
;;   OUT (B4) -> output block
;;   KEY (A6) -> key schedule; the round count is read from word
;;               index 60 of it
;;   Returns through RA (B3).
;; The public entry sets B2=1, which predicates the block load and the
;; block store. Entering at __encrypt with B2=0 skips both memory
;; accesses, so the block is taken from and left in A9:A8/B9:B8 --
;; presumably for internal callers (TODO confirm against the callers).
82 _AES_encrypt:
83         .asmfunc
84         MVK     1,B2
85 __encrypt:
;; Load the input and form the address of AES_Te position-independently:
;; label offset (MVKL/MVKH) plus the run-time address of the entry point
;; (ADDKPC). The key schedule is walked with two pointers, one per
;; register file, each stepping two words at a time.
86         .if     __TI_EABI__
87    [B2] LDNDW   *INP++,A9:A8                    ; load input
88 ||      MVKL    \$PCR_OFFSET(AES_Te,_AES_encrypt),$TEA
89 ||      ADDKPC  _AES_encrypt,B0
90    [B2] LDNDW   *INP++,B9:B8
91 ||      MVKH    \$PCR_OFFSET(AES_Te,_AES_encrypt),$TEA
92 ||      ADD     0,KEY,$KPA
93 ||      ADD     4,KEY,$KPB
94         .else
95    [B2] LDNDW   *INP++,A9:A8                    ; load input
96 ||      MVKL    (AES_Te-_AES_encrypt),$TEA
97 ||      ADDKPC  _AES_encrypt,B0
98    [B2] LDNDW   *INP++,B9:B8
99 ||      MVKH    (AES_Te-_AES_encrypt),$TEA
100 ||      ADD     0,KEY,$KPA
101 ||      ADD     4,KEY,$KPB
102         .endif
103         LDW     *$KPA++[2],$Te0[0]              ; zero round key
104 ||      LDW     *$KPB++[2],$Te0[1]
105 ||      MVK     60,A0                           ; word index of the rounds field
106 ||      ADD     B0,$TEA,$TEA                    ; AES_Te
107         LDW     *KEY[A0],B0                     ; rounds
108 ||      MVK     1024,A0                         ; sizeof(AES_Te)
109         LDW     *$KPA++[2],$Te0[2]
110 ||      LDW     *$KPB++[2],$Te0[3]
111 ||      MV      $TEA,$TEB
112         NOP
;; Move the loaded doublewords into the state registers; which half of
;; each pair becomes which state word depends on endianness.
113         .if     .BIG_ENDIAN
114         MV      A9,$s[0]
115 ||      MV      A8,$s[1]
116 ||      MV      B9,$s[2]
117 ||      MV      B8,$s[3]
118         .else
119         MV      A8,$s[0]
120 ||      MV      A9,$s[1]
121 ||      MV      B8,$s[2]
122 ||      MV      B9,$s[3]
123         .endif
124         XOR     $Te0[0],$s[0],$s[0]
125 ||      XOR     $Te0[1],$s[1],$s[1]
126 ||      LDW     *$KPA++[2],$K[0]                ; 1st round key
127 ||      LDW     *$KPB++[2],$K[1]
128         SUB     B0,2,B0                         ; ILC is loaded with rounds-2
129
;; Software-pipelined round loop (SPLOOPD with a 13-cycle iteration
;; interval). Each iteration extracts the state bytes with EXTU, looks
;; them up in the single AES_Te table, rotates the fetched words with
;; ROTL to stand in for the other tables, and XORs in the round key
;; while the next round key is being loaded.
130         SPLOOPD 13
131 ||      MVC     B0,ILC
132 ||      LDW     *$KPA++[2],$K[2]
133 ||      LDW     *$KPB++[2],$K[3]
134 ;;====================================================================
135         EXTU    $s[1],EXT1,24,$Te1[1]
136 ||      EXTU    $s[0],EXT3,24,$Te3[0]
137         LDW     *${TEB}[$Te1[1]],$Te1[1]        ; Te1[s1>>8],   t0
138 ||      LDW     *${TEA}[$Te3[0]],$Te3[0]        ; Te3[s0>>24],  t1
139 ||      XOR     $s[2],$Te0[2],$s[2]             ; modulo-scheduled
140 ||      XOR     $s[3],$Te0[3],$s[3]             ; modulo-scheduled
141 ||      EXTU    $s[1],EXT3,24,$Te3[1]
142 ||      EXTU    $s[0],EXT1,24,$Te1[0]
143         LDW     *${TEB}[$Te3[1]],$Te3[1]        ; Te3[s1>>24],  t2
144 ||      LDW     *${TEA}[$Te1[0]],$Te1[0]        ; Te1[s0>>8],   t3
145 ||      EXTU    $s[2],EXT2,24,$Te2[2]
146 ||      EXTU    $s[3],EXT2,24,$Te2[3]
147         LDW     *${TEA}[$Te2[2]],$Te2[2]        ; Te2[s2>>16],  t0
148 ||      LDW     *${TEB}[$Te2[3]],$Te2[3]        ; Te2[s3>>16],  t1
149 ||      EXTU    $s[3],EXT3,24,$Te3[3]
150 ||      EXTU    $s[2],EXT1,24,$Te1[2]
151         LDW     *${TEB}[$Te3[3]],$Te3[3]        ; Te3[s3>>24],  t0
152 ||      LDW     *${TEA}[$Te1[2]],$Te1[2]        ; Te1[s2>>8],   t1
153 ||      EXTU    $s[0],EXT2,24,$Te2[0]
154 ||      EXTU    $s[1],EXT2,24,$Te2[1]
155         LDW     *${TEA}[$Te2[0]],$Te2[0]        ; Te2[s0>>16],  t2
156 ||      LDW     *${TEB}[$Te2[1]],$Te2[1]        ; Te2[s1>>16],  t3
157 ||      EXTU    $s[3],EXT1,24,$Te1[3]
158 ||      EXTU    $s[2],EXT3,24,$Te3[2]
159         LDW     *${TEB}[$Te1[3]],$Te1[3]        ; Te1[s3>>8],   t2
160 ||      LDW     *${TEA}[$Te3[2]],$Te3[2]        ; Te3[s2>>24],  t3
161 ||      ROTL    $Te1[1],TBL1,$Te3[0]            ; t0
162 ||      ROTL    $Te3[0],TBL3,$Te1[1]            ; t1
163 ||      EXTU    $s[0],EXT0,24,$Te0[0]
164 ||      EXTU    $s[1],EXT0,24,$Te0[1]
165         LDW     *${TEA}[$Te0[0]],$Te0[0]        ; Te0[s0],      t0
166 ||      LDW     *${TEB}[$Te0[1]],$Te0[1]        ; Te0[s1],      t1
167 ||      ROTL    $Te3[1],TBL3,$Te1[0]            ; t2
168 ||      ROTL    $Te1[0],TBL1,$Te3[1]            ; t3
169 ||      EXTU    $s[2],EXT0,24,$Te0[2]
170 ||      EXTU    $s[3],EXT0,24,$Te0[3]
171         LDW     *${TEA}[$Te0[2]],$Te0[2]        ; Te0[s2],      t2
172 ||      LDW     *${TEB}[$Te0[3]],$Te0[3]        ; Te0[s3],      t3
173 ||      ROTL    $Te2[2],TBL2,$Te2[2]            ; t0
174 ||      ROTL    $Te2[3],TBL2,$Te2[3]            ; t1
175 ||      XOR     $K[0],$Te3[0],$s[0]
176 ||      XOR     $K[1],$Te1[1],$s[1]
177         ROTL    $Te3[3],TBL3,$Te1[2]            ; t0
178 ||      ROTL    $Te1[2],TBL1,$Te3[3]            ; t1
179 ||      XOR     $K[2],$Te1[0],$s[2]
180 ||      XOR     $K[3],$Te3[1],$s[3]
181 ||      LDW     *$KPA++[2],$K[0]                ; next round key
182 ||      LDW     *$KPB++[2],$K[1]
183         ROTL    $Te2[0],TBL2,$Te2[0]            ; t2
184 ||      ROTL    $Te2[1],TBL2,$Te2[1]            ; t3
185 ||      XOR     $s[0],$Te2[2],$s[0]
186 ||      XOR     $s[1],$Te2[3],$s[1]
187 ||      LDW     *$KPA++[2],$K[2]
188 ||      LDW     *$KPB++[2],$K[3]
189         ROTL    $Te1[3],TBL1,$Te3[2]            ; t2
190 ||      ROTL    $Te3[2],TBL3,$Te1[3]            ; t3
191 ||      XOR     $s[0],$Te1[2],$s[0]
192 ||      XOR     $s[1],$Te3[3],$s[1]
193         XOR     $s[2],$Te2[0],$s[2]
194 ||      XOR     $s[3],$Te2[1],$s[3]
195 ||      XOR     $s[0],$Te0[0],$s[0]
196 ||      XOR     $s[1],$Te0[1],$s[1]
197         SPKERNEL
198 ||      XOR.L   $s[2],$Te3[2],$s[2]
199 ||      XOR.L   $s[3],$Te1[3],$s[3]
200 ;;====================================================================
;; Final round: both table pointers are advanced by 1024 bytes to Te4
;; and single bytes are fetched with LDBU instead of words.
201         ADD.D   ${TEA},A0,${TEA}                ; point to Te4
202 ||      ADD.D   ${TEB},A0,${TEB}
203 ||      EXTU    $s[1],EXT1,24,$Te1[1]
204 ||      EXTU    $s[0],EXT3,24,$Te3[0]
205         LDBU    *${TEB}[$Te1[1]],$Te1[1]        ; Te1[s1>>8],   t0
206 ||      LDBU    *${TEA}[$Te3[0]],$Te3[0]        ; Te3[s0>>24],  t1
207 ||      XOR     $s[2],$Te0[2],$s[2]             ; modulo-scheduled
208 ||      XOR     $s[3],$Te0[3],$s[3]             ; modulo-scheduled
209 ||      EXTU    $s[0],EXT0,24,$Te0[0]
210 ||      EXTU    $s[1],EXT0,24,$Te0[1]
211         LDBU    *${TEA}[$Te0[0]],$Te0[0]        ; Te0[s0],      t0
212 ||      LDBU    *${TEB}[$Te0[1]],$Te0[1]        ; Te0[s1],      t1
213 ||      EXTU    $s[3],EXT3,24,$Te3[3]
214 ||      EXTU    $s[2],EXT1,24,$Te1[2]
215         LDBU    *${TEB}[$Te3[3]],$Te3[3]        ; Te3[s3>>24],  t0
216 ||      LDBU    *${TEA}[$Te1[2]],$Te1[2]        ; Te1[s2>>8],   t1
217 ||      EXTU    $s[2],EXT2,24,$Te2[2]
218 ||      EXTU    $s[3],EXT2,24,$Te2[3]
219         LDBU    *${TEA}[$Te2[2]],$Te2[2]        ; Te2[s2>>16],  t0
220 ||      LDBU    *${TEB}[$Te2[3]],$Te2[3]        ; Te2[s3>>16],  t1
221 ||      EXTU    $s[1],EXT3,24,$Te3[1]
222 ||      EXTU    $s[0],EXT1,24,$Te1[0]
223         LDBU    *${TEB}[$Te3[1]],$Te3[1]        ; Te3[s1>>24],  t2
224 ||      LDBU    *${TEA}[$Te1[0]],$Te1[0]        ; Te1[s0>>8],   t3
225 ||      EXTU    $s[3],EXT1,24,$Te1[3]
226 ||      EXTU    $s[2],EXT3,24,$Te3[2]
227         LDBU    *${TEB}[$Te1[3]],$Te1[3]        ; Te1[s3>>8],   t2
228 ||      LDBU    *${TEA}[$Te3[2]],$Te3[2]        ; Te3[s2>>24],  t3
229 ||      EXTU    $s[2],EXT0,24,$Te0[2]
230 ||      EXTU    $s[3],EXT0,24,$Te0[3]
231         LDBU    *${TEA}[$Te0[2]],$Te0[2]        ; Te0[s2],      t2
232 ||      LDBU    *${TEB}[$Te0[3]],$Te0[3]        ; Te0[s3],      t3
233 ||      EXTU    $s[0],EXT2,24,$Te2[0]
234 ||      EXTU    $s[1],EXT2,24,$Te2[1]
235         LDBU    *${TEA}[$Te2[0]],$Te2[0]        ; Te2[s0>>16],  t2
236 ||      LDBU    *${TEB}[$Te2[1]],$Te2[1]        ; Te2[s1>>16],  t3
237
;; Pack the sixteen fetched bytes into four words (PACK2 + PACKL4), XOR
;; in the last round key and return. BNOP RA is issued early; the five
;; execute packets that follow it, including the predicated stores of
;; the result, fill its delay slots.
238         .if     .BIG_ENDIAN
239         PACK2   $Te0[0],$Te1[1],$Te0[0]
240 ||      PACK2   $Te0[1],$Te1[2],$Te0[1]
241         PACK2   $Te2[2],$Te3[3],$Te2[2]
242 ||      PACK2   $Te2[3],$Te3[0],$Te2[3]
243         PACKL4  $Te0[0],$Te2[2],$Te0[0]
244 ||      PACKL4  $Te0[1],$Te2[3],$Te0[1]
245         XOR     $K[0],$Te0[0],$Te0[0]           ; s[0]
246 ||      XOR     $K[1],$Te0[1],$Te0[1]           ; s[1]
247
248         PACK2   $Te0[2],$Te1[3],$Te0[2]
249 ||      PACK2   $Te0[3],$Te1[0],$Te0[3]
250         PACK2   $Te2[0],$Te3[1],$Te2[0]
251 ||      PACK2   $Te2[1],$Te3[2],$Te2[1]
252 ||      BNOP    RA
253         PACKL4  $Te0[2],$Te2[0],$Te0[2]
254 ||      PACKL4  $Te0[3],$Te2[1],$Te0[3]
255         XOR     $K[2],$Te0[2],$Te0[2]           ; s[2]
256 ||      XOR     $K[3],$Te0[3],$Te0[3]           ; s[3]
257
258         MV      $Te0[0],A9
259 ||      MV      $Te0[1],A8
260         MV      $Te0[2],B9
261 ||      MV      $Te0[3],B8
262 || [B2] STNDW   A9:A8,*OUT++
263    [B2] STNDW   B9:B8,*OUT++
264         .else
265         PACK2   $Te1[1],$Te0[0],$Te1[1]
266 ||      PACK2   $Te1[2],$Te0[1],$Te1[2]
267         PACK2   $Te3[3],$Te2[2],$Te3[3]
268 ||      PACK2   $Te3[0],$Te2[3],$Te3[0]
269         PACKL4  $Te3[3],$Te1[1],$Te1[1]
270 ||      PACKL4  $Te3[0],$Te1[2],$Te1[2]
271         XOR     $K[0],$Te1[1],$Te1[1]           ; s[0]
272 ||      XOR     $K[1],$Te1[2],$Te1[2]           ; s[1]
273
274         PACK2   $Te1[3],$Te0[2],$Te1[3]
275 ||      PACK2   $Te1[0],$Te0[3],$Te1[0]
276         PACK2   $Te3[1],$Te2[0],$Te3[1]
277 ||      PACK2   $Te3[2],$Te2[1],$Te3[2]
278 ||      BNOP    RA
279         PACKL4  $Te3[1],$Te1[3],$Te1[3]
280 ||      PACKL4  $Te3[2],$Te1[0],$Te1[0]
281         XOR     $K[2],$Te1[3],$Te1[3]           ; s[2]
282 ||      XOR     $K[3],$Te1[0],$Te1[0]           ; s[3]
283
284         MV      $Te1[1],A8
285 ||      MV      $Te1[2],A9
286         MV      $Te1[3],B8
287 ||      MV      $Te1[0],B9
288 || [B2] STNDW   A9:A8,*OUT++
289    [B2] STNDW   B9:B8,*OUT++
290         .endif
291         .endasmfunc
292
293         .global _AES_decrypt
;; _AES_decrypt: decrypt one 16-byte block.
;;   INP (A4) -> input block
;;   OUT (B4) -> output block
;;   KEY (A6) -> key schedule; the round count is read from word
;;               index 60 of it
;;   Returns through RA (B3).
;; The public entry sets B2=1, which predicates the block load and the
;; block store. Entering at __decrypt with B2=0 skips both memory
;; accesses, so the block is taken from and left in A9:A8/B9:B8 --
;; presumably for internal callers (TODO confirm against the callers).
294 _AES_decrypt:
295         .asmfunc
296         MVK     1,B2
297 __decrypt:
;; Load the input and form the address of AES_Td position-independently:
;; label offset (MVKL/MVKH) plus the run-time address of the entry point
;; (ADDKPC). The key schedule is walked with two pointers, one per
;; register file, each stepping two words at a time.
298         .if     __TI_EABI__
299    [B2] LDNDW   *INP++,A9:A8                    ; load input
300 ||      MVKL    \$PCR_OFFSET(AES_Td,_AES_decrypt),$TEA
301 ||      ADDKPC  _AES_decrypt,B0
302    [B2] LDNDW   *INP++,B9:B8
303 ||      MVKH    \$PCR_OFFSET(AES_Td,_AES_decrypt),$TEA
304 ||      ADD     0,KEY,$KPA
305 ||      ADD     4,KEY,$KPB
306         .else
307    [B2] LDNDW   *INP++,A9:A8                    ; load input
308 ||      MVKL    (AES_Td-_AES_decrypt),$TEA
309 ||      ADDKPC  _AES_decrypt,B0
310    [B2] LDNDW   *INP++,B9:B8
311 ||      MVKH    (AES_Td-_AES_decrypt),$TEA
312 ||      ADD     0,KEY,$KPA
313 ||      ADD     4,KEY,$KPB
314         .endif
315         LDW     *$KPA++[2],$Td0[0]              ; zero round key
316 ||      LDW     *$KPB++[2],$Td0[1]
317 ||      MVK     60,A0                           ; word index of the rounds field
318 ||      ADD     B0,$TEA,$TEA                    ; AES_Td
319         LDW     *KEY[A0],B0                     ; rounds
320 ||      MVK     1024,A0                         ; sizeof(AES_Td)
321         LDW     *$KPA++[2],$Td0[2]
322 ||      LDW     *$KPB++[2],$Td0[3]
323 ||      MV      $TEA,$TEB
324         NOP
;; Move the loaded doublewords into the state registers; which half of
;; each pair becomes which state word depends on endianness.
325         .if     .BIG_ENDIAN
326         MV      A9,$s[0]
327 ||      MV      A8,$s[1]
328 ||      MV      B9,$s[2]
329 ||      MV      B8,$s[3]
330         .else
331         MV      A8,$s[0]
332 ||      MV      A9,$s[1]
333 ||      MV      B8,$s[2]
334 ||      MV      B9,$s[3]
335         .endif
336         XOR     $Td0[0],$s[0],$s[0]
337 ||      XOR     $Td0[1],$s[1],$s[1]
338 ||      LDW     *$KPA++[2],$K[0]                ; 1st round key
339 ||      LDW     *$KPB++[2],$K[1]
340         SUB     B0,2,B0                         ; ILC is loaded with rounds-2
341
;; Software-pipelined round loop (SPLOOPD with a 13-cycle iteration
;; interval). Each iteration extracts the state bytes with EXTU, looks
;; them up in the single AES_Td table, rotates the fetched words with
;; ROTL to stand in for the other tables, and XORs in the round key
;; while the next round key is being loaded.
342         SPLOOPD 13
343 ||      MVC     B0,ILC
344 ||      LDW     *$KPA++[2],$K[2]
345 ||      LDW     *$KPB++[2],$K[3]
346 ;;====================================================================
347         EXTU    $s[1],EXT3,24,$Td3[1]
348 ||      EXTU    $s[0],EXT1,24,$Td1[0]
349         LDW     *${TEB}[$Td3[1]],$Td3[1]        ; Td3[s1>>24],  t0
350 ||      LDW     *${TEA}[$Td1[0]],$Td1[0]        ; Td1[s0>>8],   t1
351 ||      XOR     $s[2],$Td0[2],$s[2]             ; modulo-scheduled
352 ||      XOR     $s[3],$Td0[3],$s[3]             ; modulo-scheduled
353 ||      EXTU    $s[1],EXT1,24,$Td1[1]
354 ||      EXTU    $s[0],EXT3,24,$Td3[0]
355         LDW     *${TEB}[$Td1[1]],$Td1[1]        ; Td1[s1>>8],   t2
356 ||      LDW     *${TEA}[$Td3[0]],$Td3[0]        ; Td3[s0>>24],  t3
357 ||      EXTU    $s[2],EXT2,24,$Td2[2]
358 ||      EXTU    $s[3],EXT2,24,$Td2[3]
359         LDW     *${TEA}[$Td2[2]],$Td2[2]        ; Td2[s2>>16],  t0
360 ||      LDW     *${TEB}[$Td2[3]],$Td2[3]        ; Td2[s3>>16],  t1
361 ||      EXTU    $s[3],EXT1,24,$Td1[3]
362 ||      EXTU    $s[2],EXT3,24,$Td3[2]
363         LDW     *${TEB}[$Td1[3]],$Td1[3]        ; Td1[s3>>8],   t0
364 ||      LDW     *${TEA}[$Td3[2]],$Td3[2]        ; Td3[s2>>24],  t1
365 ||      EXTU    $s[0],EXT2,24,$Td2[0]
366 ||      EXTU    $s[1],EXT2,24,$Td2[1]
367         LDW     *${TEA}[$Td2[0]],$Td2[0]        ; Td2[s0>>16],  t2
368 ||      LDW     *${TEB}[$Td2[1]],$Td2[1]        ; Td2[s1>>16],  t3
369 ||      EXTU    $s[3],EXT3,24,$Td3[3]
370 ||      EXTU    $s[2],EXT1,24,$Td1[2]
371         LDW     *${TEB}[$Td3[3]],$Td3[3]        ; Td3[s3>>24],  t2
372 ||      LDW     *${TEA}[$Td1[2]],$Td1[2]        ; Td1[s2>>8],   t3
373 ||      ROTL    $Td3[1],TBL3,$Td1[0]            ; t0
374 ||      ROTL    $Td1[0],TBL1,$Td3[1]            ; t1
375 ||      EXTU    $s[0],EXT0,24,$Td0[0]
376 ||      EXTU    $s[1],EXT0,24,$Td0[1]
377         LDW     *${TEA}[$Td0[0]],$Td0[0]        ; Td0[s0],      t0
378 ||      LDW     *${TEB}[$Td0[1]],$Td0[1]        ; Td0[s1],      t1
379 ||      ROTL    $Td1[1],TBL1,$Td3[0]            ; t2
380 ||      ROTL    $Td3[0],TBL3,$Td1[1]            ; t3
381 ||      EXTU    $s[2],EXT0,24,$Td0[2]
382 ||      EXTU    $s[3],EXT0,24,$Td0[3]
383         LDW     *${TEA}[$Td0[2]],$Td0[2]        ; Td0[s2],      t2
384 ||      LDW     *${TEB}[$Td0[3]],$Td0[3]        ; Td0[s3],      t3
385 ||      ROTL    $Td2[2],TBL2,$Td2[2]            ; t0
386 ||      ROTL    $Td2[3],TBL2,$Td2[3]            ; t1
387 ||      XOR     $K[0],$Td1[0],$s[0]
388 ||      XOR     $K[1],$Td3[1],$s[1]
389         ROTL    $Td1[3],TBL1,$Td3[2]            ; t0
390 ||      ROTL    $Td3[2],TBL3,$Td1[3]            ; t1
391 ||      XOR     $K[2],$Td3[0],$s[2]
392 ||      XOR     $K[3],$Td1[1],$s[3]
393 ||      LDW     *$KPA++[2],$K[0]                ; next round key
394 ||      LDW     *$KPB++[2],$K[1]
395         ROTL    $Td2[0],TBL2,$Td2[0]            ; t2
396 ||      ROTL    $Td2[1],TBL2,$Td2[1]            ; t3
397 ||      XOR     $s[0],$Td2[2],$s[0]
398 ||      XOR     $s[1],$Td2[3],$s[1]
399 ||      LDW     *$KPA++[2],$K[2]
400 ||      LDW     *$KPB++[2],$K[3]
401         ROTL    $Td3[3],TBL3,$Td1[2]            ; t2
402 ||      ROTL    $Td1[2],TBL1,$Td3[3]            ; t3
403 ||      XOR     $s[0],$Td3[2],$s[0]
404 ||      XOR     $s[1],$Td1[3],$s[1]
405         XOR     $s[2],$Td2[0],$s[2]
406 ||      XOR     $s[3],$Td2[1],$s[3]
407 ||      XOR     $s[0],$Td0[0],$s[0]
408 ||      XOR     $s[1],$Td0[1],$s[1]
409         SPKERNEL
410 ||      XOR.L   $s[2],$Td1[2],$s[2]
411 ||      XOR.L   $s[3],$Td3[3],$s[3]
412 ;;====================================================================
;; Final round: both table pointers are advanced by 1024 bytes to Td4
;; and single bytes are fetched with LDBU instead of words.
413         ADD.D   ${TEA},A0,${TEA}                ; point to Td4
414 ||      ADD.D   ${TEB},A0,${TEB}
415 ||      EXTU    $s[1],EXT3,24,$Td3[1]
416 ||      EXTU    $s[0],EXT1,24,$Td1[0]
417         LDBU    *${TEB}[$Td3[1]],$Td3[1]        ; Td3[s1>>24],  t0
418 ||      LDBU    *${TEA}[$Td1[0]],$Td1[0]        ; Td1[s0>>8],   t1
419 ||      XOR     $s[2],$Td0[2],$s[2]             ; modulo-scheduled
420 ||      XOR     $s[3],$Td0[3],$s[3]             ; modulo-scheduled
421 ||      EXTU    $s[0],EXT0,24,$Td0[0]
422 ||      EXTU    $s[1],EXT0,24,$Td0[1]
423         LDBU    *${TEA}[$Td0[0]],$Td0[0]        ; Td0[s0],      t0
424 ||      LDBU    *${TEB}[$Td0[1]],$Td0[1]        ; Td0[s1],      t1
425 ||      EXTU    $s[2],EXT2,24,$Td2[2]
426 ||      EXTU    $s[3],EXT2,24,$Td2[3]
427         LDBU    *${TEA}[$Td2[2]],$Td2[2]        ; Td2[s2>>16],  t0
428 ||      LDBU    *${TEB}[$Td2[3]],$Td2[3]        ; Td2[s3>>16],  t1
429 ||      EXTU    $s[3],EXT1,24,$Td1[3]
430 ||      EXTU    $s[2],EXT3,24,$Td3[2]
431         LDBU    *${TEB}[$Td1[3]],$Td1[3]        ; Td1[s3>>8],   t0
432 ||      LDBU    *${TEA}[$Td3[2]],$Td3[2]        ; Td3[s2>>24],  t1
433 ||      EXTU    $s[1],EXT1,24,$Td1[1]
434 ||      EXTU    $s[0],EXT3,24,$Td3[0]
435         LDBU    *${TEB}[$Td1[1]],$Td1[1]        ; Td1[s1>>8],   t2
436 ||      LDBU    *${TEA}[$Td3[0]],$Td3[0]        ; Td3[s0>>24],  t3
437 ||      EXTU    $s[0],EXT2,24,$Td2[0]
438 ||      EXTU    $s[1],EXT2,24,$Td2[1]
439         LDBU    *${TEA}[$Td2[0]],$Td2[0]        ; Td2[s0>>16],  t2
440 ||      LDBU    *${TEB}[$Td2[1]],$Td2[1]        ; Td2[s1>>16],  t3
441 ||      EXTU    $s[3],EXT3,24,$Td3[3]
442 ||      EXTU    $s[2],EXT1,24,$Td1[2]
443         LDBU    *${TEB}[$Td3[3]],$Td3[3]        ; Td3[s3>>24],  t2
444 ||      LDBU    *${TEA}[$Td1[2]],$Td1[2]        ; Td1[s2>>8],   t3
445 ||      EXTU    $s[2],EXT0,24,$Td0[2]
446 ||      EXTU    $s[3],EXT0,24,$Td0[3]
447         LDBU    *${TEA}[$Td0[2]],$Td0[2]        ; Td0[s2],      t2
448 ||      LDBU    *${TEB}[$Td0[3]],$Td0[3]        ; Td0[s3],      t3
449
;; Pack the sixteen fetched bytes into four words (PACK2 + PACKL4), XOR
;; in the last round key and return. BNOP RA is issued early; the five
;; execute packets that follow it, including the predicated stores of
;; the result, fill its delay slots.
450         .if     .BIG_ENDIAN
451         PACK2   $Td0[0],$Td1[3],$Td0[0]
452 ||      PACK2   $Td0[1],$Td1[0],$Td0[1]
453         PACK2   $Td2[2],$Td3[1],$Td2[2]
454 ||      PACK2   $Td2[3],$Td3[2],$Td2[3]
455         PACKL4  $Td0[0],$Td2[2],$Td0[0]
456 ||      PACKL4  $Td0[1],$Td2[3],$Td0[1]
457         XOR     $K[0],$Td0[0],$Td0[0]           ; s[0]
458 ||      XOR     $K[1],$Td0[1],$Td0[1]           ; s[1]
459
460         PACK2   $Td0[2],$Td1[1],$Td0[2]
461 ||      PACK2   $Td0[3],$Td1[2],$Td0[3]
462         PACK2   $Td2[0],$Td3[3],$Td2[0]
463 ||      PACK2   $Td2[1],$Td3[0],$Td2[1]
464 ||      BNOP    RA
465         PACKL4  $Td0[2],$Td2[0],$Td0[2]
466 ||      PACKL4  $Td0[3],$Td2[1],$Td0[3]
467         XOR     $K[2],$Td0[2],$Td0[2]           ; s[2]
468 ||      XOR     $K[3],$Td0[3],$Td0[3]           ; s[3]
469
470         MV      $Td0[0],A9
471 ||      MV      $Td0[1],A8
472         MV      $Td0[2],B9
473 ||      MV      $Td0[3],B8
474 || [B2] STNDW   A9:A8,*OUT++
475    [B2] STNDW   B9:B8,*OUT++
476         .else
477         PACK2   $Td1[3],$Td0[0],$Td1[3]
478 ||      PACK2   $Td1[0],$Td0[1],$Td1[0]
479         PACK2   $Td3[1],$Td2[2],$Td3[1]
480 ||      PACK2   $Td3[2],$Td2[3],$Td3[2]
481         PACKL4  $Td3[1],$Td1[3],$Td1[3]
482 ||      PACKL4  $Td3[2],$Td1[0],$Td1[0]
483         XOR     $K[0],$Td1[3],$Td1[3]           ; s[0]
484 ||      XOR     $K[1],$Td1[0],$Td1[0]           ; s[1]
485
486         PACK2   $Td1[1],$Td0[2],$Td1[1]
487 ||      PACK2   $Td1[2],$Td0[3],$Td1[2]
488         PACK2   $Td3[3],$Td2[0],$Td3[3]
489 ||      PACK2   $Td3[0],$Td2[1],$Td3[0]
490 ||      BNOP    RA
491         PACKL4  $Td3[3],$Td1[1],$Td1[1]
492 ||      PACKL4  $Td3[0],$Td1[2],$Td1[2]
493         XOR     $K[2],$Td1[1],$Td1[1]           ; s[2]
494 ||      XOR     $K[3],$Td1[2],$Td1[2]           ; s[3]
495
496         MV      $Td1[3],A8
497 ||      MV      $Td1[0],A9
498         MV      $Td1[1],B8
499 ||      MV      $Td1[2],B9
500 || [B2] STNDW   A9:A8,*OUT++
501    [B2] STNDW   B9:B8,*OUT++
502         .endif
503         .endasmfunc
504 ___
505 {
506 my @K=(@K,@s);                  # extended key: K[0..3] plus the four state registers reused as K[4..7]
507 my @Te4=map("B$_",(16..19));    # B16..B19: scratch for Te4 bytes and temporaries during key expansion
508
509 my @Kx9=@Te0;                   # used in AES_set_decrypt_key
510 my @KxB=@Te1;                   # (the four Kx* lists are aliases of the
511 my @KxD=@Te2;                   # Te0..Te3 register lists)
512 my @KxE=@Te3;
513
514 $code.=<<___;
515         .asg    OUT,BITS
516
;; _AES_set_encrypt_key: expand a user key into the encryption key schedule.
;;   INP (A4)  -> user key bytes
;;   BITS (B4) =  key length in bits (128, 192 or 256); BITS is another
;;                name for the OUT register
;;   KEY (A6)  -> key schedule to fill; the round count (10, 12 or 14)
;;                is stored at byte offset 240
;;   RET (A4)  =  0 on success, -1 if INP or KEY is zero, -2 for any
;;                other bit length
;;   Returns through RA (B3).
517         .global _AES_set_encrypt_key
518 _AES_set_encrypt_key:
519 __set_encrypt_key:
520         .asmfunc
;; Zero-pointer checks. The two early returns are predicated so that at
;; most one B RA is issued, and A0 is zero on either error path, which
;; disables the [A0]-predicated instructions that execute in the branch
;; delay slots.
521         MV      INP,A0
522 ||      SHRU    BITS,5,BITS                     ; 128-192-256 -> 4-6-8
523 ||      MV      KEY,A1
524   [!A0] B       RA
525 ||[!A0] MVK     -1,RET
526 ||[!A0] MVK     1,A1                            ; only one B RA
527   [!A1] B       RA
528 ||[!A1] MVK     -1,RET
529 ||[!A1] MVK     0,A0
530 ||      MVK     0,B0
531 ||      MVK     0,A1
;; Dispatch on the key size. B0 selects the branch target; A1 gates each
;; additional 8-byte load so that only as many key bytes are read as the
;; key length calls for.
532    [A0] LDNDW   *INP++,A9:A8
533 || [A0] CMPEQ   4,BITS,B0
534 || [A0] CMPLT   3,BITS,A1
535    [B0] B       key128?
536 || [A1] LDNDW   *INP++,B9:B8
537 || [A0] CMPEQ   6,BITS,B0
538 || [A0] CMPLT   5,BITS,A1
539    [B0] B       key192?
540 || [A1] LDNDW   *INP++,B17:B16
541 || [A0] CMPEQ   8,BITS,B0
542 || [A0] CMPLT   7,BITS,A1
543    [B0] B       key256?
544 || [A1] LDNDW   *INP++,B19:B18
545
;; The following packets execute in the delay slots of the branches
;; above: they set up the two key schedule pointers and the
;; position-independent address of AES_Te4.
546         .if     __TI_EABI__
547    [A0] ADD     0,KEY,$KPA
548 || [A0] ADD     4,KEY,$KPB
549 || [A0] MVKL    \$PCR_OFFSET(AES_Te4,_AES_set_encrypt_key),$TEA
550 || [A0] ADDKPC  _AES_set_encrypt_key,B6
551    [A0] MVKH    \$PCR_OFFSET(AES_Te4,_AES_set_encrypt_key),$TEA
552    [A0] ADD     B6,$TEA,$TEA                    ; AES_Te4
553         .else
554    [A0] ADD     0,KEY,$KPA
555 || [A0] ADD     4,KEY,$KPB
556 || [A0] MVKL    (AES_Te4-_AES_set_encrypt_key),$TEA
557 || [A0] ADDKPC  _AES_set_encrypt_key,B6
558    [A0] MVKH    (AES_Te4-_AES_set_encrypt_key),$TEA
559    [A0] ADD     B6,$TEA,$TEA                    ; AES_Te4
560         .endif
561         NOP
562         NOP
563
;; Reached only when none of the branches above was taken.
564         BNOP    RA,5
565 ||      MVK     -2,RET                          ; unknown bit length
566 ||      MVK     0,B0                            ; redundant
567 ;;====================================================================
568 ;;====================================================================
569 key128?:
;; 128-bit key: distribute the loaded key words (order depends on
;; endianness), then run the expansion as a software-pipelined loop.
570         .if     .BIG_ENDIAN
571         MV      A9,$K[0]
572 ||      MV      A8,$K[1]
573 ||      MV      B9,$Te4[2]
574 ||      MV      B8,$K[3]
575         .else
576         MV      A8,$K[0]
577 ||      MV      A9,$K[1]
578 ||      MV      B8,$Te4[2]
579 ||      MV      B9,$K[3]
580         .endif
581
582         MVK     256,A0                          ; rcon follows the 256-byte Te4
583 ||      MVK     9,B0                            ; value loaded into ILC
584
585         SPLOOPD 14
586 ||      MVC     B0,ILC
587 ||      MV      $TEA,$TEB
588 ||      ADD     $TEA,A0,A30                     ; rcon
589 ;;====================================================================
;; One iteration: fetch rcon[i], look up the four bytes of K[3] in Te4,
;; store the current four key words, then derive the next four.
590         LDW     *A30++[1],A31                   ; rcon[i]
591 ||      MV      $Te4[2],$K[2]
592 ||      EXTU    $K[3],EXT1,24,$Te4[0]
593         LDBU    *${TEB}[$Te4[0]],$Te4[0]
594 ||      MV      $K[3],A0
595 ||      EXTU    $K[3],EXT2,24,$Te4[1]
596         LDBU    *${TEB}[$Te4[1]],$Te4[1]
597 ||      EXTU    A0,EXT3,24,A0
598 ||      EXTU    $K[3],EXT0,24,$Te4[3]
599         .if     .BIG_ENDIAN
600         LDBU    *${TEA}[A0],$Te4[3]
601 ||      LDBU    *${TEB}[$Te4[3]],A0
602         .else
603         LDBU    *${TEA}[A0],A0
604 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
605         .endif
606
607         STW     $K[0],*$KPA++[2]
608 ||      STW     $K[1],*$KPB++[2]
609         STW     $K[2],*$KPA++[2]
610 ||      STW     $K[3],*$KPB++[2]
611
612         XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
613         .if     .BIG_ENDIAN
614         PACK2   $Te4[0],$Te4[1],$Te4[1]
615         PACK2   $Te4[3],A0,$Te4[3]
616         PACKL4  $Te4[1],$Te4[3],$Te4[3]
617         .else
618         PACK2   $Te4[1],$Te4[0],$Te4[1]
619         PACK2   $Te4[3],A0,$Te4[3]
620         PACKL4  $Te4[3],$Te4[1],$Te4[3]
621         .endif
622         XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
623         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
624         MV      $Te4[0],$K[0]
625 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
626         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
627         SPKERNEL
628 ;;====================================================================
;; Store the last four key words and the round count, and return 0; the
;; five packets after BNOP RA fill its delay slots.
629         BNOP    RA
630         MV      $Te4[2],$K[2]
631 ||      STW     $K[0],*$KPA++[2]
632 ||      STW     $K[1],*$KPB++[2]
633         STW     $K[2],*$KPA++[2]
634 ||      STW     $K[3],*$KPB++[2]
635         MVK     10,B0                           ; rounds
636         STW     B0,*++${KPB}[15]
637         MVK     0,RET
638 ;;====================================================================
639 ;;====================================================================
640 key192?:
;; 192-bit key: six key words per pass, expanded in an ordinary loop
;; closed by BDEC.
641         .if     .BIG_ENDIAN
642         MV      A9,$K[0]
643 ||      MV      A8,$K[1]
644 ||      MV      B9,$K[2]
645 ||      MV      B8,$K[3]
646         MV      B17,$Te4[2]
647 ||      MV      B16,$K[5]
648         .else
649         MV      A8,$K[0]
650 ||      MV      A9,$K[1]
651 ||      MV      B8,$K[2]
652 ||      MV      B9,$K[3]
653         MV      B16,$Te4[2]
654 ||      MV      B17,$K[5]
655         .endif
656
657         MVK     256,A0                          ; rcon follows the 256-byte Te4
658 ||      MVK     6,B0                            ; loop counter for BDEC
659         MV      $TEA,$TEB
660 ||      ADD     $TEA,A0,A30                     ; rcon
661 ;;====================================================================
662 loop192?:
663         LDW     *A30++[1],A31                   ; rcon[i]
664 ||      MV      $Te4[2],$K[4]
665 ||      EXTU    $K[5],EXT1,24,$Te4[0]
666         LDBU    *${TEB}[$Te4[0]],$Te4[0]
667 ||      MV      $K[5],A0
668 ||      EXTU    $K[5],EXT2,24,$Te4[1]
669         LDBU    *${TEB}[$Te4[1]],$Te4[1]
670 ||      EXTU    A0,EXT3,24,A0
671 ||      EXTU    $K[5],EXT0,24,$Te4[3]
672         .if     .BIG_ENDIAN
673         LDBU    *${TEA}[A0],$Te4[3]
674 ||      LDBU    *${TEB}[$Te4[3]],A0
675         .else
676         LDBU    *${TEA}[A0],A0
677 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
678         .endif
679
680         STW     $K[0],*$KPA++[2]
681 ||      STW     $K[1],*$KPB++[2]
682         STW     $K[2],*$KPA++[2]
683 ||      STW     $K[3],*$KPB++[2]
684         STW     $K[4],*$KPA++[2]
685 ||      STW     $K[5],*$KPB++[2]
686
687         XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
688         .if     .BIG_ENDIAN
689         PACK2   $Te4[0],$Te4[1],$Te4[1]
690 ||      PACK2   $Te4[3],A0,$Te4[3]
691         PACKL4  $Te4[1],$Te4[3],$Te4[3]
692         .else
693         PACK2   $Te4[1],$Te4[0],$Te4[1]
694 ||      PACK2   $Te4[3],A0,$Te4[3]
695         PACKL4  $Te4[3],$Te4[1],$Te4[3]
696         .endif
;; The remaining key-word updates run in the delay slots of BDEC.
697         BDEC    loop192?,B0
698 ||      XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
699         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
700         MV      $Te4[0],$K[0]
701 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
702         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
703         MV      $Te4[2],$K[2]
704 ||      XOR     $K[3],$K[4],$Te4[2]             ; K[4]
705         XOR     $Te4[2],$K[5],$K[5]             ; K[5]
706 ;;====================================================================
;; Store the last four key words and the round count, and return 0; the
;; five packets after BNOP RA fill its delay slots.
707         BNOP    RA
708         STW     $K[0],*$KPA++[2]
709 ||      STW     $K[1],*$KPB++[2]
710         STW     $K[2],*$KPA++[2]
711 ||      STW     $K[3],*$KPB++[2]
712         MVK     12,B0                           ; rounds
713         STW     B0,*++${KPB}[7]
714         MVK     0,RET
715 ;;====================================================================
716 ;;====================================================================
717 key256?:
;; 256-bit key: eight key words per pass. The loop is left through
;; done256? once B0 has counted down to zero.
718         .if     .BIG_ENDIAN
719         MV      A9,$K[0]
720 ||      MV      A8,$K[1]
721 ||      MV      B9,$K[2]
722 ||      MV      B8,$K[3]
723         MV      B17,$K[4]
724 ||      MV      B16,$K[5]
725 ||      MV      B19,$Te4[2]
726 ||      MV      B18,$K[7]
727         .else
728         MV      A8,$K[0]
729 ||      MV      A9,$K[1]
730 ||      MV      B8,$K[2]
731 ||      MV      B9,$K[3]
732         MV      B16,$K[4]
733 ||      MV      B17,$K[5]
734 ||      MV      B18,$Te4[2]
735 ||      MV      B19,$K[7]
736         .endif
737
738         MVK     256,A0                          ; rcon follows the 256-byte Te4
739 ||      MVK     6,B0                            ; pass counter, decremented below
740         MV      $TEA,$TEB
741 ||      ADD     $TEA,A0,A30                     ; rcon
742 ;;====================================================================
743 loop256?:
;; First half: Te4 lookup on the bytes of K[7], XOR with rcon[i], then
;; derive K[0..3].
744         LDW     *A30++[1],A31                   ; rcon[i]
745 ||      MV      $Te4[2],$K[6]
746 ||      EXTU    $K[7],EXT1,24,$Te4[0]
747         LDBU    *${TEB}[$Te4[0]],$Te4[0]
748 ||      MV      $K[7],A0
749 ||      EXTU    $K[7],EXT2,24,$Te4[1]
750         LDBU    *${TEB}[$Te4[1]],$Te4[1]
751 ||      EXTU    A0,EXT3,24,A0
752 ||      EXTU    $K[7],EXT0,24,$Te4[3]
753         .if     .BIG_ENDIAN
754         LDBU    *${TEA}[A0],$Te4[3]
755 ||      LDBU    *${TEB}[$Te4[3]],A0
756         .else
757         LDBU    *${TEA}[A0],A0
758 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
759         .endif
760
761         STW     $K[0],*$KPA++[2]
762 ||      STW     $K[1],*$KPB++[2]
763         STW     $K[2],*$KPA++[2]
764 ||      STW     $K[3],*$KPB++[2]
765         STW     $K[4],*$KPA++[2]
766 ||      STW     $K[5],*$KPB++[2]
767         STW     $K[6],*$KPA++[2]
768 ||      STW     $K[7],*$KPB++[2]
769 ||      XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
770         .if     .BIG_ENDIAN
771         PACK2   $Te4[0],$Te4[1],$Te4[1]
772 ||      PACK2   $Te4[3],A0,$Te4[3]
773         PACKL4  $Te4[1],$Te4[3],$Te4[3]
774 ||[!B0] B       done256?
775         .else
776         PACK2   $Te4[1],$Te4[0],$Te4[1]
777 ||      PACK2   $Te4[3],A0,$Te4[3]
778         PACKL4  $Te4[3],$Te4[1],$Te4[3]
779 ||[!B0] B       done256?
780         .endif
781         XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
782         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
783         MV      $Te4[0],$K[0]
784 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
785         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
786
;; Second half: Te4 lookup on the bytes of the new K[3], then derive
;; K[4..7]. No rcon word is involved here.
787         MV      $Te4[2],$K[2]
788 || [B0] EXTU    $K[3],EXT0,24,$Te4[0]
789 || [B0] SUB     B0,1,B0
790         LDBU    *${TEB}[$Te4[0]],$Te4[0]
791 ||      MV      $K[3],A0
792 ||      EXTU    $K[3],EXT1,24,$Te4[1]
793         LDBU    *${TEB}[$Te4[1]],$Te4[1]
794 ||      EXTU    A0,EXT2,24,A0
795 ||      EXTU    $K[3],EXT3,24,$Te4[3]
796
797         .if     .BIG_ENDIAN
798         LDBU    *${TEA}[A0],$Te4[3]
799 ||      LDBU    *${TEB}[$Te4[3]],A0
800         NOP     3
801         PACK2   $Te4[0],$Te4[1],$Te4[1]
802         PACK2   $Te4[3],A0,$Te4[3]
803 ||      B       loop256?
804         PACKL4  $Te4[1],$Te4[3],$Te4[3]
805         .else
806         LDBU    *${TEA}[A0],A0
807 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
808         NOP     3
809         PACK2   $Te4[1],$Te4[0],$Te4[1]
810         PACK2   $Te4[3],A0,$Te4[3]
811 ||      B       loop256?
812         PACKL4  $Te4[3],$Te4[1],$Te4[3]
813         .endif
814
815         XOR     $Te4[3],$K[4],$Te4[0]           ; K[4]
816         XOR     $Te4[0],$K[5],$K[5]             ; K[5]
817         MV      $Te4[0],$K[4]
818 ||      XOR     $K[5],$K[6],$Te4[2]             ; K[6]
819         XOR     $Te4[2],$K[7],$K[7]             ; K[7]
820 ;;====================================================================
821 done256?:
;; Store the last four key words and the round count, and return 0; the
;; five packets after BNOP RA fill its delay slots.
822         BNOP    RA
823         STW     $K[0],*$KPA++[2]
824 ||      STW     $K[1],*$KPB++[2]
825         STW     $K[2],*$KPA++[2]
826 ||      STW     $K[3],*$KPB++[2]
827         MVK     14,B0                           ; rounds
828         STW     B0,*--${KPB}[1]
829         MVK     0,RET
830         .endasmfunc
831
;; _AES_set_decrypt_key: build the decryption key schedule in place.
;; The routine first branches to __set_encrypt_key with the return address
;; redirected to ret? below.  At ret? B0 holds the round count, or zero,
;; in which case control goes straight back through the saved return
;; address in B31.  Otherwise two passes follow:
;;   1. the 16-byte round keys are swapped end for end;
;;   2. the round keys between the first and the last one are run through
;;      InvMixColumns, computed with GMPY4 multiplications by 0x09, 0x0B,
;;      0x0D and 0x0E.
;; GFPGFR is saved in B30 for pass 2 and restored before returning; RET is
;; set to 0 on that path.
832         .global _AES_set_decrypt_key
833 _AES_set_decrypt_key:
834         .asmfunc
835         B       __set_encrypt_key               ; guarantee local call
836         MV      KEY,B30                         ; B30 is not modified
837         MV      RA, B31                         ; B31 is not modified
838         ADDKPC  ret?,RA,2                       ; make __set_encrypt_key return to ret?
839 ret?:                                           ; B0 holds rounds or zero
840   [!B0] BNOP    B31                             ; return if zero
841    [B0] SHL     B0,4,A0                         ; offset to last round key
842    [B0] SHRU    B0,1,B1                         ; rounds/2 ...
843    [B0] SUB     B1,1,B1                         ; ... minus one, goes to ILC below
844    [B0] MVK     0x0000001B,B3                   ; AES polynomial
845    [B0] MVKH    0x07000000,B3                   ; upper half of the value written to GFPGFR later
846
847         SPLOOPD 9                               ; flip round keys
848 ||      MVC     B1,ILC
849 ||      MV      B30,$KPA
850 ||      ADD     B30,A0,$KPB
851 ||      MVK     16,A0                           ; sizeof(round key)
852 ;;====================================================================
;; Each pass loads one round key through the low pointer (A16-A19) and one
;; through the high pointer (B16-B19), moves the low pointer up and the
;; high pointer down by 16 bytes, then stores the two keys exchanged.
853         LDW     *${KPA}[0],A16
854 ||      LDW     *${KPB}[0],B16
855         LDW     *${KPA}[1],A17
856 ||      LDW     *${KPB}[1],B17
857         LDW     *${KPA}[2],A18
858 ||      LDW     *${KPB}[2],B18
859         LDW     *${KPA}[3],A19
860 ||      ADD     $KPA,A0,$KPA
861 ||      LDW     *${KPB}[3],B19
862 ||      SUB     $KPB,A0,$KPB
863         NOP
864         STW     B16,*${KPA}[-4]
865 ||      STW     A16,*${KPB}[4]
866         STW     B17,*${KPA}[-3]
867 ||      STW     A17,*${KPB}[5]
868         STW     B18,*${KPA}[-2]
869 ||      STW     A18,*${KPB}[6]
870         STW     B19,*${KPA}[-1]
871 ||      STW     A19,*${KPB}[7]
872         SPKERNEL
873 ;;====================================================================
874         SUB     B0,1,B0                         ; skip last round
875 ||      ADD     B30,A0,$KPA                     ; skip first round
876 ||      ADD     B30,A0,$KPB
877 ||      MVC     GFPGFR,B30                      ; save GFPGFR
878         LDW     *${KPA}[0],$K[0]
879 ||      LDW     *${KPB}[1],$K[1]
880 ||      MVC     B3,GFPGFR                       ; install the value prepared in B3
881         LDW     *${KPA}[2],$K[2]
882 ||      LDW     *${KPB}[3],$K[3]
883         MVK     0x00000909,A24                  ; byte-replicated GMPY4 multipliers:
884 ||      MVK     0x00000B0B,B24                  ; A24=0x09.., B24=0x0B..
885         MVKH    0x09090000,A24
886 ||      MVKH    0x0B0B0000,B24
887         MVC     B0,ILC                          ; ILC = rounds-1 (B0 as decremented above)
888 ||      SUB     B0,1,B0
889
890         GMPY4   $K[0],A24,$Kx9[0]               ; ·0x09
891 ||      GMPY4   $K[1],A24,$Kx9[1]
892 ||      MVK     0x00000D0D,A25                  ; A25=0x0D.., B25=0x0E..
893 ||      MVK     0x00000E0E,B25
894         GMPY4   $K[2],A24,$Kx9[2]
895 ||      GMPY4   $K[3],A24,$Kx9[3]
896 ||      MVKH    0x0D0D0000,A25
897 ||      MVKH    0x0E0E0000,B25
898
899         GMPY4   $K[0],B24,$KxB[0]               ; ·0x0B
900 ||      GMPY4   $K[1],B24,$KxB[1]
901         GMPY4   $K[2],B24,$KxB[2]
902 ||      GMPY4   $K[3],B24,$KxB[3]
903
904         SPLOOP  11                              ; InvMixColumns
905 ;;====================================================================
;; The products by 0x09 and 0x0B for the current round key were started
;; before loop entry (first pass) or late in the previous pass, guarded by
;; B0; the loads of the next round key are guarded by B0 as well.
906         GMPY4   $K[0],A25,$KxD[0]               ; ·0x0D
907 ||      GMPY4   $K[1],A25,$KxD[1]
908 ||      SWAP2   $Kx9[0],$Kx9[0]                 ; rotate by 16
909 ||      SWAP2   $Kx9[1],$Kx9[1]
910 ||      MV      $K[0],$s[0]                     ; this or DINT
911 ||      MV      $K[1],$s[1]
912 || [B0] LDW     *${KPA}[4],$K[0]
913 || [B0] LDW     *${KPB}[5],$K[1]
914         GMPY4   $K[2],A25,$KxD[2]
915 ||      GMPY4   $K[3],A25,$KxD[3]
916 ||      SWAP2   $Kx9[2],$Kx9[2]
917 ||      SWAP2   $Kx9[3],$Kx9[3]
918 ||      MV      $K[2],$s[2]
919 ||      MV      $K[3],$s[3]
920 || [B0] LDW     *${KPA}[6],$K[2]
921 || [B0] LDW     *${KPB}[7],$K[3]
922
923         GMPY4   $s[0],B25,$KxE[0]               ; ·0x0E
924 ||      GMPY4   $s[1],B25,$KxE[1]
925 ||      XOR     $Kx9[0],$KxB[0],$KxB[0]
926 ||      XOR     $Kx9[1],$KxB[1],$KxB[1]
927         GMPY4   $s[2],B25,$KxE[2]
928 ||      GMPY4   $s[3],B25,$KxE[3]
929 ||      XOR     $Kx9[2],$KxB[2],$KxB[2]
930 ||      XOR     $Kx9[3],$KxB[3],$KxB[3]
931
932         ROTL    $KxB[0],TBL3,$KxB[0]
933 ||      ROTL    $KxB[1],TBL3,$KxB[1]
934 ||      SWAP2   $KxD[0],$KxD[0]                 ; rotate by 16
935 ||      SWAP2   $KxD[1],$KxD[1]
936         ROTL    $KxB[2],TBL3,$KxB[2]
937 ||      ROTL    $KxB[3],TBL3,$KxB[3]
938 ||      SWAP2   $KxD[2],$KxD[2]
939 ||      SWAP2   $KxD[3],$KxD[3]
940
941         XOR     $KxE[0],$KxD[0],$KxE[0]
942 ||      XOR     $KxE[1],$KxD[1],$KxE[1]
943 || [B0] GMPY4   $K[0],A24,$Kx9[0]               ; ·0x09
944 || [B0] GMPY4   $K[1],A24,$Kx9[1]
945 ||      ADDAW   $KPA,4,$KPA                     ; advance both pointers by one round key
946         XOR     $KxE[2],$KxD[2],$KxE[2]
947 ||      XOR     $KxE[3],$KxD[3],$KxE[3]
948 || [B0] GMPY4   $K[2],A24,$Kx9[2]
949 || [B0] GMPY4   $K[3],A24,$Kx9[3]
950 ||      ADDAW   $KPB,4,$KPB
951
952         XOR     $KxB[0],$KxE[0],$KxE[0]
953 ||      XOR     $KxB[1],$KxE[1],$KxE[1]
954 || [B0] GMPY4   $K[0],B24,$KxB[0]               ; ·0x0B
955 || [B0] GMPY4   $K[1],B24,$KxB[1]
956         XOR     $KxB[2],$KxE[2],$KxE[2]
957 ||      XOR     $KxB[3],$KxE[3],$KxE[3]
958 || [B0] GMPY4   $K[2],B24,$KxB[2]
959 || [B0] GMPY4   $K[3],B24,$KxB[3]
960 ||      STW     $KxE[0],*${KPA}[-4]             ; write the transformed round key back
961 ||      STW     $KxE[1],*${KPB}[-3]
962         STW     $KxE[2],*${KPA}[-2]
963 ||      STW     $KxE[3],*${KPB}[-1]
964 || [B0] SUB     B0,1,B0
965         SPKERNEL
966 ;;====================================================================
967         BNOP    B31,3                           ; return to the original caller
968         MVC     B30,GFPGFR                      ; restore GFPGFR(*)
969         MVK     0,RET                           ; return value 0
970         .endasmfunc
971 ___
972 # (*)   Even though ABI doesn't specify GFPGFR as non-volatile, there
973 #       are code samples out there that *assume* its default value.
974 }
975 {
976 my ($inp,$out,$blocks,$key,$ivp)=("A4","B4","A6","B6","A8");
977 $code.=<<___;
;; _AES_ctr32_encrypt: CTR-mode encryption built around __encrypt.
;; Arguments: inp=A4, out=B4, blocks=A6, key=B6, ivp=A8.  The body reaches
;; the input and output buffers through the INP and OUT symbols, which are
;; defined outside this part of the file.
;; The 16-byte counter block is kept in A31:A30 and B31:B30.  A2 counts the
;; blocks still to be processed.  A1 stays zero until the first pass has
;; been started, so that pass neither XORs nor stores anything.  Every
;; pass hands the current counter block to __encrypt in A9:A8 and B9:B8,
;; increments the 32-bit counter word and points RA back at ctr32_loop?.
;; When A2 reaches zero the routine returns through the original return
;; address saved in B27.
978         .global _AES_ctr32_encrypt
979 _AES_ctr32_encrypt:
980         .asmfunc
981         LDNDW   *${ivp}[0],A31:A30      ; load counter value
982 ||      MV      $blocks,A2              ; reassign $blocks
983 ||      DMV     RA,$key,B27:B26         ; reassign RA and $key
984         LDNDW   *${ivp}[1],B31:B30
985 ||      MVK     0,B2                    ; don't let __encrypt load input
986 ||      MVK     0,A1                    ; and postpone writing output
987         .if     .BIG_ENDIAN
988         NOP
989         .else
990         NOP     4
991         SWAP2   B31,B31                 ; keep least significant 32 bits
992         SWAP4   B31,B31                 ; in host byte order
993         .endif
994 ctr32_loop?:
995    [A2] BNOP    __encrypt               ; encrypt the counter block while blocks remain
996 || [A1] XOR     A29,A9,A9               ; input^Ek(counter)
997 || [A1] XOR     A28,A8,A8
998 || [A2] LDNDW   *INP++,A29:A28          ; load input
999   [!A2] BNOP    B27                     ; return
1000 || [A1] XOR     B29,B9,B9
1001 || [A1] XOR     B28,B8,B8
1002 || [A2] LDNDW   *INP++,B29:B28
1003         .if     .BIG_ENDIAN
1004    [A1] STNDW   A9:A8,*OUT++            ; save output
1005 || [A2] DMV     A31,A30,A9:A8           ; pass counter value to __encrypt
1006    [A1] STNDW   B9:B8,*OUT++
1007 || [A2] DMV     B31,B30,B9:B8
1008 || [A2] ADD     B30,1,B30               ; counter++
1009         .else
1010    [A1] STNDW   A9:A8,*OUT++            ; save output
1011 || [A2] DMV     A31,A30,A9:A8
1012 || [A2] SWAP2   B31,B0                  ; SWAP2 here plus SWAP4 below undo the
1013 || [A2] ADD     B31,1,B31               ; counter++
1014    [A1] STNDW   B9:B8,*OUT++
1015 || [A2] MV      B30,B8
1016 || [A2] SWAP4   B0,B9                   ; host-order conversion done at entry
1017         .endif
1018    [A2] ADDKPC  ctr32_loop?,RA          ; return to ctr32_loop?
1019 || [A2] MV      B26,KEY                 ; pass $key
1020 || [A2] SUB     A2,1,A2                 ; $blocks--
1021 ||[!A1] MVK     1,A1                    ; from now on there is output to XOR and store
1022         NOP
1023         NOP
1024         .endasmfunc
1025 ___
1026 }
1027 # Tables are kept in an endian-neutral manner: they are emitted byte by byte with .byte
1028 $code.=<<___;
1029         .if     __TI_EABI__
1030         .sect   ".text:aes_asm.const"
1031         .else
1032         .sect   ".const:aes_asm"
1033         .endif
1034         .align  128
1035 AES_Te:
1036         .byte   0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84
1037         .byte   0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
1038         .byte   0xff,0xf2,0xf2,0x0d,    0xd6,0x6b,0x6b,0xbd
1039         .byte   0xde,0x6f,0x6f,0xb1,    0x91,0xc5,0xc5,0x54
1040         .byte   0x60,0x30,0x30,0x50,    0x02,0x01,0x01,0x03
1041         .byte   0xce,0x67,0x67,0xa9,    0x56,0x2b,0x2b,0x7d
1042         .byte   0xe7,0xfe,0xfe,0x19,    0xb5,0xd7,0xd7,0x62
1043         .byte   0x4d,0xab,0xab,0xe6,    0xec,0x76,0x76,0x9a
1044         .byte   0x8f,0xca,0xca,0x45,    0x1f,0x82,0x82,0x9d
1045         .byte   0x89,0xc9,0xc9,0x40,    0xfa,0x7d,0x7d,0x87
1046         .byte   0xef,0xfa,0xfa,0x15,    0xb2,0x59,0x59,0xeb
1047         .byte   0x8e,0x47,0x47,0xc9,    0xfb,0xf0,0xf0,0x0b
1048         .byte   0x41,0xad,0xad,0xec,    0xb3,0xd4,0xd4,0x67
1049         .byte   0x5f,0xa2,0xa2,0xfd,    0x45,0xaf,0xaf,0xea
1050         .byte   0x23,0x9c,0x9c,0xbf,    0x53,0xa4,0xa4,0xf7
1051         .byte   0xe4,0x72,0x72,0x96,    0x9b,0xc0,0xc0,0x5b
1052         .byte   0x75,0xb7,0xb7,0xc2,    0xe1,0xfd,0xfd,0x1c
1053         .byte   0x3d,0x93,0x93,0xae,    0x4c,0x26,0x26,0x6a
1054         .byte   0x6c,0x36,0x36,0x5a,    0x7e,0x3f,0x3f,0x41
1055         .byte   0xf5,0xf7,0xf7,0x02,    0x83,0xcc,0xcc,0x4f
1056         .byte   0x68,0x34,0x34,0x5c,    0x51,0xa5,0xa5,0xf4
1057         .byte   0xd1,0xe5,0xe5,0x34,    0xf9,0xf1,0xf1,0x08
1058         .byte   0xe2,0x71,0x71,0x93,    0xab,0xd8,0xd8,0x73
1059         .byte   0x62,0x31,0x31,0x53,    0x2a,0x15,0x15,0x3f
1060         .byte   0x08,0x04,0x04,0x0c,    0x95,0xc7,0xc7,0x52
1061         .byte   0x46,0x23,0x23,0x65,    0x9d,0xc3,0xc3,0x5e
1062         .byte   0x30,0x18,0x18,0x28,    0x37,0x96,0x96,0xa1
1063         .byte   0x0a,0x05,0x05,0x0f,    0x2f,0x9a,0x9a,0xb5
1064         .byte   0x0e,0x07,0x07,0x09,    0x24,0x12,0x12,0x36
1065         .byte   0x1b,0x80,0x80,0x9b,    0xdf,0xe2,0xe2,0x3d
1066         .byte   0xcd,0xeb,0xeb,0x26,    0x4e,0x27,0x27,0x69
1067         .byte   0x7f,0xb2,0xb2,0xcd,    0xea,0x75,0x75,0x9f
1068         .byte   0x12,0x09,0x09,0x1b,    0x1d,0x83,0x83,0x9e
1069         .byte   0x58,0x2c,0x2c,0x74,    0x34,0x1a,0x1a,0x2e
1070         .byte   0x36,0x1b,0x1b,0x2d,    0xdc,0x6e,0x6e,0xb2
1071         .byte   0xb4,0x5a,0x5a,0xee,    0x5b,0xa0,0xa0,0xfb
1072         .byte   0xa4,0x52,0x52,0xf6,    0x76,0x3b,0x3b,0x4d
1073         .byte   0xb7,0xd6,0xd6,0x61,    0x7d,0xb3,0xb3,0xce
1074         .byte   0x52,0x29,0x29,0x7b,    0xdd,0xe3,0xe3,0x3e
1075         .byte   0x5e,0x2f,0x2f,0x71,    0x13,0x84,0x84,0x97
1076         .byte   0xa6,0x53,0x53,0xf5,    0xb9,0xd1,0xd1,0x68
1077         .byte   0x00,0x00,0x00,0x00,    0xc1,0xed,0xed,0x2c
1078         .byte   0x40,0x20,0x20,0x60,    0xe3,0xfc,0xfc,0x1f
1079         .byte   0x79,0xb1,0xb1,0xc8,    0xb6,0x5b,0x5b,0xed
1080         .byte   0xd4,0x6a,0x6a,0xbe,    0x8d,0xcb,0xcb,0x46
1081         .byte   0x67,0xbe,0xbe,0xd9,    0x72,0x39,0x39,0x4b
1082         .byte   0x94,0x4a,0x4a,0xde,    0x98,0x4c,0x4c,0xd4
1083         .byte   0xb0,0x58,0x58,0xe8,    0x85,0xcf,0xcf,0x4a
1084         .byte   0xbb,0xd0,0xd0,0x6b,    0xc5,0xef,0xef,0x2a
1085         .byte   0x4f,0xaa,0xaa,0xe5,    0xed,0xfb,0xfb,0x16
1086         .byte   0x86,0x43,0x43,0xc5,    0x9a,0x4d,0x4d,0xd7
1087         .byte   0x66,0x33,0x33,0x55,    0x11,0x85,0x85,0x94
1088         .byte   0x8a,0x45,0x45,0xcf,    0xe9,0xf9,0xf9,0x10
1089         .byte   0x04,0x02,0x02,0x06,    0xfe,0x7f,0x7f,0x81
1090         .byte   0xa0,0x50,0x50,0xf0,    0x78,0x3c,0x3c,0x44
1091         .byte   0x25,0x9f,0x9f,0xba,    0x4b,0xa8,0xa8,0xe3
1092         .byte   0xa2,0x51,0x51,0xf3,    0x5d,0xa3,0xa3,0xfe
1093         .byte   0x80,0x40,0x40,0xc0,    0x05,0x8f,0x8f,0x8a
1094         .byte   0x3f,0x92,0x92,0xad,    0x21,0x9d,0x9d,0xbc
1095         .byte   0x70,0x38,0x38,0x48,    0xf1,0xf5,0xf5,0x04
1096         .byte   0x63,0xbc,0xbc,0xdf,    0x77,0xb6,0xb6,0xc1
1097         .byte   0xaf,0xda,0xda,0x75,    0x42,0x21,0x21,0x63
1098         .byte   0x20,0x10,0x10,0x30,    0xe5,0xff,0xff,0x1a
1099         .byte   0xfd,0xf3,0xf3,0x0e,    0xbf,0xd2,0xd2,0x6d
1100         .byte   0x81,0xcd,0xcd,0x4c,    0x18,0x0c,0x0c,0x14
1101         .byte   0x26,0x13,0x13,0x35,    0xc3,0xec,0xec,0x2f
1102         .byte   0xbe,0x5f,0x5f,0xe1,    0x35,0x97,0x97,0xa2
1103         .byte   0x88,0x44,0x44,0xcc,    0x2e,0x17,0x17,0x39
1104         .byte   0x93,0xc4,0xc4,0x57,    0x55,0xa7,0xa7,0xf2
1105         .byte   0xfc,0x7e,0x7e,0x82,    0x7a,0x3d,0x3d,0x47
1106         .byte   0xc8,0x64,0x64,0xac,    0xba,0x5d,0x5d,0xe7
1107         .byte   0x32,0x19,0x19,0x2b,    0xe6,0x73,0x73,0x95
1108         .byte   0xc0,0x60,0x60,0xa0,    0x19,0x81,0x81,0x98
1109         .byte   0x9e,0x4f,0x4f,0xd1,    0xa3,0xdc,0xdc,0x7f
1110         .byte   0x44,0x22,0x22,0x66,    0x54,0x2a,0x2a,0x7e
1111         .byte   0x3b,0x90,0x90,0xab,    0x0b,0x88,0x88,0x83
1112         .byte   0x8c,0x46,0x46,0xca,    0xc7,0xee,0xee,0x29
1113         .byte   0x6b,0xb8,0xb8,0xd3,    0x28,0x14,0x14,0x3c
1114         .byte   0xa7,0xde,0xde,0x79,    0xbc,0x5e,0x5e,0xe2
1115         .byte   0x16,0x0b,0x0b,0x1d,    0xad,0xdb,0xdb,0x76
1116         .byte   0xdb,0xe0,0xe0,0x3b,    0x64,0x32,0x32,0x56
1117         .byte   0x74,0x3a,0x3a,0x4e,    0x14,0x0a,0x0a,0x1e
1118         .byte   0x92,0x49,0x49,0xdb,    0x0c,0x06,0x06,0x0a
1119         .byte   0x48,0x24,0x24,0x6c,    0xb8,0x5c,0x5c,0xe4
1120         .byte   0x9f,0xc2,0xc2,0x5d,    0xbd,0xd3,0xd3,0x6e
1121         .byte   0x43,0xac,0xac,0xef,    0xc4,0x62,0x62,0xa6
1122         .byte   0x39,0x91,0x91,0xa8,    0x31,0x95,0x95,0xa4
1123         .byte   0xd3,0xe4,0xe4,0x37,    0xf2,0x79,0x79,0x8b
1124         .byte   0xd5,0xe7,0xe7,0x32,    0x8b,0xc8,0xc8,0x43
1125         .byte   0x6e,0x37,0x37,0x59,    0xda,0x6d,0x6d,0xb7
1126         .byte   0x01,0x8d,0x8d,0x8c,    0xb1,0xd5,0xd5,0x64
1127         .byte   0x9c,0x4e,0x4e,0xd2,    0x49,0xa9,0xa9,0xe0
1128         .byte   0xd8,0x6c,0x6c,0xb4,    0xac,0x56,0x56,0xfa
1129         .byte   0xf3,0xf4,0xf4,0x07,    0xcf,0xea,0xea,0x25
1130         .byte   0xca,0x65,0x65,0xaf,    0xf4,0x7a,0x7a,0x8e
1131         .byte   0x47,0xae,0xae,0xe9,    0x10,0x08,0x08,0x18
1132         .byte   0x6f,0xba,0xba,0xd5,    0xf0,0x78,0x78,0x88
1133         .byte   0x4a,0x25,0x25,0x6f,    0x5c,0x2e,0x2e,0x72
1134         .byte   0x38,0x1c,0x1c,0x24,    0x57,0xa6,0xa6,0xf1
1135         .byte   0x73,0xb4,0xb4,0xc7,    0x97,0xc6,0xc6,0x51
1136         .byte   0xcb,0xe8,0xe8,0x23,    0xa1,0xdd,0xdd,0x7c
1137         .byte   0xe8,0x74,0x74,0x9c,    0x3e,0x1f,0x1f,0x21
1138         .byte   0x96,0x4b,0x4b,0xdd,    0x61,0xbd,0xbd,0xdc
1139         .byte   0x0d,0x8b,0x8b,0x86,    0x0f,0x8a,0x8a,0x85
1140         .byte   0xe0,0x70,0x70,0x90,    0x7c,0x3e,0x3e,0x42
1141         .byte   0x71,0xb5,0xb5,0xc4,    0xcc,0x66,0x66,0xaa
1142         .byte   0x90,0x48,0x48,0xd8,    0x06,0x03,0x03,0x05
1143         .byte   0xf7,0xf6,0xf6,0x01,    0x1c,0x0e,0x0e,0x12
1144         .byte   0xc2,0x61,0x61,0xa3,    0x6a,0x35,0x35,0x5f
1145         .byte   0xae,0x57,0x57,0xf9,    0x69,0xb9,0xb9,0xd0
1146         .byte   0x17,0x86,0x86,0x91,    0x99,0xc1,0xc1,0x58
1147         .byte   0x3a,0x1d,0x1d,0x27,    0x27,0x9e,0x9e,0xb9
1148         .byte   0xd9,0xe1,0xe1,0x38,    0xeb,0xf8,0xf8,0x13
1149         .byte   0x2b,0x98,0x98,0xb3,    0x22,0x11,0x11,0x33
1150         .byte   0xd2,0x69,0x69,0xbb,    0xa9,0xd9,0xd9,0x70
1151         .byte   0x07,0x8e,0x8e,0x89,    0x33,0x94,0x94,0xa7
1152         .byte   0x2d,0x9b,0x9b,0xb6,    0x3c,0x1e,0x1e,0x22
1153         .byte   0x15,0x87,0x87,0x92,    0xc9,0xe9,0xe9,0x20
1154         .byte   0x87,0xce,0xce,0x49,    0xaa,0x55,0x55,0xff
1155         .byte   0x50,0x28,0x28,0x78,    0xa5,0xdf,0xdf,0x7a
1156         .byte   0x03,0x8c,0x8c,0x8f,    0x59,0xa1,0xa1,0xf8
1157         .byte   0x09,0x89,0x89,0x80,    0x1a,0x0d,0x0d,0x17
1158         .byte   0x65,0xbf,0xbf,0xda,    0xd7,0xe6,0xe6,0x31
1159         .byte   0x84,0x42,0x42,0xc6,    0xd0,0x68,0x68,0xb8
1160         .byte   0x82,0x41,0x41,0xc3,    0x29,0x99,0x99,0xb0
1161         .byte   0x5a,0x2d,0x2d,0x77,    0x1e,0x0f,0x0f,0x11
1162         .byte   0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
1163         .byte   0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
1164 AES_Te4:
1165         .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
1166         .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
1167         .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
1168         .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
1169         .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
1170         .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
1171         .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
1172         .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
1173         .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
1174         .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
1175         .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
1176         .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
1177         .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
1178         .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
1179         .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
1180         .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
1181         .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
1182         .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
1183         .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
1184         .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
1185         .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
1186         .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
1187         .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
1188         .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
1189         .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
1190         .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
1191         .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
1192         .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
1193         .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
1194         .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
1195         .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
1196         .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
1197 rcon:
1198         .byte   0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00
1199         .byte   0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
1200         .byte   0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
1201         .byte   0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
1202         .byte   0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
1203         .align  128
1204 AES_Td:
1205         .byte   0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53
1206         .byte   0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
1207         .byte   0x3b,0xab,0x6b,0xcb,    0x1f,0x9d,0x45,0xf1
1208         .byte   0xac,0xfa,0x58,0xab,    0x4b,0xe3,0x03,0x93
1209         .byte   0x20,0x30,0xfa,0x55,    0xad,0x76,0x6d,0xf6
1210         .byte   0x88,0xcc,0x76,0x91,    0xf5,0x02,0x4c,0x25
1211         .byte   0x4f,0xe5,0xd7,0xfc,    0xc5,0x2a,0xcb,0xd7
1212         .byte   0x26,0x35,0x44,0x80,    0xb5,0x62,0xa3,0x8f
1213         .byte   0xde,0xb1,0x5a,0x49,    0x25,0xba,0x1b,0x67
1214         .byte   0x45,0xea,0x0e,0x98,    0x5d,0xfe,0xc0,0xe1
1215         .byte   0xc3,0x2f,0x75,0x02,    0x81,0x4c,0xf0,0x12
1216         .byte   0x8d,0x46,0x97,0xa3,    0x6b,0xd3,0xf9,0xc6
1217         .byte   0x03,0x8f,0x5f,0xe7,    0x15,0x92,0x9c,0x95
1218         .byte   0xbf,0x6d,0x7a,0xeb,    0x95,0x52,0x59,0xda
1219         .byte   0xd4,0xbe,0x83,0x2d,    0x58,0x74,0x21,0xd3
1220         .byte   0x49,0xe0,0x69,0x29,    0x8e,0xc9,0xc8,0x44
1221         .byte   0x75,0xc2,0x89,0x6a,    0xf4,0x8e,0x79,0x78
1222         .byte   0x99,0x58,0x3e,0x6b,    0x27,0xb9,0x71,0xdd
1223         .byte   0xbe,0xe1,0x4f,0xb6,    0xf0,0x88,0xad,0x17
1224         .byte   0xc9,0x20,0xac,0x66,    0x7d,0xce,0x3a,0xb4
1225         .byte   0x63,0xdf,0x4a,0x18,    0xe5,0x1a,0x31,0x82
1226         .byte   0x97,0x51,0x33,0x60,    0x62,0x53,0x7f,0x45
1227         .byte   0xb1,0x64,0x77,0xe0,    0xbb,0x6b,0xae,0x84
1228         .byte   0xfe,0x81,0xa0,0x1c,    0xf9,0x08,0x2b,0x94
1229         .byte   0x70,0x48,0x68,0x58,    0x8f,0x45,0xfd,0x19
1230         .byte   0x94,0xde,0x6c,0x87,    0x52,0x7b,0xf8,0xb7
1231         .byte   0xab,0x73,0xd3,0x23,    0x72,0x4b,0x02,0xe2
1232         .byte   0xe3,0x1f,0x8f,0x57,    0x66,0x55,0xab,0x2a
1233         .byte   0xb2,0xeb,0x28,0x07,    0x2f,0xb5,0xc2,0x03
1234         .byte   0x86,0xc5,0x7b,0x9a,    0xd3,0x37,0x08,0xa5
1235         .byte   0x30,0x28,0x87,0xf2,    0x23,0xbf,0xa5,0xb2
1236         .byte   0x02,0x03,0x6a,0xba,    0xed,0x16,0x82,0x5c
1237         .byte   0x8a,0xcf,0x1c,0x2b,    0xa7,0x79,0xb4,0x92
1238         .byte   0xf3,0x07,0xf2,0xf0,    0x4e,0x69,0xe2,0xa1
1239         .byte   0x65,0xda,0xf4,0xcd,    0x06,0x05,0xbe,0xd5
1240         .byte   0xd1,0x34,0x62,0x1f,    0xc4,0xa6,0xfe,0x8a
1241         .byte   0x34,0x2e,0x53,0x9d,    0xa2,0xf3,0x55,0xa0
1242         .byte   0x05,0x8a,0xe1,0x32,    0xa4,0xf6,0xeb,0x75
1243         .byte   0x0b,0x83,0xec,0x39,    0x40,0x60,0xef,0xaa
1244         .byte   0x5e,0x71,0x9f,0x06,    0xbd,0x6e,0x10,0x51
1245         .byte   0x3e,0x21,0x8a,0xf9,    0x96,0xdd,0x06,0x3d
1246         .byte   0xdd,0x3e,0x05,0xae,    0x4d,0xe6,0xbd,0x46
1247         .byte   0x91,0x54,0x8d,0xb5,    0x71,0xc4,0x5d,0x05
1248         .byte   0x04,0x06,0xd4,0x6f,    0x60,0x50,0x15,0xff
1249         .byte   0x19,0x98,0xfb,0x24,    0xd6,0xbd,0xe9,0x97
1250         .byte   0x89,0x40,0x43,0xcc,    0x67,0xd9,0x9e,0x77
1251         .byte   0xb0,0xe8,0x42,0xbd,    0x07,0x89,0x8b,0x88
1252         .byte   0xe7,0x19,0x5b,0x38,    0x79,0xc8,0xee,0xdb
1253         .byte   0xa1,0x7c,0x0a,0x47,    0x7c,0x42,0x0f,0xe9
1254         .byte   0xf8,0x84,0x1e,0xc9,    0x00,0x00,0x00,0x00
1255         .byte   0x09,0x80,0x86,0x83,    0x32,0x2b,0xed,0x48
1256         .byte   0x1e,0x11,0x70,0xac,    0x6c,0x5a,0x72,0x4e
1257         .byte   0xfd,0x0e,0xff,0xfb,    0x0f,0x85,0x38,0x56
1258         .byte   0x3d,0xae,0xd5,0x1e,    0x36,0x2d,0x39,0x27
1259         .byte   0x0a,0x0f,0xd9,0x64,    0x68,0x5c,0xa6,0x21
1260         .byte   0x9b,0x5b,0x54,0xd1,    0x24,0x36,0x2e,0x3a
1261         .byte   0x0c,0x0a,0x67,0xb1,    0x93,0x57,0xe7,0x0f
1262         .byte   0xb4,0xee,0x96,0xd2,    0x1b,0x9b,0x91,0x9e
1263         .byte   0x80,0xc0,0xc5,0x4f,    0x61,0xdc,0x20,0xa2
1264         .byte   0x5a,0x77,0x4b,0x69,    0x1c,0x12,0x1a,0x16
1265         .byte   0xe2,0x93,0xba,0x0a,    0xc0,0xa0,0x2a,0xe5
1266         .byte   0x3c,0x22,0xe0,0x43,    0x12,0x1b,0x17,0x1d
1267         .byte   0x0e,0x09,0x0d,0x0b,    0xf2,0x8b,0xc7,0xad
1268         .byte   0x2d,0xb6,0xa8,0xb9,    0x14,0x1e,0xa9,0xc8
1269         .byte   0x57,0xf1,0x19,0x85,    0xaf,0x75,0x07,0x4c
1270         .byte   0xee,0x99,0xdd,0xbb,    0xa3,0x7f,0x60,0xfd
1271         .byte   0xf7,0x01,0x26,0x9f,    0x5c,0x72,0xf5,0xbc
1272         .byte   0x44,0x66,0x3b,0xc5,    0x5b,0xfb,0x7e,0x34
1273         .byte   0x8b,0x43,0x29,0x76,    0xcb,0x23,0xc6,0xdc
1274         .byte   0xb6,0xed,0xfc,0x68,    0xb8,0xe4,0xf1,0x63
1275         .byte   0xd7,0x31,0xdc,0xca,    0x42,0x63,0x85,0x10
1276         .byte   0x13,0x97,0x22,0x40,    0x84,0xc6,0x11,0x20
1277         .byte   0x85,0x4a,0x24,0x7d,    0xd2,0xbb,0x3d,0xf8
1278         .byte   0xae,0xf9,0x32,0x11,    0xc7,0x29,0xa1,0x6d
1279         .byte   0x1d,0x9e,0x2f,0x4b,    0xdc,0xb2,0x30,0xf3
1280         .byte   0x0d,0x86,0x52,0xec,    0x77,0xc1,0xe3,0xd0
1281         .byte   0x2b,0xb3,0x16,0x6c,    0xa9,0x70,0xb9,0x99
1282         .byte   0x11,0x94,0x48,0xfa,    0x47,0xe9,0x64,0x22
1283         .byte   0xa8,0xfc,0x8c,0xc4,    0xa0,0xf0,0x3f,0x1a
1284         .byte   0x56,0x7d,0x2c,0xd8,    0x22,0x33,0x90,0xef
1285         .byte   0x87,0x49,0x4e,0xc7,    0xd9,0x38,0xd1,0xc1
1286         .byte   0x8c,0xca,0xa2,0xfe,    0x98,0xd4,0x0b,0x36
1287         .byte   0xa6,0xf5,0x81,0xcf,    0xa5,0x7a,0xde,0x28
1288         .byte   0xda,0xb7,0x8e,0x26,    0x3f,0xad,0xbf,0xa4
1289         .byte   0x2c,0x3a,0x9d,0xe4,    0x50,0x78,0x92,0x0d
1290         .byte   0x6a,0x5f,0xcc,0x9b,    0x54,0x7e,0x46,0x62
1291         .byte   0xf6,0x8d,0x13,0xc2,    0x90,0xd8,0xb8,0xe8
1292         .byte   0x2e,0x39,0xf7,0x5e,    0x82,0xc3,0xaf,0xf5
1293         .byte   0x9f,0x5d,0x80,0xbe,    0x69,0xd0,0x93,0x7c
1294         .byte   0x6f,0xd5,0x2d,0xa9,    0xcf,0x25,0x12,0xb3
1295         .byte   0xc8,0xac,0x99,0x3b,    0x10,0x18,0x7d,0xa7
1296         .byte   0xe8,0x9c,0x63,0x6e,    0xdb,0x3b,0xbb,0x7b
1297         .byte   0xcd,0x26,0x78,0x09,    0x6e,0x59,0x18,0xf4
1298         .byte   0xec,0x9a,0xb7,0x01,    0x83,0x4f,0x9a,0xa8
1299         .byte   0xe6,0x95,0x6e,0x65,    0xaa,0xff,0xe6,0x7e
1300         .byte   0x21,0xbc,0xcf,0x08,    0xef,0x15,0xe8,0xe6
1301         .byte   0xba,0xe7,0x9b,0xd9,    0x4a,0x6f,0x36,0xce
1302         .byte   0xea,0x9f,0x09,0xd4,    0x29,0xb0,0x7c,0xd6
1303         .byte   0x31,0xa4,0xb2,0xaf,    0x2a,0x3f,0x23,0x31
1304         .byte   0xc6,0xa5,0x94,0x30,    0x35,0xa2,0x66,0xc0
1305         .byte   0x74,0x4e,0xbc,0x37,    0xfc,0x82,0xca,0xa6
1306         .byte   0xe0,0x90,0xd0,0xb0,    0x33,0xa7,0xd8,0x15
1307         .byte   0xf1,0x04,0x98,0x4a,    0x41,0xec,0xda,0xf7
1308         .byte   0x7f,0xcd,0x50,0x0e,    0x17,0x91,0xf6,0x2f
1309         .byte   0x76,0x4d,0xd6,0x8d,    0x43,0xef,0xb0,0x4d
1310         .byte   0xcc,0xaa,0x4d,0x54,    0xe4,0x96,0x04,0xdf
1311         .byte   0x9e,0xd1,0xb5,0xe3,    0x4c,0x6a,0x88,0x1b
1312         .byte   0xc1,0x2c,0x1f,0xb8,    0x46,0x65,0x51,0x7f
1313         .byte   0x9d,0x5e,0xea,0x04,    0x01,0x8c,0x35,0x5d
1314         .byte   0xfa,0x87,0x74,0x73,    0xfb,0x0b,0x41,0x2e
1315         .byte   0xb3,0x67,0x1d,0x5a,    0x92,0xdb,0xd2,0x52
1316         .byte   0xe9,0x10,0x56,0x33,    0x6d,0xd6,0x47,0x13
1317         .byte   0x9a,0xd7,0x61,0x8c,    0x37,0xa1,0x0c,0x7a
1318         .byte   0x59,0xf8,0x14,0x8e,    0xeb,0x13,0x3c,0x89
1319         .byte   0xce,0xa9,0x27,0xee,    0xb7,0x61,0xc9,0x35
1320         .byte   0xe1,0x1c,0xe5,0xed,    0x7a,0x47,0xb1,0x3c
1321         .byte   0x9c,0xd2,0xdf,0x59,    0x55,0xf2,0x73,0x3f
1322         .byte   0x18,0x14,0xce,0x79,    0x73,0xc7,0x37,0xbf
1323         .byte   0x53,0xf7,0xcd,0xea,    0x5f,0xfd,0xaa,0x5b
1324         .byte   0xdf,0x3d,0x6f,0x14,    0x78,0x44,0xdb,0x86
1325         .byte   0xca,0xaf,0xf3,0x81,    0xb9,0x68,0xc4,0x3e
1326         .byte   0x38,0x24,0x34,0x2c,    0xc2,0xa3,0x40,0x5f
1327         .byte   0x16,0x1d,0xc3,0x72,    0xbc,0xe2,0x25,0x0c
1328         .byte   0x28,0x3c,0x49,0x8b,    0xff,0x0d,0x95,0x41
1329         .byte   0x39,0xa8,0x01,0x71,    0x08,0x0c,0xb3,0xde
1330         .byte   0xd8,0xb4,0xe4,0x9c,    0x64,0x56,0xc1,0x90
1331         .byte   0x7b,0xcb,0x84,0x61,    0xd5,0x32,0xb6,0x70
1332         .byte   0x48,0x6c,0x5c,0x74,    0xd0,0xb8,0x57,0x42
1333 AES_Td4:
1334         .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
1335         .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1336         .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1337         .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1338         .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1339         .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1340         .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1341         .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1342         .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1343         .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1344         .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1345         .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1346         .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1347         .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1348         .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1349         .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1350         .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1351         .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1352         .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1353         .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1354         .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1355         .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1356         .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1357         .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1358         .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1359         .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1360         .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1361         .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1362         .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1363         .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1364         .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1365         .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1366         .cstring "AES for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
1367         .align  4
1368 ___
1369
# Emit the generated assembler text.  Both the write and the final flush are
# checked, so a failed write (full disk, closed pipe) stops the build instead
# of leaving a truncated output file behind with a zero exit status.
1370 print $code or die "error writing to STDOUT: $!";
1371 close STDOUT or die "error closing STDOUT: $!";