C64x+ assembler pack. linux-c64xplus build is *not* tested nor can it be
[openssl.git] / crypto / aes / asm / aes-c64xplus.pl
1 #!/usr/bin/env perl
2 #
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9 #
10 # [Endian-neutral] AES for C64x+.
11 #
12 # Even though SPLOOPs are scheduled for 13 cycles, and thus expected
13 # performance is ~8.5 cycles per byte processed with 128-bit key,
14 # measured performance turned out to be ~10 cycles per byte. Discrepancy
15 # must be caused by limitations of L1D memory banking(*), see SPRU871
16 # TI publication for further details. If it's any consolation, it's still
17 # ~20% faster than TI's linear assembly module anyway... Compared to
18 # aes_core.c compiled with cl6x 6.0 with -mv6400+ -o2 options this
19 # code is 3.75x faster and almost 3x smaller (tables included).
20 #
21 # (*)   This means that there might be subtle correlation between data
22 #       and timing and one can wonder if it can be ... attacked:-(
23 #       On the other hand this also means that *if* one chooses to
24 #       implement *4* T-tables variant [instead of 1 T-table as in
25 #       this implementation, or in addition to], then one ought to
26 #       *interleave* them. Even though it complicates addressing,
27 #       references to interleaved tables would be guaranteed not to
28 #       clash. I reckon that it should be possible to break 8 cycles
29 #       per byte "barrier," i.e. improve by ~20%, naturally at the
30 #       cost of 8x increased pressure on L1D. 8x because you'd have
31 #       to interleave both Te and Td tables...
32
33 while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
34 open STDOUT,">$output";
35
36 ($TEA,$TEB)=("A5","B5");
37 ($KPA,$KPB)=("A3","B1");
38 @K=("A6","B6","A7","B7");
39 @s=("A8","B8","A9","B9");
40 @Te0=@Td0=("A16","B16","A17","B17");
41 @Te1=@Td1=("A18","B18","A19","B19");
42 @Te2=@Td2=("A20","B20","A21","B21");
43 @Te3=@Td3=("A22","B22","A23","B23");
44
45 $code=<<___;
46         .text
47         .if     __TI_EABI__
48         .nocmp
49         .endif
50
51         .asg    B3,RA
52         .asg    A4,INP
53         .asg    B4,OUT
54         .asg    A6,KEY
55         .asg    A4,RET
56         .asg    B15,SP
57
58         .eval   24,EXT0
59         .eval   16,EXT1
60         .eval   8,EXT2
61         .eval   0,EXT3
62         .eval   8,TBL1
63         .eval   16,TBL2
64         .eval   24,TBL3
65
66         .if     .BIG_ENDIAN
67         .eval   24-EXT0,EXT0
68         .eval   24-EXT1,EXT1
69         .eval   24-EXT2,EXT2
70         .eval   24-EXT3,EXT3
71         .eval   32-TBL1,TBL1
72         .eval   32-TBL2,TBL2
73         .eval   32-TBL3,TBL3
74         .endif
75
76         .global _AES_encrypt
77 _AES_encrypt:
78         .asmfunc
79         MVK     1,B2
80 __encrypt:
81         .if     __TI_EABI__
82    [B2] LDNDW   *INP++,A9:A8                    ; load input
83 ||      MVKL    \$PCR_OFFSET(AES_Te,_AES_encrypt),$TEA
84 ||      ADDKPC  _AES_encrypt,B0
85    [B2] LDNDW   *INP++,B9:B8
86 ||      MVKH    \$PCR_OFFSET(AES_Te,_AES_encrypt),$TEA
87 ||      ADD     0,KEY,$KPA
88 ||      ADD     4,KEY,$KPB
89         .else
90    [B2] LDNDW   *INP++,A9:A8                    ; load input
91 ||      MVKL    (AES_Te-_AES_encrypt),$TEA
92 ||      ADDKPC  _AES_encrypt,B0
93    [B2] LDNDW   *INP++,B9:B8
94 ||      MVKH    (AES_Te-_AES_encrypt),$TEA
95 ||      ADD     0,KEY,$KPA
96 ||      ADD     4,KEY,$KPB
97         .endif
98         LDW     *$KPA++[2],$Te0[0]              ; zero round key
99 ||      LDW     *$KPB++[2],$Te0[1]
100 ||      MVK     60,A0
101 ||      ADD     B0,$TEA,$TEA                    ; AES_Te
102         LDW     *KEY[A0],B0                     ; rounds
103 ||      MVK     1024,A0                         ; sizeof(AES_Te)
104         LDW     *$KPA++[2],$Te0[2]
105 ||      LDW     *$KPB++[2],$Te0[3]
106 ||      MV      $TEA,$TEB
107         NOP
108         .if     .BIG_ENDIAN
109         MV      A9,$s[0]
110 ||      MV      A8,$s[1]
111 ||      MV      B9,$s[2]
112 ||      MV      B8,$s[3]
113         .else
114         MV      A8,$s[0]
115 ||      MV      A9,$s[1]
116 ||      MV      B8,$s[2]
117 ||      MV      B9,$s[3]
118         .endif
119         XOR     $Te0[0],$s[0],$s[0]
120 ||      XOR     $Te0[1],$s[1],$s[1]
121 ||      LDW     *$KPA++[2],$K[0]                ; 1st round key
122 ||      LDW     *$KPB++[2],$K[1]
123         SUB     B0,2,B0
124
125         SPLOOPD 13
126 ||      MVC     B0,ILC
127 ||      LDW     *$KPA++[2],$K[2]
128 ||      LDW     *$KPB++[2],$K[3]
129 ;;====================================================================
130         EXTU    $s[1],EXT1,24,$Te1[1]
131 ||      EXTU    $s[0],EXT3,24,$Te3[0]
132         LDW     *${TEB}[$Te1[1]],$Te1[1]        ; Te1[s1>>8],   t0
133 ||      LDW     *${TEA}[$Te3[0]],$Te3[0]        ; Te3[s0>>24],  t1
134 ||      XOR     $s[2],$Te0[2],$s[2]             ; modulo-scheduled
135 ||      XOR     $s[3],$Te0[3],$s[3]             ; modulo-scheduled
136 ||      EXTU    $s[1],EXT3,24,$Te3[1]
137 ||      EXTU    $s[0],EXT1,24,$Te1[0]
138         LDW     *${TEB}[$Te3[1]],$Te3[1]        ; Te3[s1>>24],  t2
139 ||      LDW     *${TEA}[$Te1[0]],$Te1[0]        ; Te1[s0>>8],   t3
140 ||      EXTU    $s[2],EXT2,24,$Te2[2]
141 ||      EXTU    $s[3],EXT2,24,$Te2[3]
142         LDW     *${TEA}[$Te2[2]],$Te2[2]        ; Te2[s2>>16],  t0
143 ||      LDW     *${TEB}[$Te2[3]],$Te2[3]        ; Te2[s3>>16],  t1
144 ||      EXTU    $s[3],EXT3,24,$Te3[3]
145 ||      EXTU    $s[2],EXT1,24,$Te1[2]
146         LDW     *${TEB}[$Te3[3]],$Te3[3]        ; Te3[s3>>24],  t0
147 ||      LDW     *${TEA}[$Te1[2]],$Te1[2]        ; Te1[s2>>8],   t1
148 ||      EXTU    $s[0],EXT2,24,$Te2[0]
149 ||      EXTU    $s[1],EXT2,24,$Te2[1]
150         LDW     *${TEA}[$Te2[0]],$Te2[0]        ; Te2[s0>>16],  t2
151 ||      LDW     *${TEB}[$Te2[1]],$Te2[1]        ; Te2[s1>>16],  t3
152 ||      EXTU    $s[3],EXT1,24,$Te1[3]
153 ||      EXTU    $s[2],EXT3,24,$Te3[2]
154         LDW     *${TEB}[$Te1[3]],$Te1[3]        ; Te1[s3>>8],   t2
155 ||      LDW     *${TEA}[$Te3[2]],$Te3[2]        ; Te3[s2>>24],  t3
156 ||      ROTL    $Te1[1],TBL1,$Te3[0]            ; t0
157 ||      ROTL    $Te3[0],TBL3,$Te1[1]            ; t1
158 ||      EXTU    $s[0],EXT0,24,$Te0[0]
159 ||      EXTU    $s[1],EXT0,24,$Te0[1]
160         LDW     *${TEA}[$Te0[0]],$Te0[0]        ; Te0[s0],      t0
161 ||      LDW     *${TEB}[$Te0[1]],$Te0[1]        ; Te0[s1],      t1
162 ||      ROTL    $Te3[1],TBL3,$Te1[0]            ; t2
163 ||      ROTL    $Te1[0],TBL1,$Te3[1]            ; t3
164 ||      EXTU    $s[2],EXT0,24,$Te0[2]
165 ||      EXTU    $s[3],EXT0,24,$Te0[3]
166         LDW     *${TEA}[$Te0[2]],$Te0[2]        ; Te0[s2],      t2
167 ||      LDW     *${TEB}[$Te0[3]],$Te0[3]        ; Te0[s3],      t3
168 ||      ROTL    $Te2[2],TBL2,$Te2[2]            ; t0
169 ||      ROTL    $Te2[3],TBL2,$Te2[3]            ; t1
170 ||      XOR     $K[0],$Te3[0],$s[0]
171 ||      XOR     $K[1],$Te1[1],$s[1]
172         ROTL    $Te3[3],TBL3,$Te1[2]            ; t0
173 ||      ROTL    $Te1[2],TBL1,$Te3[3]            ; t1
174 ||      XOR     $K[2],$Te1[0],$s[2]
175 ||      XOR     $K[3],$Te3[1],$s[3]
176 ||      LDW     *$KPA++[2],$K[0]                ; next round key
177 ||      LDW     *$KPB++[2],$K[1]
178         ROTL    $Te2[0],TBL2,$Te2[0]            ; t2
179 ||      ROTL    $Te2[1],TBL2,$Te2[1]            ; t3
180 ||      XOR     $s[0],$Te2[2],$s[0]
181 ||      XOR     $s[1],$Te2[3],$s[1]
182 ||      LDW     *$KPA++[2],$K[2]
183 ||      LDW     *$KPB++[2],$K[3]
184         ROTL    $Te1[3],TBL1,$Te3[2]            ; t2
185 ||      ROTL    $Te3[2],TBL3,$Te1[3]            ; t3
186 ||      XOR     $s[0],$Te1[2],$s[0]
187 ||      XOR     $s[1],$Te3[3],$s[1]
188         XOR     $s[2],$Te2[0],$s[2]
189 ||      XOR     $s[3],$Te2[1],$s[3]
190 ||      XOR     $s[0],$Te0[0],$s[0]
191 ||      XOR     $s[1],$Te0[1],$s[1]
192         SPKERNEL
193 ||      XOR.L   $s[2],$Te3[2],$s[2]
194 ||      XOR.L   $s[3],$Te1[3],$s[3]
195 ;;====================================================================
196         ADD.D   ${TEA},A0,${TEA}                ; point to Te4
197 ||      ADD.D   ${TEB},A0,${TEB}
198 ||      EXTU    $s[1],EXT1,24,$Te1[1]
199 ||      EXTU    $s[0],EXT3,24,$Te3[0]
200         LDBU    *${TEB}[$Te1[1]],$Te1[1]        ; Te1[s1>>8],   t0
201 ||      LDBU    *${TEA}[$Te3[0]],$Te3[0]        ; Te3[s0>>24],  t1
202 ||      XOR     $s[2],$Te0[2],$s[2]             ; modulo-scheduled
203 ||      XOR     $s[3],$Te0[3],$s[3]             ; modulo-scheduled
204 ||      EXTU    $s[0],EXT0,24,$Te0[0]
205 ||      EXTU    $s[1],EXT0,24,$Te0[1]
206         LDBU    *${TEA}[$Te0[0]],$Te0[0]        ; Te0[s0],      t0
207 ||      LDBU    *${TEB}[$Te0[1]],$Te0[1]        ; Te0[s1],      t1
208 ||      EXTU    $s[3],EXT3,24,$Te3[3]
209 ||      EXTU    $s[2],EXT1,24,$Te1[2]
210         LDBU    *${TEB}[$Te3[3]],$Te3[3]        ; Te3[s3>>24],  t0
211 ||      LDBU    *${TEA}[$Te1[2]],$Te1[2]        ; Te1[s2>>8],   t1
212 ||      EXTU    $s[2],EXT2,24,$Te2[2]
213 ||      EXTU    $s[3],EXT2,24,$Te2[3]
214         LDBU    *${TEA}[$Te2[2]],$Te2[2]        ; Te2[s2>>16],  t0
215 ||      LDBU    *${TEB}[$Te2[3]],$Te2[3]        ; Te2[s3>>16],  t1
216 ||      EXTU    $s[1],EXT3,24,$Te3[1]
217 ||      EXTU    $s[0],EXT1,24,$Te1[0]
218         LDBU    *${TEB}[$Te3[1]],$Te3[1]        ; Te3[s1>>24],  t2
219 ||      LDBU    *${TEA}[$Te1[0]],$Te1[0]        ; Te1[s0>>8],   t3
220 ||      EXTU    $s[3],EXT1,24,$Te1[3]
221 ||      EXTU    $s[2],EXT3,24,$Te3[2]
222         LDBU    *${TEB}[$Te1[3]],$Te1[3]        ; Te1[s3>>8],   t2
223 ||      LDBU    *${TEA}[$Te3[2]],$Te3[2]        ; Te3[s2>>24],  t3
224 ||      EXTU    $s[2],EXT0,24,$Te0[2]
225 ||      EXTU    $s[3],EXT0,24,$Te0[3]
226         LDBU    *${TEA}[$Te0[2]],$Te0[2]        ; Te0[s2],      t2
227 ||      LDBU    *${TEB}[$Te0[3]],$Te0[3]        ; Te0[s3],      t3
228 ||      EXTU    $s[0],EXT2,24,$Te2[0]
229 ||      EXTU    $s[1],EXT2,24,$Te2[1]
230         LDBU    *${TEA}[$Te2[0]],$Te2[0]        ; Te2[s0>>16],  t2
231 ||      LDBU    *${TEB}[$Te2[1]],$Te2[1]        ; Te2[s1>>16],  t3
232
233         .if     .BIG_ENDIAN
234         PACK2   $Te0[0],$Te1[1],$Te0[0]
235 ||      PACK2   $Te0[1],$Te1[2],$Te0[1]
236         PACK2   $Te2[2],$Te3[3],$Te2[2]
237 ||      PACK2   $Te2[3],$Te3[0],$Te2[3]
238         PACKL4  $Te0[0],$Te2[2],$Te0[0]
239 ||      PACKL4  $Te0[1],$Te2[3],$Te0[1]
240         XOR     $K[0],$Te0[0],$Te0[0]           ; s[0]
241 ||      XOR     $K[1],$Te0[1],$Te0[1]           ; s[1]
242
243         PACK2   $Te0[2],$Te1[3],$Te0[2]
244 ||      PACK2   $Te0[3],$Te1[0],$Te0[3]
245         PACK2   $Te2[0],$Te3[1],$Te2[0]
246 ||      PACK2   $Te2[1],$Te3[2],$Te2[1]
247 ||      BNOP    RA
248         PACKL4  $Te0[2],$Te2[0],$Te0[2]
249 ||      PACKL4  $Te0[3],$Te2[1],$Te0[3]
250         XOR     $K[2],$Te0[2],$Te0[2]           ; s[2]
251 ||      XOR     $K[3],$Te0[3],$Te0[3]           ; s[3]
252
253         MV      $Te0[0],A9
254 ||      MV      $Te0[1],A8
255         MV      $Te0[2],B9
256 ||      MV      $Te0[3],B8
257 || [B2] STNDW   A9:A8,*OUT++
258    [B2] STNDW   B9:B8,*OUT++
259         .else
260         PACK2   $Te1[1],$Te0[0],$Te1[1]
261 ||      PACK2   $Te1[2],$Te0[1],$Te1[2]
262         PACK2   $Te3[3],$Te2[2],$Te3[3]
263 ||      PACK2   $Te3[0],$Te2[3],$Te3[0]
264         PACKL4  $Te3[3],$Te1[1],$Te1[1]
265 ||      PACKL4  $Te3[0],$Te1[2],$Te1[2]
266         XOR     $K[0],$Te1[1],$Te1[1]           ; s[0]
267 ||      XOR     $K[1],$Te1[2],$Te1[2]           ; s[1]
268
269         PACK2   $Te1[3],$Te0[2],$Te1[3]
270 ||      PACK2   $Te1[0],$Te0[3],$Te1[0]
271         PACK2   $Te3[1],$Te2[0],$Te3[1]
272 ||      PACK2   $Te3[2],$Te2[1],$Te3[2]
273 ||      BNOP    RA
274         PACKL4  $Te3[1],$Te1[3],$Te1[3]
275 ||      PACKL4  $Te3[2],$Te1[0],$Te1[0]
276         XOR     $K[2],$Te1[3],$Te1[3]           ; s[2]
277 ||      XOR     $K[3],$Te1[0],$Te1[0]           ; s[3]
278
279         MV      $Te1[1],A8
280 ||      MV      $Te1[2],A9
281         MV      $Te1[3],B8
282 ||      MV      $Te1[0],B9
283 || [B2] STNDW   A9:A8,*OUT++
284    [B2] STNDW   B9:B8,*OUT++
285         .endif
286         .endasmfunc
287
288         .global _AES_decrypt
289 _AES_decrypt:
290         .asmfunc
291         MVK     1,B2
292 __decrypt:
293         .if     __TI_EABI__
294    [B2] LDNDW   *INP++,A9:A8                    ; load input
295 ||      MVKL    \$PCR_OFFSET(AES_Td,_AES_decrypt),$TEA
296 ||      ADDKPC  _AES_decrypt,B0
297    [B2] LDNDW   *INP++,B9:B8
298 ||      MVKH    \$PCR_OFFSET(AES_Td,_AES_decrypt),$TEA
299 ||      ADD     0,KEY,$KPA
300 ||      ADD     4,KEY,$KPB
301         .else
302    [B2] LDNDW   *INP++,A9:A8                    ; load input
303 ||      MVKL    (AES_Td-_AES_decrypt),$TEA
304 ||      ADDKPC  _AES_decrypt,B0
305    [B2] LDNDW   *INP++,B9:B8
306 ||      MVKH    (AES_Td-_AES_decrypt),$TEA
307 ||      ADD     0,KEY,$KPA
308 ||      ADD     4,KEY,$KPB
309         .endif
310         LDW     *$KPA++[2],$Td0[0]              ; zero round key
311 ||      LDW     *$KPB++[2],$Td0[1]
312 ||      MVK     60,A0
313 ||      ADD     B0,$TEA,$TEA                    ; AES_Td
314         LDW     *KEY[A0],B0                     ; rounds
315 ||      MVK     1024,A0                         ; sizeof(AES_Td)
316         LDW     *$KPA++[2],$Td0[2]
317 ||      LDW     *$KPB++[2],$Td0[3]
318 ||      MV      $TEA,$TEB
319         NOP
320         .if     .BIG_ENDIAN
321         MV      A9,$s[0]
322 ||      MV      A8,$s[1]
323 ||      MV      B9,$s[2]
324 ||      MV      B8,$s[3]
325         .else
326         MV      A8,$s[0]
327 ||      MV      A9,$s[1]
328 ||      MV      B8,$s[2]
329 ||      MV      B9,$s[3]
330         .endif
331         XOR     $Td0[0],$s[0],$s[0]
332 ||      XOR     $Td0[1],$s[1],$s[1]
333 ||      LDW     *$KPA++[2],$K[0]                ; 1st round key
334 ||      LDW     *$KPB++[2],$K[1]
335         SUB     B0,2,B0
336
337         SPLOOPD 13
338 ||      MVC     B0,ILC
339 ||      LDW     *$KPA++[2],$K[2]
340 ||      LDW     *$KPB++[2],$K[3]
341 ;;====================================================================
342         EXTU    $s[1],EXT3,24,$Td3[1]
343 ||      EXTU    $s[0],EXT1,24,$Td1[0]
344         LDW     *${TEB}[$Td3[1]],$Td3[1]        ; Td3[s1>>24],  t0
345 ||      LDW     *${TEA}[$Td1[0]],$Td1[0]        ; Td1[s0>>8],   t1
346 ||      XOR     $s[2],$Td0[2],$s[2]             ; modulo-scheduled
347 ||      XOR     $s[3],$Td0[3],$s[3]             ; modulo-scheduled
348 ||      EXTU    $s[1],EXT1,24,$Td1[1]
349 ||      EXTU    $s[0],EXT3,24,$Td3[0]
350         LDW     *${TEB}[$Td1[1]],$Td1[1]        ; Td1[s1>>8],   t2
351 ||      LDW     *${TEA}[$Td3[0]],$Td3[0]        ; Td3[s0>>24],  t3
352 ||      EXTU    $s[2],EXT2,24,$Td2[2]
353 ||      EXTU    $s[3],EXT2,24,$Td2[3]
354         LDW     *${TEA}[$Td2[2]],$Td2[2]        ; Td2[s2>>16],  t0
355 ||      LDW     *${TEB}[$Td2[3]],$Td2[3]        ; Td2[s3>>16],  t1
356 ||      EXTU    $s[3],EXT1,24,$Td1[3]
357 ||      EXTU    $s[2],EXT3,24,$Td3[2]
358         LDW     *${TEB}[$Td1[3]],$Td1[3]        ; Td1[s3>>8],   t0
359 ||      LDW     *${TEA}[$Td3[2]],$Td3[2]        ; Td3[s2>>24],  t1
360 ||      EXTU    $s[0],EXT2,24,$Td2[0]
361 ||      EXTU    $s[1],EXT2,24,$Td2[1]
362         LDW     *${TEA}[$Td2[0]],$Td2[0]        ; Td2[s0>>16],  t2
363 ||      LDW     *${TEB}[$Td2[1]],$Td2[1]        ; Td2[s1>>16],  t3
364 ||      EXTU    $s[3],EXT3,24,$Td3[3]
365 ||      EXTU    $s[2],EXT1,24,$Td1[2]
366         LDW     *${TEB}[$Td3[3]],$Td3[3]        ; Td3[s3>>24],  t2
367 ||      LDW     *${TEA}[$Td1[2]],$Td1[2]        ; Td1[s2>>8],   t3
368 ||      ROTL    $Td3[1],TBL3,$Td1[0]            ; t0
369 ||      ROTL    $Td1[0],TBL1,$Td3[1]            ; t1
370 ||      EXTU    $s[0],EXT0,24,$Td0[0]
371 ||      EXTU    $s[1],EXT0,24,$Td0[1]
372         LDW     *${TEA}[$Td0[0]],$Td0[0]        ; Td0[s0],      t0
373 ||      LDW     *${TEB}[$Td0[1]],$Td0[1]        ; Td0[s1],      t1
374 ||      ROTL    $Td1[1],TBL1,$Td3[0]            ; t2
375 ||      ROTL    $Td3[0],TBL3,$Td1[1]            ; t3
376 ||      EXTU    $s[2],EXT0,24,$Td0[2]
377 ||      EXTU    $s[3],EXT0,24,$Td0[3]
378         LDW     *${TEA}[$Td0[2]],$Td0[2]        ; Td0[s2],      t2
379 ||      LDW     *${TEB}[$Td0[3]],$Td0[3]        ; Td0[s3],      t3
380 ||      ROTL    $Td2[2],TBL2,$Td2[2]            ; t0
381 ||      ROTL    $Td2[3],TBL2,$Td2[3]            ; t1
382 ||      XOR     $K[0],$Td1[0],$s[0]
383 ||      XOR     $K[1],$Td3[1],$s[1]
384         ROTL    $Td1[3],TBL1,$Td3[2]            ; t0
385 ||      ROTL    $Td3[2],TBL3,$Td1[3]            ; t1
386 ||      XOR     $K[2],$Td3[0],$s[2]
387 ||      XOR     $K[3],$Td1[1],$s[3]
388 ||      LDW     *$KPA++[2],$K[0]                ; next round key
389 ||      LDW     *$KPB++[2],$K[1]
390         ROTL    $Td2[0],TBL2,$Td2[0]            ; t2
391 ||      ROTL    $Td2[1],TBL2,$Td2[1]            ; t3
392 ||      XOR     $s[0],$Td2[2],$s[0]
393 ||      XOR     $s[1],$Td2[3],$s[1]
394 ||      LDW     *$KPA++[2],$K[2]
395 ||      LDW     *$KPB++[2],$K[3]
396         ROTL    $Td3[3],TBL3,$Td1[2]            ; t2
397 ||      ROTL    $Td1[2],TBL1,$Td3[3]            ; t3
398 ||      XOR     $s[0],$Td3[2],$s[0]
399 ||      XOR     $s[1],$Td1[3],$s[1]
400         XOR     $s[2],$Td2[0],$s[2]
401 ||      XOR     $s[3],$Td2[1],$s[3]
402 ||      XOR     $s[0],$Td0[0],$s[0]
403 ||      XOR     $s[1],$Td0[1],$s[1]
404         SPKERNEL
405 ||      XOR.L   $s[2],$Td1[2],$s[2]
406 ||      XOR.L   $s[3],$Td3[3],$s[3]
407 ;;====================================================================
408         ADD.D   ${TEA},A0,${TEA}                ; point to Td4
409 ||      ADD.D   ${TEB},A0,${TEB}
410 ||      EXTU    $s[1],EXT3,24,$Td3[1]
411 ||      EXTU    $s[0],EXT1,24,$Td1[0]
412         LDBU    *${TEB}[$Td3[1]],$Td3[1]        ; Td3[s1>>24],  t0
413 ||      LDBU    *${TEA}[$Td1[0]],$Td1[0]        ; Td1[s0>>8],   t1
414 ||      XOR     $s[2],$Td0[2],$s[2]             ; modulo-scheduled
415 ||      XOR     $s[3],$Td0[3],$s[3]             ; modulo-scheduled
416 ||      EXTU    $s[0],EXT0,24,$Td0[0]
417 ||      EXTU    $s[1],EXT0,24,$Td0[1]
418         LDBU    *${TEA}[$Td0[0]],$Td0[0]        ; Td0[s0],      t0
419 ||      LDBU    *${TEB}[$Td0[1]],$Td0[1]        ; Td0[s1],      t1
420 ||      EXTU    $s[2],EXT2,24,$Td2[2]
421 ||      EXTU    $s[3],EXT2,24,$Td2[3]
422         LDBU    *${TEA}[$Td2[2]],$Td2[2]        ; Td2[s2>>16],  t0
423 ||      LDBU    *${TEB}[$Td2[3]],$Td2[3]        ; Td2[s3>>16],  t1
424 ||      EXTU    $s[3],EXT1,24,$Td1[3]
425 ||      EXTU    $s[2],EXT3,24,$Td3[2]
426         LDBU    *${TEB}[$Td1[3]],$Td1[3]        ; Td1[s3>>8],   t0
427 ||      LDBU    *${TEA}[$Td3[2]],$Td3[2]        ; Td3[s2>>24],  t1
428 ||      EXTU    $s[1],EXT1,24,$Td1[1]
429 ||      EXTU    $s[0],EXT3,24,$Td3[0]
430         LDBU    *${TEB}[$Td1[1]],$Td1[1]        ; Td1[s1>>8],   t2
431 ||      LDBU    *${TEA}[$Td3[0]],$Td3[0]        ; Td3[s0>>24],  t3
432 ||      EXTU    $s[0],EXT2,24,$Td2[0]
433 ||      EXTU    $s[1],EXT2,24,$Td2[1]
434         LDBU    *${TEA}[$Td2[0]],$Td2[0]        ; Td2[s0>>16],  t2
435 ||      LDBU    *${TEB}[$Td2[1]],$Td2[1]        ; Td2[s1>>16],  t3
436 ||      EXTU    $s[3],EXT3,24,$Td3[3]
437 ||      EXTU    $s[2],EXT1,24,$Td1[2]
438         LDBU    *${TEB}[$Td3[3]],$Td3[3]        ; Td3[s3>>24],  t2
439 ||      LDBU    *${TEA}[$Td1[2]],$Td1[2]        ; Td1[s2>>8],   t3
440 ||      EXTU    $s[2],EXT0,24,$Td0[2]
441 ||      EXTU    $s[3],EXT0,24,$Td0[3]
442         LDBU    *${TEA}[$Td0[2]],$Td0[2]        ; Td0[s2],      t2
443 ||      LDBU    *${TEB}[$Td0[3]],$Td0[3]        ; Td0[s3],      t3
444
445         .if     .BIG_ENDIAN
446         PACK2   $Td0[0],$Td1[3],$Td0[0]
447 ||      PACK2   $Td0[1],$Td1[0],$Td0[1]
448         PACK2   $Td2[2],$Td3[1],$Td2[2]
449 ||      PACK2   $Td2[3],$Td3[2],$Td2[3]
450         PACKL4  $Td0[0],$Td2[2],$Td0[0]
451 ||      PACKL4  $Td0[1],$Td2[3],$Td0[1]
452         XOR     $K[0],$Td0[0],$Td0[0]           ; s[0]
453 ||      XOR     $K[1],$Td0[1],$Td0[1]           ; s[1]
454
455         PACK2   $Td0[2],$Td1[1],$Td0[2]
456 ||      PACK2   $Td0[3],$Td1[2],$Td0[3]
457         PACK2   $Td2[0],$Td3[3],$Td2[0]
458 ||      PACK2   $Td2[1],$Td3[0],$Td2[1]
459 ||      BNOP    RA
460         PACKL4  $Td0[2],$Td2[0],$Td0[2]
461 ||      PACKL4  $Td0[3],$Td2[1],$Td0[3]
462         XOR     $K[2],$Td0[2],$Td0[2]           ; s[2]
463 ||      XOR     $K[3],$Td0[3],$Td0[3]           ; s[3]
464
465         MV      $Td0[0],A9
466 ||      MV      $Td0[1],A8
467         MV      $Td0[2],B9
468 ||      MV      $Td0[3],B8
469 || [B2] STNDW   A9:A8,*OUT++
470    [B2] STNDW   B9:B8,*OUT++
471         .else
472         PACK2   $Td1[3],$Td0[0],$Td1[3]
473 ||      PACK2   $Td1[0],$Td0[1],$Td1[0]
474         PACK2   $Td3[1],$Td2[2],$Td3[1]
475 ||      PACK2   $Td3[2],$Td2[3],$Td3[2]
476         PACKL4  $Td3[1],$Td1[3],$Td1[3]
477 ||      PACKL4  $Td3[2],$Td1[0],$Td1[0]
478         XOR     $K[0],$Td1[3],$Td1[3]           ; s[0]
479 ||      XOR     $K[1],$Td1[0],$Td1[0]           ; s[1]
480
481         PACK2   $Td1[1],$Td0[2],$Td1[1]
482 ||      PACK2   $Td1[2],$Td0[3],$Td1[2]
483         PACK2   $Td3[3],$Td2[0],$Td3[3]
484 ||      PACK2   $Td3[0],$Td2[1],$Td3[0]
485 ||      BNOP    RA
486         PACKL4  $Td3[3],$Td1[1],$Td1[1]
487 ||      PACKL4  $Td3[0],$Td1[2],$Td1[2]
488         XOR     $K[2],$Td1[1],$Td1[1]           ; s[2]
489 ||      XOR     $K[3],$Td1[2],$Td1[2]           ; s[3]
490
491         MV      $Td1[3],A8
492 ||      MV      $Td1[0],A9
493         MV      $Td1[1],B8
494 ||      MV      $Td1[2],B9
495 || [B2] STNDW   A9:A8,*OUT++
496    [B2] STNDW   B9:B8,*OUT++
497         .endif
498         .endasmfunc
499 ___
500 {
501 my @K=(@K,@s);                  # extended key
502 my @Te4=map("B$_",(16..19));
503
504 my @Kx9=@Te0;                   # used in AES_set_decrypt_key
505 my @KxB=@Te1;
506 my @KxD=@Te2;
507 my @KxE=@Te3;
508
509 $code.=<<___;
510         .asg    OUT,BITS
511
512         .global _AES_set_encrypt_key
513 _AES_set_encrypt_key:
514 __set_encrypt_key:
515         .asmfunc
516         MV      INP,A0
517 ||      SHRU    BITS,5,BITS                     ; 128-192-256 -> 4-6-8
518 ||      MV      KEY,A1
519   [!A0] B       RA
520 ||[!A0] MVK     -1,RET
521 ||[!A0] MVK     1,A1                            ; only one B RA
522   [!A1] B       RA
523 ||[!A1] MVK     -1,RET
524 ||[!A1] MVK     0,A0
525 ||      MVK     0,B0
526 ||      MVK     0,A1
527    [A0] LDNDW   *INP++,A9:A8
528 || [A0] CMPEQ   4,BITS,B0
529 || [A0] CMPLT   3,BITS,A1
530    [B0] B       key128?
531 || [A1] LDNDW   *INP++,B9:B8
532 || [A0] CMPEQ   6,BITS,B0
533 || [A0] CMPLT   5,BITS,A1
534    [B0] B       key192?
535 || [A1] LDNDW   *INP++,B17:B16
536 || [A0] CMPEQ   8,BITS,B0
537 || [A0] CMPLT   7,BITS,A1
538    [B0] B       key256?
539 || [A1] LDNDW   *INP++,B19:B18
540
541         .if     __TI_EABI__
542    [A0] ADD     0,KEY,$KPA
543 || [A0] ADD     4,KEY,$KPB
544 || [A0] MVKL    \$PCR_OFFSET(AES_Te4,_AES_set_encrypt_key),$TEA
545 || [A0] ADDKPC  _AES_set_encrypt_key,B6
546    [A0] MVKH    \$PCR_OFFSET(AES_Te4,_AES_set_encrypt_key),$TEA
547    [A0] ADD     B6,$TEA,$TEA                    ; AES_Te4
548         .else
549    [A0] ADD     0,KEY,$KPA
550 || [A0] ADD     4,KEY,$KPB
551 || [A0] MVKL    (AES_Te4-_AES_set_encrypt_key),$TEA
552 || [A0] ADDKPC  _AES_set_encrypt_key,B6
553    [A0] MVKH    (AES_Te4-_AES_set_encrypt_key),$TEA
554    [A0] ADD     B6,$TEA,$TEA                    ; AES_Te4
555         .endif
556         NOP
557         NOP
558
559         BNOP    RA,5
560 ||      MVK     -2,RET                          ; unknown bit lenght
561 ||      MVK     0,B0                            ; redundant
562 ;;====================================================================
563 ;;====================================================================
564 key128?:
565         .if     .BIG_ENDIAN
566         MV      A9,$K[0]
567 ||      MV      A8,$K[1]
568 ||      MV      B9,$Te4[2]
569 ||      MV      B8,$K[3]
570         .else
571         MV      A8,$K[0]
572 ||      MV      A9,$K[1]
573 ||      MV      B8,$Te4[2]
574 ||      MV      B9,$K[3]
575         .endif
576
577         MVK     256,A0
578 ||      MVK     9,B0
579
580         SPLOOPD 14
581 ||      MVC     B0,ILC
582 ||      MV      $TEA,$TEB
583 ||      ADD     $TEA,A0,A30                     ; rcon
584 ;;====================================================================
585         LDW     *A30++[1],A31                   ; rcon[i]
586 ||      MV      $Te4[2],$K[2]
587 ||      EXTU    $K[3],EXT1,24,$Te4[0]
588         LDBU    *${TEB}[$Te4[0]],$Te4[0]
589 ||      MV      $K[3],A0
590 ||      EXTU    $K[3],EXT2,24,$Te4[1]
591         LDBU    *${TEB}[$Te4[1]],$Te4[1]
592 ||      EXTU    A0,EXT3,24,A0
593 ||      EXTU    $K[3],EXT0,24,$Te4[3]
594         .if     .BIG_ENDIAN
595         LDBU    *${TEA}[A0],$Te4[3]
596 ||      LDBU    *${TEB}[$Te4[3]],A0
597         .else
598         LDBU    *${TEA}[A0],A0
599 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
600         .endif
601
602         STW     $K[0],*$KPA++[2]
603 ||      STW     $K[1],*$KPB++[2]
604         STW     $K[2],*$KPA++[2]
605 ||      STW     $K[3],*$KPB++[2]
606
607         XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
608         .if     .BIG_ENDIAN
609         PACK2   $Te4[0],$Te4[1],$Te4[1]
610         PACK2   $Te4[3],A0,$Te4[3]
611         PACKL4  $Te4[1],$Te4[3],$Te4[3]
612         .else
613         PACK2   $Te4[1],$Te4[0],$Te4[1]
614         PACK2   $Te4[3],A0,$Te4[3]
615         PACKL4  $Te4[3],$Te4[1],$Te4[3]
616         .endif
617         XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
618         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
619         MV      $Te4[0],$K[0]
620 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
621         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
622         SPKERNEL
623 ;;====================================================================
624         BNOP    RA
625         MV      $Te4[2],$K[2]
626 ||      STW     $K[0],*$KPA++[2]
627 ||      STW     $K[1],*$KPB++[2]
628         STW     $K[2],*$KPA++[2]
629 ||      STW     $K[3],*$KPB++[2]
630         MVK     10,B0                           ; rounds
631         STW     B0,*++${KPB}[15]
632         MVK     0,RET
633 ;;====================================================================
634 ;;====================================================================
635 key192?:
636         .if     .BIG_ENDIAN
637         MV      A9,$K[0]
638 ||      MV      A8,$K[1]
639 ||      MV      B9,$K[2]
640 ||      MV      B8,$K[3]
641         MV      B17,$Te4[2]
642 ||      MV      B16,$K[5]
643         .else
644         MV      A8,$K[0]
645 ||      MV      A9,$K[1]
646 ||      MV      B8,$K[2]
647 ||      MV      B9,$K[3]
648         MV      B16,$Te4[2]
649 ||      MV      B17,$K[5]
650         .endif
651
652         MVK     256,A0
653 ||      MVK     6,B0
654         MV      $TEA,$TEB
655 ||      ADD     $TEA,A0,A30                     ; rcon
656 ;;====================================================================
657 loop192?:
658         LDW     *A30++[1],A31                   ; rcon[i]
659 ||      MV      $Te4[2],$K[4]
660 ||      EXTU    $K[5],EXT1,24,$Te4[0]
661         LDBU    *${TEB}[$Te4[0]],$Te4[0]
662 ||      MV      $K[5],A0
663 ||      EXTU    $K[5],EXT2,24,$Te4[1]
664         LDBU    *${TEB}[$Te4[1]],$Te4[1]
665 ||      EXTU    A0,EXT3,24,A0
666 ||      EXTU    $K[5],EXT0,24,$Te4[3]
667         .if     .BIG_ENDIAN
668         LDBU    *${TEA}[A0],$Te4[3]
669 ||      LDBU    *${TEB}[$Te4[3]],A0
670         .else
671         LDBU    *${TEA}[A0],A0
672 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
673         .endif
674
675         STW     $K[0],*$KPA++[2]
676 ||      STW     $K[1],*$KPB++[2]
677         STW     $K[2],*$KPA++[2]
678 ||      STW     $K[3],*$KPB++[2]
679         STW     $K[4],*$KPA++[2]
680 ||      STW     $K[5],*$KPB++[2]
681
682         XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
683         .if     .BIG_ENDIAN
684         PACK2   $Te4[0],$Te4[1],$Te4[1]
685 ||      PACK2   $Te4[3],A0,$Te4[3]
686         PACKL4  $Te4[1],$Te4[3],$Te4[3]
687         .else
688         PACK2   $Te4[1],$Te4[0],$Te4[1]
689 ||      PACK2   $Te4[3],A0,$Te4[3]
690         PACKL4  $Te4[3],$Te4[1],$Te4[3]
691         .endif
692         BDEC    loop192?,B0
693 ||      XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
694         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
695         MV      $Te4[0],$K[0]
696 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
697         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
698         MV      $Te4[2],$K[2]
699 ||      XOR     $K[3],$K[4],$Te4[2]             ; K[4]
700         XOR     $Te4[2],$K[5],$K[5]             ; K[5]
701 ;;====================================================================
702         BNOP    RA
703         STW     $K[0],*$KPA++[2]
704 ||      STW     $K[1],*$KPB++[2]
705         STW     $K[2],*$KPA++[2]
706 ||      STW     $K[3],*$KPB++[2]
707         MVK     12,B0                           ; rounds
708         STW     B0,*++${KPB}[7]
709         MVK     0,RET
710 ;;====================================================================
711 ;;====================================================================
712 key256?:
713         .if     .BIG_ENDIAN
714         MV      A9,$K[0]
715 ||      MV      A8,$K[1]
716 ||      MV      B9,$K[2]
717 ||      MV      B8,$K[3]
718         MV      B17,$K[4]
719 ||      MV      B16,$K[5]
720 ||      MV      B19,$Te4[2]
721 ||      MV      B18,$K[7]
722         .else
723         MV      A8,$K[0]
724 ||      MV      A9,$K[1]
725 ||      MV      B8,$K[2]
726 ||      MV      B9,$K[3]
727         MV      B16,$K[4]
728 ||      MV      B17,$K[5]
729 ||      MV      B18,$Te4[2]
730 ||      MV      B19,$K[7]
731         .endif
732
733         MVK     256,A0
734 ||      MVK     6,B0
735         MV      $TEA,$TEB
736 ||      ADD     $TEA,A0,A30                     ; rcon
737 ;;====================================================================
738 loop256?:
739         LDW     *A30++[1],A31                   ; rcon[i]
740 ||      MV      $Te4[2],$K[6]
741 ||      EXTU    $K[7],EXT1,24,$Te4[0]
742         LDBU    *${TEB}[$Te4[0]],$Te4[0]
743 ||      MV      $K[7],A0
744 ||      EXTU    $K[7],EXT2,24,$Te4[1]
745         LDBU    *${TEB}[$Te4[1]],$Te4[1]
746 ||      EXTU    A0,EXT3,24,A0
747 ||      EXTU    $K[7],EXT0,24,$Te4[3]
748         .if     .BIG_ENDIAN
749         LDBU    *${TEA}[A0],$Te4[3]
750 ||      LDBU    *${TEB}[$Te4[3]],A0
751         .else
752         LDBU    *${TEA}[A0],A0
753 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
754         .endif
755
756         STW     $K[0],*$KPA++[2]
757 ||      STW     $K[1],*$KPB++[2]
758         STW     $K[2],*$KPA++[2]
759 ||      STW     $K[3],*$KPB++[2]
760         STW     $K[4],*$KPA++[2]
761 ||      STW     $K[5],*$KPB++[2]
762         STW     $K[6],*$KPA++[2]
763 ||      STW     $K[7],*$KPB++[2]
764 ||      XOR     A31,$K[0],$K[0]                 ; ^=rcon[i]
765         .if     .BIG_ENDIAN
766         PACK2   $Te4[0],$Te4[1],$Te4[1]
767 ||      PACK2   $Te4[3],A0,$Te4[3]
768         PACKL4  $Te4[1],$Te4[3],$Te4[3]
769 ||[!B0] B       done256?
770         .else
771         PACK2   $Te4[1],$Te4[0],$Te4[1]
772 ||      PACK2   $Te4[3],A0,$Te4[3]
773         PACKL4  $Te4[3],$Te4[1],$Te4[3]
774 ||[!B0] B       done256?
775         .endif
776         XOR     $Te4[3],$K[0],$Te4[0]           ; K[0]
777         XOR     $Te4[0],$K[1],$K[1]             ; K[1]
778         MV      $Te4[0],$K[0]
779 ||      XOR     $K[1],$K[2],$Te4[2]             ; K[2]
780         XOR     $Te4[2],$K[3],$K[3]             ; K[3]
781
782         MV      $Te4[2],$K[2]
783 || [B0] EXTU    $K[3],EXT0,24,$Te4[0]
784 || [B0] SUB     B0,1,B0
785         LDBU    *${TEB}[$Te4[0]],$Te4[0]
786 ||      MV      $K[3],A0
787 ||      EXTU    $K[3],EXT1,24,$Te4[1]
788         LDBU    *${TEB}[$Te4[1]],$Te4[1]
789 ||      EXTU    A0,EXT2,24,A0
790 ||      EXTU    $K[3],EXT3,24,$Te4[3]
791
792         .if     .BIG_ENDIAN
793         LDBU    *${TEA}[A0],$Te4[3]
794 ||      LDBU    *${TEB}[$Te4[3]],A0
795         NOP     3
796         PACK2   $Te4[0],$Te4[1],$Te4[1]
797         PACK2   $Te4[3],A0,$Te4[3]
798 ||      B       loop256?
799         PACKL4  $Te4[1],$Te4[3],$Te4[3]
800         .else
801         LDBU    *${TEA}[A0],A0
802 ||      LDBU    *${TEB}[$Te4[3]],$Te4[3]
803         NOP     3
804         PACK2   $Te4[1],$Te4[0],$Te4[1]
805         PACK2   $Te4[3],A0,$Te4[3]
806 ||      B       loop256?
807         PACKL4  $Te4[3],$Te4[1],$Te4[3]
808         .endif
809
810         XOR     $Te4[3],$K[4],$Te4[0]           ; K[4]
811         XOR     $Te4[0],$K[5],$K[5]             ; K[5]
812         MV      $Te4[0],$K[4]
813 ||      XOR     $K[5],$K[6],$Te4[2]             ; K[6]
814         XOR     $Te4[2],$K[7],$K[7]             ; K[7]
815 ;;====================================================================
816 done256?:
817         BNOP    RA
818         STW     $K[0],*$KPA++[2]
819 ||      STW     $K[1],*$KPB++[2]
820         STW     $K[2],*$KPA++[2]
821 ||      STW     $K[3],*$KPB++[2]
822         MVK     14,B0                           ; rounds
823         STW     B0,*--${KPB}[1]
824         MVK     0,RET
825         .endasmfunc
826
827         .global _AES_set_decrypt_key
828 _AES_set_decrypt_key:
829         .asmfunc
    ;; Decryption key schedule, built in three steps:
    ;;   1. call __set_encrypt_key with the caller's argument registers
    ;;      untouched (it returns to ret? below);
    ;;   2. swap the 16-byte round keys end-for-end (first SPLOOP);
    ;;   3. run InvMixColumns in place over every round key except the
    ;;      first and the last (second SPLOOP).
    ;; B30 carries KEY and B31 the caller's RA across the call. GFPGFR is
    ;; saved in B30 during step 3 and restored before returning. RET is
    ;; set to 0 on the normal exit path.
830         B       __set_encrypt_key               ; guarantee local call
831         MV      KEY,B30                         ; B30 is not modified; used below as base of the round keys
832         MV      RA, B31                         ; B31 is not modified; final return address
833         ADDKPC  ret?,RA,2                       ; RA = ret?, so the callee returns here (plus 2 NOP cycles)
834 ret?:                                           ; B0 holds rounds or zero
835   [!B0] BNOP    B31                             ; return if zero; the five [B0] instructions below sit in its delay slots
836    [B0] SHL     B0,4,A0                         ; offset to last round key
837    [B0] SHRU    B0,1,B1
838    [B0] SUB     B1,1,B1                         ; B1 = rounds/2 - 1, loaded into ILC below
839    [B0] MVK     0x0000001B,B3                   ; AES polynomial
840    [B0] MVKH    0x07000000,B3                   ; B3 = 0x0700001B, written to GFPGFR further down
841
842         SPLOOPD 9                               ; flip round keys
843 ||      MVC     B1,ILC
844 ||      MV      B30,$KPA                        ; front pointer (KPA) = first round key
845 ||      ADD     B30,A0,$KPB                     ; back pointer (KPB) = last round key; reads A0 = rounds*16
846 ||      MVK     16,A0                           ; sizeof(round key)
847 ;;====================================================================
    ;; Each pass loads one round key from the front into A16-A19 and one
    ;; from the back into B16-B19, steps the pointers towards each other
    ;; by 16 bytes, then stores each key at the other one's old location.
848         LDW     *${KPA}[0],A16
849 ||      LDW     *${KPB}[0],B16
850         LDW     *${KPA}[1],A17
851 ||      LDW     *${KPB}[1],B17
852         LDW     *${KPA}[2],A18
853 ||      LDW     *${KPB}[2],B18
854         LDW     *${KPA}[3],A19
855 ||      ADD     $KPA,A0,$KPA                    ; front pointer += 16
856 ||      LDW     *${KPB}[3],B19
857 ||      SUB     $KPB,A0,$KPB                    ; back pointer -= 16
858         NOP                                     ; pad so the first LDW results are ready for the STWs
859         STW     B16,*${KPA}[-4]                 ; -4 words: slot the front pointer has just left
860 ||      STW     A16,*${KPB}[4]                  ; +4 words: slot the back pointer has just left
861         STW     B17,*${KPA}[-3]
862 ||      STW     A17,*${KPB}[5]
863         STW     B18,*${KPA}[-2]
864 ||      STW     A18,*${KPB}[6]
865         STW     B19,*${KPA}[-1]
866 ||      STW     A19,*${KPB}[7]
867         SPKERNEL
868 ;;====================================================================
869         SUB     B0,1,B0                         ; skip last round
870 ||      ADD     B30,A0,$KPA                     ; skip first round
871 ||      ADD     B30,A0,$KPB                     ; both pointers = key base + 16 (A0 is 16 here)
872 ||      MVC     GFPGFR,B30                      ; save GFPGFR
873         LDW     *${KPA}[0],$K[0]                ; preload the first round key to transform
874 ||      LDW     *${KPB}[1],$K[1]
875 ||      MVC     B3,GFPGFR                       ; install the value built in B3 (AES polynomial)
876         LDW     *${KPA}[2],$K[2]
877 ||      LDW     *${KPB}[3],$K[3]
878         MVK     0x00000909,A24
879 ||      MVK     0x00000B0B,B24
880         MVKH    0x09090000,A24                  ; A24 = 0x09090909
881 ||      MVKH    0x0B0B0000,B24                  ; B24 = 0x0B0B0B0B
882         MVC     B0,ILC                          ; ILC = rounds-1 (value of B0 before the parallel SUB)
883 ||      SUB     B0,1,B0                         ; B0 now gates the look-ahead loads/multiplies in the loop
884
885         GMPY4   $K[0],A24,$Kx9[0]               ; ·0x09
886 ||      GMPY4   $K[1],A24,$Kx9[1]
887 ||      MVK     0x00000D0D,A25
888 ||      MVK     0x00000E0E,B25
889         GMPY4   $K[2],A24,$Kx9[2]
890 ||      GMPY4   $K[3],A24,$Kx9[3]
891 ||      MVKH    0x0D0D0000,A25                  ; A25 = 0x0D0D0D0D
892 ||      MVKH    0x0E0E0000,B25                  ; B25 = 0x0E0E0E0E
893
894         GMPY4   $K[0],B24,$KxB[0]               ; ·0x0B
895 ||      GMPY4   $K[1],B24,$KxB[1]
896         GMPY4   $K[2],B24,$KxB[2]
897 ||      GMPY4   $K[3],B24,$KxB[3]
898
899         SPLOOP  11                              ; InvMixColumns
900 ;;====================================================================
    ;; Per round key: the x09 and x0B products were started before the
    ;; loop (or by the previous pass); this pass adds the x0D and x0E
    ;; products, rotates and XORs the four together, stores the result
    ;; over the round key it came from, and starts the x09/x0B products
    ;; for the next round key while B0 is non-zero.
901         GMPY4   $K[0],A25,$KxD[0]               ; ·0x0D
902 ||      GMPY4   $K[1],A25,$KxD[1]
903 ||      SWAP2   $Kx9[0],$Kx9[0]                 ; rotate by 16
904 ||      SWAP2   $Kx9[1],$Kx9[1]
905 ||      MV      $K[0],$s[0]                     ; this or DINT
906 ||      MV      $K[1],$s[1]
907 || [B0] LDW     *${KPA}[4],$K[0]                ; look-ahead load of the next round key
908 || [B0] LDW     *${KPB}[5],$K[1]
909         GMPY4   $K[2],A25,$KxD[2]
910 ||      GMPY4   $K[3],A25,$KxD[3]
911 ||      SWAP2   $Kx9[2],$Kx9[2]
912 ||      SWAP2   $Kx9[3],$Kx9[3]
913 ||      MV      $K[2],$s[2]
914 ||      MV      $K[3],$s[3]
915 || [B0] LDW     *${KPA}[6],$K[2]
916 || [B0] LDW     *${KPB}[7],$K[3]
917
918         GMPY4   $s[0],B25,$KxE[0]               ; ·0x0E
919 ||      GMPY4   $s[1],B25,$KxE[1]
920 ||      XOR     $Kx9[0],$KxB[0],$KxB[0]
921 ||      XOR     $Kx9[1],$KxB[1],$KxB[1]
922         GMPY4   $s[2],B25,$KxE[2]
923 ||      GMPY4   $s[3],B25,$KxE[3]
924 ||      XOR     $Kx9[2],$KxB[2],$KxB[2]
925 ||      XOR     $Kx9[3],$KxB[3],$KxB[3]
926
927         ROTL    $KxB[0],TBL3,$KxB[0]            ; rotate amount comes from TBL3 (defined outside this chunk)
928 ||      ROTL    $KxB[1],TBL3,$KxB[1]
929 ||      SWAP2   $KxD[0],$KxD[0]                 ; rotate by 16
930 ||      SWAP2   $KxD[1],$KxD[1]
931         ROTL    $KxB[2],TBL3,$KxB[2]
932 ||      ROTL    $KxB[3],TBL3,$KxB[3]
933 ||      SWAP2   $KxD[2],$KxD[2]
934 ||      SWAP2   $KxD[3],$KxD[3]
935
936         XOR     $KxE[0],$KxD[0],$KxE[0]
937 ||      XOR     $KxE[1],$KxD[1],$KxE[1]
938 || [B0] GMPY4   $K[0],A24,$Kx9[0]               ; ·0x09
939 || [B0] GMPY4   $K[1],A24,$Kx9[1]
940 ||      ADDAW   $KPA,4,$KPA                     ; advance by 4 words (one round key)
941         XOR     $KxE[2],$KxD[2],$KxE[2]
942 ||      XOR     $KxE[3],$KxD[3],$KxE[3]
943 || [B0] GMPY4   $K[2],A24,$Kx9[2]
944 || [B0] GMPY4   $K[3],A24,$Kx9[3]
945 ||      ADDAW   $KPB,4,$KPB
946
947         XOR     $KxB[0],$KxE[0],$KxE[0]
948 ||      XOR     $KxB[1],$KxE[1],$KxE[1]
949 || [B0] GMPY4   $K[0],B24,$KxB[0]               ; ·0x0B
950 || [B0] GMPY4   $K[1],B24,$KxB[1]
951         XOR     $KxB[2],$KxE[2],$KxE[2]
952 ||      XOR     $KxB[3],$KxE[3],$KxE[3]
953 || [B0] GMPY4   $K[2],B24,$KxB[2]
954 || [B0] GMPY4   $K[3],B24,$KxB[3]
955 ||      STW     $KxE[0],*${KPA}[-4]             ; negative offsets: pointers already advanced past this key
956 ||      STW     $KxE[1],*${KPB}[-3]
957         STW     $KxE[2],*${KPA}[-2]
958 ||      STW     $KxE[3],*${KPB}[-1]
959 || [B0] SUB     B0,1,B0
960         SPKERNEL
961 ;;====================================================================
962         BNOP    B31,3                           ; return to the original caller (RA saved in B31)
963         MVC     B30,GFPGFR                      ; restore GFPGFR(*)
964         MVK     0,RET                           ; return value 0
965         .endasmfunc
966 ___
967 # (*)   Even though ABI doesn't specify GFPGFR as non-volatile, there
968 #       are code samples out there that *assume* its default value.
969 }
970 {
971 my ($inp,$out,$blocks,$key,$ivp)=("A4","B4","A6","B6","A8");
972 $code.=<<___;
973         .global _AES_ctr32_encrypt
974 _AES_ctr32_encrypt:
975         .asmfunc
    ;; CTR-mode processing of whole 16-byte blocks with a 32-bit counter.
    ;; Register map (from the Perl list just above this heredoc):
    ;; inp=A4, out=B4, blocks=A6, key=B6, ivp=A8. Data is streamed through
    ;; the INP/OUT symbols, which are defined outside this chunk
    ;; (presumably aliases of A4/B4 -- confirm).
    ;;
    ;; The loop is pipelined by one block: each pass hands the current
    ;; counter block to __encrypt in A9:A8/B9:B8 and loads the matching
    ;; input into A29:A28/B29:B28; the XOR and store of that block happen
    ;; on the following pass, after __encrypt has returned. A1 is 0 on the
    ;; first pass (nothing to store yet) and 1 afterwards; A2 counts the
    ;; blocks still to be handed to __encrypt.
    ;;
    ;; Only one 32-bit word of the counter block is incremented (B30 on
    ;; big-endian, B31 on little-endian), and the incremented counter is
    ;; not stored back through ivp.
    ;; NOTE(review): relies on __encrypt preserving A1, A2, A28-A31 and
    ;; B26-B31 -- confirm against __encrypt.
976         LDNDW   *${ivp}[0],A31:A30      ; load counter value
977 ||      MV      $blocks,A2              ; reassign $blocks
978 ||      DMV     RA,$key,B27:B26         ; reassign RA and $key
979         LDNDW   *${ivp}[1],B31:B30
980 ||      MVK     0,B2                    ; don't let __encrypt load input
981 ||      MVK     0,A1                    ; and postpone writing output
982         .if     .BIG_ENDIAN
983         NOP
984         .else
985         NOP     4                       ; wait for the LDNDW into B31:B30 to complete
986         SWAP2   B31,B31                 ; keep least significant 32 bits
987         SWAP4   B31,B31                 ; in host byte order
988         .endif
989 ctr32_loop?:
990    [A2] BNOP    __encrypt               ; blocks remain: encrypt the next counter block
991 || [A1] XOR     A29,A9,A9               ; input^Ek(counter)
992 || [A1] XOR     A28,A8,A8
993 || [A2] LDNDW   *INP++,A29:A28          ; load input
994   [!A2] BNOP    B27                     ; return; the XORs/stores for the last block still run in the delay slots
995 || [A1] XOR     B29,B9,B9
996 || [A1] XOR     B28,B8,B8
997 || [A2] LDNDW   *INP++,B29:B28
998         .if     .BIG_ENDIAN
999    [A1] STNDW   A9:A8,*OUT++            ; save output
1000 || [A2] DMV     A31,A30,A9:A8           ; pass counter value to __encrypt
1001    [A1] STNDW   B9:B8,*OUT++
1002 || [A2] DMV     B31,B30,B9:B8
1003 || [A2] ADD     B30,1,B30               ; counter++
1004         .else
1005    [A1] STNDW   A9:A8,*OUT++            ; save output
1006 || [A2] DMV     A31,A30,A9:A8
1007 || [A2] SWAP2   B31,B0                  ; B0 = pre-increment counter, halfwords swapped
1008 || [A2] ADD     B31,1,B31               ; counter++
1009    [A1] STNDW   B9:B8,*OUT++
1010 || [A2] MV      B30,B8
1011 || [A2] SWAP4   B0,B9                   ; finish the byte reversal: counter goes out in block byte order
1012         .endif
1013    [A2] ADDKPC  ctr32_loop?,RA          ; return to ctr32_loop?
1014 || [A2] MV      B26,KEY                 ; pass $key
1015 || [A2] SUB     A2,1,A2                 ; $blocks--
1016 ||[!A1] MVK     1,A1                    ; from the next pass on there is a finished block to XOR and store
1017         NOP
1018         NOP
1019         .endasmfunc
1020 ___
1021 }
1022 # Tables are kept in endian-neutral manner
1023 $code.=<<___;
1024         .sect   ".const:aes_asm"
1025         .align  128
1026 AES_Te:
1027         .byte   0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84
1028         .byte   0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
1029         .byte   0xff,0xf2,0xf2,0x0d,    0xd6,0x6b,0x6b,0xbd
1030         .byte   0xde,0x6f,0x6f,0xb1,    0x91,0xc5,0xc5,0x54
1031         .byte   0x60,0x30,0x30,0x50,    0x02,0x01,0x01,0x03
1032         .byte   0xce,0x67,0x67,0xa9,    0x56,0x2b,0x2b,0x7d
1033         .byte   0xe7,0xfe,0xfe,0x19,    0xb5,0xd7,0xd7,0x62
1034         .byte   0x4d,0xab,0xab,0xe6,    0xec,0x76,0x76,0x9a
1035         .byte   0x8f,0xca,0xca,0x45,    0x1f,0x82,0x82,0x9d
1036         .byte   0x89,0xc9,0xc9,0x40,    0xfa,0x7d,0x7d,0x87
1037         .byte   0xef,0xfa,0xfa,0x15,    0xb2,0x59,0x59,0xeb
1038         .byte   0x8e,0x47,0x47,0xc9,    0xfb,0xf0,0xf0,0x0b
1039         .byte   0x41,0xad,0xad,0xec,    0xb3,0xd4,0xd4,0x67
1040         .byte   0x5f,0xa2,0xa2,0xfd,    0x45,0xaf,0xaf,0xea
1041         .byte   0x23,0x9c,0x9c,0xbf,    0x53,0xa4,0xa4,0xf7
1042         .byte   0xe4,0x72,0x72,0x96,    0x9b,0xc0,0xc0,0x5b
1043         .byte   0x75,0xb7,0xb7,0xc2,    0xe1,0xfd,0xfd,0x1c
1044         .byte   0x3d,0x93,0x93,0xae,    0x4c,0x26,0x26,0x6a
1045         .byte   0x6c,0x36,0x36,0x5a,    0x7e,0x3f,0x3f,0x41
1046         .byte   0xf5,0xf7,0xf7,0x02,    0x83,0xcc,0xcc,0x4f
1047         .byte   0x68,0x34,0x34,0x5c,    0x51,0xa5,0xa5,0xf4
1048         .byte   0xd1,0xe5,0xe5,0x34,    0xf9,0xf1,0xf1,0x08
1049         .byte   0xe2,0x71,0x71,0x93,    0xab,0xd8,0xd8,0x73
1050         .byte   0x62,0x31,0x31,0x53,    0x2a,0x15,0x15,0x3f
1051         .byte   0x08,0x04,0x04,0x0c,    0x95,0xc7,0xc7,0x52
1052         .byte   0x46,0x23,0x23,0x65,    0x9d,0xc3,0xc3,0x5e
1053         .byte   0x30,0x18,0x18,0x28,    0x37,0x96,0x96,0xa1
1054         .byte   0x0a,0x05,0x05,0x0f,    0x2f,0x9a,0x9a,0xb5
1055         .byte   0x0e,0x07,0x07,0x09,    0x24,0x12,0x12,0x36
1056         .byte   0x1b,0x80,0x80,0x9b,    0xdf,0xe2,0xe2,0x3d
1057         .byte   0xcd,0xeb,0xeb,0x26,    0x4e,0x27,0x27,0x69
1058         .byte   0x7f,0xb2,0xb2,0xcd,    0xea,0x75,0x75,0x9f
1059         .byte   0x12,0x09,0x09,0x1b,    0x1d,0x83,0x83,0x9e
1060         .byte   0x58,0x2c,0x2c,0x74,    0x34,0x1a,0x1a,0x2e
1061         .byte   0x36,0x1b,0x1b,0x2d,    0xdc,0x6e,0x6e,0xb2
1062         .byte   0xb4,0x5a,0x5a,0xee,    0x5b,0xa0,0xa0,0xfb
1063         .byte   0xa4,0x52,0x52,0xf6,    0x76,0x3b,0x3b,0x4d
1064         .byte   0xb7,0xd6,0xd6,0x61,    0x7d,0xb3,0xb3,0xce
1065         .byte   0x52,0x29,0x29,0x7b,    0xdd,0xe3,0xe3,0x3e
1066         .byte   0x5e,0x2f,0x2f,0x71,    0x13,0x84,0x84,0x97
1067         .byte   0xa6,0x53,0x53,0xf5,    0xb9,0xd1,0xd1,0x68
1068         .byte   0x00,0x00,0x00,0x00,    0xc1,0xed,0xed,0x2c
1069         .byte   0x40,0x20,0x20,0x60,    0xe3,0xfc,0xfc,0x1f
1070         .byte   0x79,0xb1,0xb1,0xc8,    0xb6,0x5b,0x5b,0xed
1071         .byte   0xd4,0x6a,0x6a,0xbe,    0x8d,0xcb,0xcb,0x46
1072         .byte   0x67,0xbe,0xbe,0xd9,    0x72,0x39,0x39,0x4b
1073         .byte   0x94,0x4a,0x4a,0xde,    0x98,0x4c,0x4c,0xd4
1074         .byte   0xb0,0x58,0x58,0xe8,    0x85,0xcf,0xcf,0x4a
1075         .byte   0xbb,0xd0,0xd0,0x6b,    0xc5,0xef,0xef,0x2a
1076         .byte   0x4f,0xaa,0xaa,0xe5,    0xed,0xfb,0xfb,0x16
1077         .byte   0x86,0x43,0x43,0xc5,    0x9a,0x4d,0x4d,0xd7
1078         .byte   0x66,0x33,0x33,0x55,    0x11,0x85,0x85,0x94
1079         .byte   0x8a,0x45,0x45,0xcf,    0xe9,0xf9,0xf9,0x10
1080         .byte   0x04,0x02,0x02,0x06,    0xfe,0x7f,0x7f,0x81
1081         .byte   0xa0,0x50,0x50,0xf0,    0x78,0x3c,0x3c,0x44
1082         .byte   0x25,0x9f,0x9f,0xba,    0x4b,0xa8,0xa8,0xe3
1083         .byte   0xa2,0x51,0x51,0xf3,    0x5d,0xa3,0xa3,0xfe
1084         .byte   0x80,0x40,0x40,0xc0,    0x05,0x8f,0x8f,0x8a
1085         .byte   0x3f,0x92,0x92,0xad,    0x21,0x9d,0x9d,0xbc
1086         .byte   0x70,0x38,0x38,0x48,    0xf1,0xf5,0xf5,0x04
1087         .byte   0x63,0xbc,0xbc,0xdf,    0x77,0xb6,0xb6,0xc1
1088         .byte   0xaf,0xda,0xda,0x75,    0x42,0x21,0x21,0x63
1089         .byte   0x20,0x10,0x10,0x30,    0xe5,0xff,0xff,0x1a
1090         .byte   0xfd,0xf3,0xf3,0x0e,    0xbf,0xd2,0xd2,0x6d
1091         .byte   0x81,0xcd,0xcd,0x4c,    0x18,0x0c,0x0c,0x14
1092         .byte   0x26,0x13,0x13,0x35,    0xc3,0xec,0xec,0x2f
1093         .byte   0xbe,0x5f,0x5f,0xe1,    0x35,0x97,0x97,0xa2
1094         .byte   0x88,0x44,0x44,0xcc,    0x2e,0x17,0x17,0x39
1095         .byte   0x93,0xc4,0xc4,0x57,    0x55,0xa7,0xa7,0xf2
1096         .byte   0xfc,0x7e,0x7e,0x82,    0x7a,0x3d,0x3d,0x47
1097         .byte   0xc8,0x64,0x64,0xac,    0xba,0x5d,0x5d,0xe7
1098         .byte   0x32,0x19,0x19,0x2b,    0xe6,0x73,0x73,0x95
1099         .byte   0xc0,0x60,0x60,0xa0,    0x19,0x81,0x81,0x98
1100         .byte   0x9e,0x4f,0x4f,0xd1,    0xa3,0xdc,0xdc,0x7f
1101         .byte   0x44,0x22,0x22,0x66,    0x54,0x2a,0x2a,0x7e
1102         .byte   0x3b,0x90,0x90,0xab,    0x0b,0x88,0x88,0x83
1103         .byte   0x8c,0x46,0x46,0xca,    0xc7,0xee,0xee,0x29
1104         .byte   0x6b,0xb8,0xb8,0xd3,    0x28,0x14,0x14,0x3c
1105         .byte   0xa7,0xde,0xde,0x79,    0xbc,0x5e,0x5e,0xe2
1106         .byte   0x16,0x0b,0x0b,0x1d,    0xad,0xdb,0xdb,0x76
1107         .byte   0xdb,0xe0,0xe0,0x3b,    0x64,0x32,0x32,0x56
1108         .byte   0x74,0x3a,0x3a,0x4e,    0x14,0x0a,0x0a,0x1e
1109         .byte   0x92,0x49,0x49,0xdb,    0x0c,0x06,0x06,0x0a
1110         .byte   0x48,0x24,0x24,0x6c,    0xb8,0x5c,0x5c,0xe4
1111         .byte   0x9f,0xc2,0xc2,0x5d,    0xbd,0xd3,0xd3,0x6e
1112         .byte   0x43,0xac,0xac,0xef,    0xc4,0x62,0x62,0xa6
1113         .byte   0x39,0x91,0x91,0xa8,    0x31,0x95,0x95,0xa4
1114         .byte   0xd3,0xe4,0xe4,0x37,    0xf2,0x79,0x79,0x8b
1115         .byte   0xd5,0xe7,0xe7,0x32,    0x8b,0xc8,0xc8,0x43
1116         .byte   0x6e,0x37,0x37,0x59,    0xda,0x6d,0x6d,0xb7
1117         .byte   0x01,0x8d,0x8d,0x8c,    0xb1,0xd5,0xd5,0x64
1118         .byte   0x9c,0x4e,0x4e,0xd2,    0x49,0xa9,0xa9,0xe0
1119         .byte   0xd8,0x6c,0x6c,0xb4,    0xac,0x56,0x56,0xfa
1120         .byte   0xf3,0xf4,0xf4,0x07,    0xcf,0xea,0xea,0x25
1121         .byte   0xca,0x65,0x65,0xaf,    0xf4,0x7a,0x7a,0x8e
1122         .byte   0x47,0xae,0xae,0xe9,    0x10,0x08,0x08,0x18
1123         .byte   0x6f,0xba,0xba,0xd5,    0xf0,0x78,0x78,0x88
1124         .byte   0x4a,0x25,0x25,0x6f,    0x5c,0x2e,0x2e,0x72
1125         .byte   0x38,0x1c,0x1c,0x24,    0x57,0xa6,0xa6,0xf1
1126         .byte   0x73,0xb4,0xb4,0xc7,    0x97,0xc6,0xc6,0x51
1127         .byte   0xcb,0xe8,0xe8,0x23,    0xa1,0xdd,0xdd,0x7c
1128         .byte   0xe8,0x74,0x74,0x9c,    0x3e,0x1f,0x1f,0x21
1129         .byte   0x96,0x4b,0x4b,0xdd,    0x61,0xbd,0xbd,0xdc
1130         .byte   0x0d,0x8b,0x8b,0x86,    0x0f,0x8a,0x8a,0x85
1131         .byte   0xe0,0x70,0x70,0x90,    0x7c,0x3e,0x3e,0x42
1132         .byte   0x71,0xb5,0xb5,0xc4,    0xcc,0x66,0x66,0xaa
1133         .byte   0x90,0x48,0x48,0xd8,    0x06,0x03,0x03,0x05
1134         .byte   0xf7,0xf6,0xf6,0x01,    0x1c,0x0e,0x0e,0x12
1135         .byte   0xc2,0x61,0x61,0xa3,    0x6a,0x35,0x35,0x5f
1136         .byte   0xae,0x57,0x57,0xf9,    0x69,0xb9,0xb9,0xd0
1137         .byte   0x17,0x86,0x86,0x91,    0x99,0xc1,0xc1,0x58
1138         .byte   0x3a,0x1d,0x1d,0x27,    0x27,0x9e,0x9e,0xb9
1139         .byte   0xd9,0xe1,0xe1,0x38,    0xeb,0xf8,0xf8,0x13
1140         .byte   0x2b,0x98,0x98,0xb3,    0x22,0x11,0x11,0x33
1141         .byte   0xd2,0x69,0x69,0xbb,    0xa9,0xd9,0xd9,0x70
1142         .byte   0x07,0x8e,0x8e,0x89,    0x33,0x94,0x94,0xa7
1143         .byte   0x2d,0x9b,0x9b,0xb6,    0x3c,0x1e,0x1e,0x22
1144         .byte   0x15,0x87,0x87,0x92,    0xc9,0xe9,0xe9,0x20
1145         .byte   0x87,0xce,0xce,0x49,    0xaa,0x55,0x55,0xff
1146         .byte   0x50,0x28,0x28,0x78,    0xa5,0xdf,0xdf,0x7a
1147         .byte   0x03,0x8c,0x8c,0x8f,    0x59,0xa1,0xa1,0xf8
1148         .byte   0x09,0x89,0x89,0x80,    0x1a,0x0d,0x0d,0x17
1149         .byte   0x65,0xbf,0xbf,0xda,    0xd7,0xe6,0xe6,0x31
1150         .byte   0x84,0x42,0x42,0xc6,    0xd0,0x68,0x68,0xb8
1151         .byte   0x82,0x41,0x41,0xc3,    0x29,0x99,0x99,0xb0
1152         .byte   0x5a,0x2d,0x2d,0x77,    0x1e,0x0f,0x0f,0x11
1153         .byte   0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
1154         .byte   0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
1155 AES_Te4:
1156         .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
1157         .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
1158         .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
1159         .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
1160         .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
1161         .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
1162         .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
1163         .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
1164         .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
1165         .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
1166         .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
1167         .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
1168         .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
1169         .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
1170         .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
1171         .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
1172         .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
1173         .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
1174         .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
1175         .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
1176         .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
1177         .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
1178         .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
1179         .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
1180         .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
1181         .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
1182         .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
1183         .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
1184         .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
1185         .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
1186         .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
1187         .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
1188 rcon:
    ;; Ten round constants, one 4-byte entry each, with the constant in
    ;; the lowest-addressed byte and the other three bytes zero.
    ;; NOTE(review): the 256-bit key schedule forms its rcon pointer as
    ;; TEA+256 (MVK 256,A0 followed by ADD ...,A30 marked "rcon"), so this
    ;; table appears to rely on sitting directly after the 256-byte
    ;; AES_Te4, assuming TEA points at AES_Te4 (set up outside this
    ;; chunk). Confirm before inserting anything between the two labels.
1189         .byte   0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00
1190         .byte   0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
1191         .byte   0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
1192         .byte   0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
1193         .byte   0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
1194         .align  128
1195 AES_Td:
1196         .byte   0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53
1197         .byte   0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
1198         .byte   0x3b,0xab,0x6b,0xcb,    0x1f,0x9d,0x45,0xf1
1199         .byte   0xac,0xfa,0x58,0xab,    0x4b,0xe3,0x03,0x93
1200         .byte   0x20,0x30,0xfa,0x55,    0xad,0x76,0x6d,0xf6
1201         .byte   0x88,0xcc,0x76,0x91,    0xf5,0x02,0x4c,0x25
1202         .byte   0x4f,0xe5,0xd7,0xfc,    0xc5,0x2a,0xcb,0xd7
1203         .byte   0x26,0x35,0x44,0x80,    0xb5,0x62,0xa3,0x8f
1204         .byte   0xde,0xb1,0x5a,0x49,    0x25,0xba,0x1b,0x67
1205         .byte   0x45,0xea,0x0e,0x98,    0x5d,0xfe,0xc0,0xe1
1206         .byte   0xc3,0x2f,0x75,0x02,    0x81,0x4c,0xf0,0x12
1207         .byte   0x8d,0x46,0x97,0xa3,    0x6b,0xd3,0xf9,0xc6
1208         .byte   0x03,0x8f,0x5f,0xe7,    0x15,0x92,0x9c,0x95
1209         .byte   0xbf,0x6d,0x7a,0xeb,    0x95,0x52,0x59,0xda
1210         .byte   0xd4,0xbe,0x83,0x2d,    0x58,0x74,0x21,0xd3
1211         .byte   0x49,0xe0,0x69,0x29,    0x8e,0xc9,0xc8,0x44
1212         .byte   0x75,0xc2,0x89,0x6a,    0xf4,0x8e,0x79,0x78
1213         .byte   0x99,0x58,0x3e,0x6b,    0x27,0xb9,0x71,0xdd
1214         .byte   0xbe,0xe1,0x4f,0xb6,    0xf0,0x88,0xad,0x17
1215         .byte   0xc9,0x20,0xac,0x66,    0x7d,0xce,0x3a,0xb4
1216         .byte   0x63,0xdf,0x4a,0x18,    0xe5,0x1a,0x31,0x82
1217         .byte   0x97,0x51,0x33,0x60,    0x62,0x53,0x7f,0x45
1218         .byte   0xb1,0x64,0x77,0xe0,    0xbb,0x6b,0xae,0x84
1219         .byte   0xfe,0x81,0xa0,0x1c,    0xf9,0x08,0x2b,0x94
1220         .byte   0x70,0x48,0x68,0x58,    0x8f,0x45,0xfd,0x19
1221         .byte   0x94,0xde,0x6c,0x87,    0x52,0x7b,0xf8,0xb7
1222         .byte   0xab,0x73,0xd3,0x23,    0x72,0x4b,0x02,0xe2
1223         .byte   0xe3,0x1f,0x8f,0x57,    0x66,0x55,0xab,0x2a
1224         .byte   0xb2,0xeb,0x28,0x07,    0x2f,0xb5,0xc2,0x03
1225         .byte   0x86,0xc5,0x7b,0x9a,    0xd3,0x37,0x08,0xa5
1226         .byte   0x30,0x28,0x87,0xf2,    0x23,0xbf,0xa5,0xb2
1227         .byte   0x02,0x03,0x6a,0xba,    0xed,0x16,0x82,0x5c
1228         .byte   0x8a,0xcf,0x1c,0x2b,    0xa7,0x79,0xb4,0x92
1229         .byte   0xf3,0x07,0xf2,0xf0,    0x4e,0x69,0xe2,0xa1
1230         .byte   0x65,0xda,0xf4,0xcd,    0x06,0x05,0xbe,0xd5
1231         .byte   0xd1,0x34,0x62,0x1f,    0xc4,0xa6,0xfe,0x8a
1232         .byte   0x34,0x2e,0x53,0x9d,    0xa2,0xf3,0x55,0xa0
1233         .byte   0x05,0x8a,0xe1,0x32,    0xa4,0xf6,0xeb,0x75
1234         .byte   0x0b,0x83,0xec,0x39,    0x40,0x60,0xef,0xaa
1235         .byte   0x5e,0x71,0x9f,0x06,    0xbd,0x6e,0x10,0x51
1236         .byte   0x3e,0x21,0x8a,0xf9,    0x96,0xdd,0x06,0x3d
1237         .byte   0xdd,0x3e,0x05,0xae,    0x4d,0xe6,0xbd,0x46
1238         .byte   0x91,0x54,0x8d,0xb5,    0x71,0xc4,0x5d,0x05
1239         .byte   0x04,0x06,0xd4,0x6f,    0x60,0x50,0x15,0xff
1240         .byte   0x19,0x98,0xfb,0x24,    0xd6,0xbd,0xe9,0x97
1241         .byte   0x89,0x40,0x43,0xcc,    0x67,0xd9,0x9e,0x77
1242         .byte   0xb0,0xe8,0x42,0xbd,    0x07,0x89,0x8b,0x88
1243         .byte   0xe7,0x19,0x5b,0x38,    0x79,0xc8,0xee,0xdb
1244         .byte   0xa1,0x7c,0x0a,0x47,    0x7c,0x42,0x0f,0xe9
1245         .byte   0xf8,0x84,0x1e,0xc9,    0x00,0x00,0x00,0x00
1246         .byte   0x09,0x80,0x86,0x83,    0x32,0x2b,0xed,0x48
1247         .byte   0x1e,0x11,0x70,0xac,    0x6c,0x5a,0x72,0x4e
1248         .byte   0xfd,0x0e,0xff,0xfb,    0x0f,0x85,0x38,0x56
1249         .byte   0x3d,0xae,0xd5,0x1e,    0x36,0x2d,0x39,0x27
1250         .byte   0x0a,0x0f,0xd9,0x64,    0x68,0x5c,0xa6,0x21
1251         .byte   0x9b,0x5b,0x54,0xd1,    0x24,0x36,0x2e,0x3a
1252         .byte   0x0c,0x0a,0x67,0xb1,    0x93,0x57,0xe7,0x0f
1253         .byte   0xb4,0xee,0x96,0xd2,    0x1b,0x9b,0x91,0x9e
1254         .byte   0x80,0xc0,0xc5,0x4f,    0x61,0xdc,0x20,0xa2
1255         .byte   0x5a,0x77,0x4b,0x69,    0x1c,0x12,0x1a,0x16
1256         .byte   0xe2,0x93,0xba,0x0a,    0xc0,0xa0,0x2a,0xe5
1257         .byte   0x3c,0x22,0xe0,0x43,    0x12,0x1b,0x17,0x1d
1258         .byte   0x0e,0x09,0x0d,0x0b,    0xf2,0x8b,0xc7,0xad
1259         .byte   0x2d,0xb6,0xa8,0xb9,    0x14,0x1e,0xa9,0xc8
1260         .byte   0x57,0xf1,0x19,0x85,    0xaf,0x75,0x07,0x4c
1261         .byte   0xee,0x99,0xdd,0xbb,    0xa3,0x7f,0x60,0xfd
1262         .byte   0xf7,0x01,0x26,0x9f,    0x5c,0x72,0xf5,0xbc
1263         .byte   0x44,0x66,0x3b,0xc5,    0x5b,0xfb,0x7e,0x34
1264         .byte   0x8b,0x43,0x29,0x76,    0xcb,0x23,0xc6,0xdc
1265         .byte   0xb6,0xed,0xfc,0x68,    0xb8,0xe4,0xf1,0x63
1266         .byte   0xd7,0x31,0xdc,0xca,    0x42,0x63,0x85,0x10
1267         .byte   0x13,0x97,0x22,0x40,    0x84,0xc6,0x11,0x20
1268         .byte   0x85,0x4a,0x24,0x7d,    0xd2,0xbb,0x3d,0xf8
1269         .byte   0xae,0xf9,0x32,0x11,    0xc7,0x29,0xa1,0x6d
1270         .byte   0x1d,0x9e,0x2f,0x4b,    0xdc,0xb2,0x30,0xf3
1271         .byte   0x0d,0x86,0x52,0xec,    0x77,0xc1,0xe3,0xd0
1272         .byte   0x2b,0xb3,0x16,0x6c,    0xa9,0x70,0xb9,0x99
1273         .byte   0x11,0x94,0x48,0xfa,    0x47,0xe9,0x64,0x22
1274         .byte   0xa8,0xfc,0x8c,0xc4,    0xa0,0xf0,0x3f,0x1a
1275         .byte   0x56,0x7d,0x2c,0xd8,    0x22,0x33,0x90,0xef
1276         .byte   0x87,0x49,0x4e,0xc7,    0xd9,0x38,0xd1,0xc1
1277         .byte   0x8c,0xca,0xa2,0xfe,    0x98,0xd4,0x0b,0x36
1278         .byte   0xa6,0xf5,0x81,0xcf,    0xa5,0x7a,0xde,0x28
1279         .byte   0xda,0xb7,0x8e,0x26,    0x3f,0xad,0xbf,0xa4
1280         .byte   0x2c,0x3a,0x9d,0xe4,    0x50,0x78,0x92,0x0d
1281         .byte   0x6a,0x5f,0xcc,0x9b,    0x54,0x7e,0x46,0x62
1282         .byte   0xf6,0x8d,0x13,0xc2,    0x90,0xd8,0xb8,0xe8
1283         .byte   0x2e,0x39,0xf7,0x5e,    0x82,0xc3,0xaf,0xf5
1284         .byte   0x9f,0x5d,0x80,0xbe,    0x69,0xd0,0x93,0x7c
1285         .byte   0x6f,0xd5,0x2d,0xa9,    0xcf,0x25,0x12,0xb3
1286         .byte   0xc8,0xac,0x99,0x3b,    0x10,0x18,0x7d,0xa7
1287         .byte   0xe8,0x9c,0x63,0x6e,    0xdb,0x3b,0xbb,0x7b
1288         .byte   0xcd,0x26,0x78,0x09,    0x6e,0x59,0x18,0xf4
1289         .byte   0xec,0x9a,0xb7,0x01,    0x83,0x4f,0x9a,0xa8
1290         .byte   0xe6,0x95,0x6e,0x65,    0xaa,0xff,0xe6,0x7e
1291         .byte   0x21,0xbc,0xcf,0x08,    0xef,0x15,0xe8,0xe6
1292         .byte   0xba,0xe7,0x9b,0xd9,    0x4a,0x6f,0x36,0xce
1293         .byte   0xea,0x9f,0x09,0xd4,    0x29,0xb0,0x7c,0xd6
1294         .byte   0x31,0xa4,0xb2,0xaf,    0x2a,0x3f,0x23,0x31
1295         .byte   0xc6,0xa5,0x94,0x30,    0x35,0xa2,0x66,0xc0
1296         .byte   0x74,0x4e,0xbc,0x37,    0xfc,0x82,0xca,0xa6
1297         .byte   0xe0,0x90,0xd0,0xb0,    0x33,0xa7,0xd8,0x15
1298         .byte   0xf1,0x04,0x98,0x4a,    0x41,0xec,0xda,0xf7
1299         .byte   0x7f,0xcd,0x50,0x0e,    0x17,0x91,0xf6,0x2f
1300         .byte   0x76,0x4d,0xd6,0x8d,    0x43,0xef,0xb0,0x4d
1301         .byte   0xcc,0xaa,0x4d,0x54,    0xe4,0x96,0x04,0xdf
1302         .byte   0x9e,0xd1,0xb5,0xe3,    0x4c,0x6a,0x88,0x1b
1303         .byte   0xc1,0x2c,0x1f,0xb8,    0x46,0x65,0x51,0x7f
1304         .byte   0x9d,0x5e,0xea,0x04,    0x01,0x8c,0x35,0x5d
1305         .byte   0xfa,0x87,0x74,0x73,    0xfb,0x0b,0x41,0x2e
1306         .byte   0xb3,0x67,0x1d,0x5a,    0x92,0xdb,0xd2,0x52
1307         .byte   0xe9,0x10,0x56,0x33,    0x6d,0xd6,0x47,0x13
1308         .byte   0x9a,0xd7,0x61,0x8c,    0x37,0xa1,0x0c,0x7a
1309         .byte   0x59,0xf8,0x14,0x8e,    0xeb,0x13,0x3c,0x89
1310         .byte   0xce,0xa9,0x27,0xee,    0xb7,0x61,0xc9,0x35
1311         .byte   0xe1,0x1c,0xe5,0xed,    0x7a,0x47,0xb1,0x3c
1312         .byte   0x9c,0xd2,0xdf,0x59,    0x55,0xf2,0x73,0x3f
1313         .byte   0x18,0x14,0xce,0x79,    0x73,0xc7,0x37,0xbf
1314         .byte   0x53,0xf7,0xcd,0xea,    0x5f,0xfd,0xaa,0x5b
1315         .byte   0xdf,0x3d,0x6f,0x14,    0x78,0x44,0xdb,0x86
1316         .byte   0xca,0xaf,0xf3,0x81,    0xb9,0x68,0xc4,0x3e
1317         .byte   0x38,0x24,0x34,0x2c,    0xc2,0xa3,0x40,0x5f
1318         .byte   0x16,0x1d,0xc3,0x72,    0xbc,0xe2,0x25,0x0c
1319         .byte   0x28,0x3c,0x49,0x8b,    0xff,0x0d,0x95,0x41
1320         .byte   0x39,0xa8,0x01,0x71,    0x08,0x0c,0xb3,0xde
1321         .byte   0xd8,0xb4,0xe4,0x9c,    0x64,0x56,0xc1,0x90
1322         .byte   0x7b,0xcb,0x84,0x61,    0xd5,0x32,0xb6,0x70
1323         .byte   0x48,0x6c,0x5c,0x74,    0xd0,0xb8,0x57,0x42
1324 AES_Td4:
1325         .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
1326         .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1327         .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1328         .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1329         .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1330         .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1331         .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1332         .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1333         .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1334         .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1335         .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1336         .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1337         .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1338         .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1339         .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1340         .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1341         .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1342         .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1343         .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1344         .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1345         .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1346         .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1347         .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1348         .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1349         .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1350         .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1351         .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1352         .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1353         .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1354         .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1355         .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1356         .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1357         .cstring "AES for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
1358         .align  4
1359 ___
1360
1361 print $code;