d70592e62957da433524fb23f83f79924f9a1cc3
[openssl.git] / crypto / aes / asm / aes-ia64.S
1 // Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved.
2 //
3 // Licensed under the OpenSSL license (the "License").  You may not use
4 // this file except in compliance with the License.  You can obtain a copy
5 // in the file LICENSE in the source distribution or at
6 // https://www.openssl.org/source/license.html
7 //
8 // ====================================================================
9 // Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
10 // project. Rights for redistribution and usage in source and binary
11 // forms are granted according to the License.
12 // ====================================================================
13 //
14 // What's wrong with compiler generated code? Compiler never uses
15 // variable 'shr' which is pairable with 'extr'/'dep' instructions.
16 // Then it uses 'zxt' which is an I-type, but can be replaced with
17 // 'and' which in turn can be assigned to an M-port [there are twice as
18 // many M-ports as I-ports on Itanium 2]. By sacrificing a few
19 // registers for small constants (255, 24 and 16) to be used with
20 // 'shr' and 'and' instructions I can achieve better ILP, Instruction
21 // Level Parallelism, and performance. This code outperforms GCC 3.3
22 // generated code by over factor of 2 (two), GCC 3.4 - by 70% and
23 // HP C - by 40%. Measured best-case scenario, i.e. aligned
24 // big-endian input, ECB timing on Itanium 2 is (18 + 13*rounds)
25 // ticks per block, or 9.25 CPU cycles per byte for 128 bit key.
26
27 // Version 1.2 mitigates the hazard of cache-timing attacks by
28 // a) compressing S-boxes from 8KB to 2KB+256B, b) scheduling
29 // references to S-boxes for L2 cache latency, c) prefetching T[ed]4
30 // prior to the last round. As a result performance dropped to (26 + 15*rounds)
31 // ticks per block or 11 cycles per byte processed with 128-bit key.
32 // This is ~16% deterioration. For reference Itanium 2 L1 cache has
33 // 64 bytes line size and L2 - 128 bytes...
34
35 .ident  "aes-ia64.S, version 1.2"
36 .ident  "IA-64 ISA artwork by Andy Polyakov <appro@openssl.org>"
37 .explicit
38 .text
39
40 rk0=r8;     rk1=r9;     // key schedule pointers: rk0 reads words 0,2; rk1 reads words 1,3
41
42 pfssave=r2;             // ar.pfs as saved by the public entry points
43 lcsave=r10;             // saved ar.lc
44 prsave=r3;              // saved predicates (r3 also briefly holds ip on entry)
45 maskff=r11;             // constant 0xff
46 twenty4=r14;            // constant 24
47 sixteen=r15;            // constant 16
48
49 te00=r16;   te11=r17;   te22=r18;   te33=r19;   // teXY: table X, indexed by a byte of sY
50 te01=r20;   te12=r21;   te23=r22;   te30=r23;
51 te02=r24;   te13=r25;   te20=r26;   te31=r27;
52 te03=r28;   te10=r29;   te21=r30;   te32=r31;
53
54 // these are rotating... (r32-r39; the round loops contain no explicit tN->sN copy)
55 t0=r32;     s0=r33;
56 t1=r34;     s1=r35;
57 t2=r36;     s2=r37;
58 t3=r38;     s3=r39;
59
60 te0=r40;    te1=r41;    te2=r42;    te3=r43;    // table pointers, above the 8 rotating registers
61
62 #if defined(_HPUX_SOURCE) && !defined(_LP64)
63 # define ADDP   addp4
64 #else
65 # define ADDP   add
66 #endif
67
68 // Byte offsets from Te0 (te1=te0+TE1 etc.; entries are 8 bytes apart and read with ld4)
69 #define TE0     0
70 #define TE2     2
71 #if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
72 #define TE1     3
73 #define TE3     1
74 #else
75 #define TE1     1
76 #define TE3     3
77 #endif
78
79 // This implies that AES_KEY comprises 32-bit key schedule elements
80 // even on LP64 platforms.
81 #ifndef KSZ
82 # define KSZ    4
83 # define LDKEY  ld4
84 #endif
85
86 .proc   _ia64_AES_encrypt#      // core routine: encrypts one block held in registers
87 // Input:       rk0-rk1 pointing at key schedule words 0 and 1
88 //              te0     as table base (AES_Te in AES_encrypt)
89 //              te3     as AES_KEY->rounds!!!
90 //              s0-s3   as the four 32-bit input words
91 //              maskff,twenty4,sixteen preset to 0xff,24,16
92 // Output:      r16,r20,r24,r28 as s0-s3
93 // Clobber:     r16-r31,rk0-rk1,r32-r43
94 .align  32
95 _ia64_AES_encrypt:
96         .prologue
97         .altrp  b6
98         .body
99 { .mmi; alloc   r16=ar.pfs,12,0,0,8     // 12 inputs, 8 of them rotating
100         LDKEY   t0=[rk0],2*KSZ
101         mov     pr.rot=1<<16    }
102 { .mmi; LDKEY   t1=[rk1],2*KSZ
103         add     te1=TE1,te0
104         add     te3=-3,te3      };;     // rounds-3, becomes ar.lc below
105 { .mib; LDKEY   t2=[rk0],2*KSZ
106         mov     ar.ec=2         }
107 { .mib; LDKEY   t3=[rk1],2*KSZ
108         add     te2=TE2,te0
109         brp.loop.imp    .Le_top,.Le_end-16      };;
110
111 { .mmi; xor     s0=s0,t0                // initial AddRoundKey: s ^= rk[0..3]
112         xor     s1=s1,t1
113         mov     ar.lc=te3       }
114 { .mmi; xor     s2=s2,t2
115         xor     s3=s3,t3
116         add     te3=TE3,te0     };;     // te3 is a table pointer from here on
117 \f
118 .align  32
119 .Le_top:
120 { .mmi; (p0)    LDKEY   t0=[rk0],2*KSZ          // 0/0:rk[0]
121         (p0)    and     te33=s3,maskff          // 0/0:s3&0xff
122         (p0)    extr.u  te22=s2,8,8     }       // 0/0:s2>>8&0xff
123 { .mmi; (p0)    LDKEY   t1=[rk1],2*KSZ          // 0/1:rk[1]
124         (p0)    and     te30=s0,maskff          // 0/1:s0&0xff
125         (p0)    shr.u   te00=s0,twenty4 };;     // 0/0:s0>>24
126 { .mmi; (p0)    LDKEY   t2=[rk0],2*KSZ          // 1/2:rk[2]
127         (p0)    shladd  te33=te33,3,te3         // 1/0:te3+s3&0xff
128         (p0)    extr.u  te23=s3,8,8     }       // 1/1:s3>>8&0xff
129 { .mmi; (p0)    LDKEY   t3=[rk1],2*KSZ          // 1/3:rk[3]
130         (p0)    shladd  te30=te30,3,te3         // 1/1:te3+s0
131         (p0)    shr.u   te01=s1,twenty4 };;     // 1/1:s1>>24
132 { .mmi; (p0)    ld4     te33=[te33]             // 2/0:te3[s3&0xff]
133         (p0)    shladd  te22=te22,3,te2         // 2/0:te2+s2>>8&0xff
134         (p0)    extr.u  te20=s0,8,8     }       // 2/2:s0>>8&0xff
135 { .mmi; (p0)    ld4     te30=[te30]             // 2/1:te3[s0]
136         (p0)    shladd  te23=te23,3,te2         // 2/1:te2+s3>>8
137         (p0)    shr.u   te02=s2,twenty4 };;     // 2/2:s2>>24
138 { .mmi; (p0)    ld4     te22=[te22]             // 3/0:te2[s2>>8]
139         (p0)    shladd  te20=te20,3,te2         // 3/2:te2+s0>>8
140         (p0)    extr.u  te21=s1,8,8     }       // 3/3:s1>>8&0xff
141 { .mmi; (p0)    ld4     te23=[te23]             // 3/1:te2[s3>>8]
142         (p0)    shladd  te00=te00,3,te0         // 3/0:te0+s0>>24
143         (p0)    shr.u   te03=s3,twenty4 };;     // 3/3:s3>>24
144 { .mmi; (p0)    ld4     te20=[te20]             // 4/2:te2[s0>>8]
145         (p0)    shladd  te21=te21,3,te2         // 4/3:te2+s1>>8
146         (p0)    extr.u  te11=s1,16,8    }       // 4/0:s1>>16&0xff
147 { .mmi; (p0)    ld4     te00=[te00]             // 4/0:te0[s0>>24]
148         (p0)    shladd  te01=te01,3,te0         // 4/1:te0+s1>>24
149         (p0)    shr.u   te13=s3,sixteen };;     // 4/2:s3>>16
150 { .mmi; (p0)    ld4     te21=[te21]             // 5/3:te2[s1>>8]
151         (p0)    shladd  te11=te11,3,te1         // 5/0:te1+s1>>16
152         (p0)    extr.u  te12=s2,16,8    }       // 5/1:s2>>16&0xff
153 { .mmi; (p0)    ld4     te01=[te01]             // 5/1:te0[s1>>24]
154         (p0)    shladd  te02=te02,3,te0         // 5/2:te0+s2>>24
155         (p0)    and     te31=s1,maskff  };;     // 5/2:s1&0xff
156 { .mmi; (p0)    ld4     te11=[te11]             // 6/0:te1[s1>>16]
157         (p0)    shladd  te12=te12,3,te1         // 6/1:te1+s2>>16
158         (p0)    extr.u  te10=s0,16,8    }       // 6/3:s0>>16&0xff
159 { .mmi; (p0)    ld4     te02=[te02]             // 6/2:te0[s2>>24]
160         (p0)    shladd  te03=te03,3,te0         // 6/3:te0+s3>>24
161         (p0)    and     te32=s2,maskff  };;     // 6/3:s2&0xff
162
163 { .mmi; (p0)    ld4     te12=[te12]             // 7/1:te1[s2>>16]
164         (p0)    shladd  te31=te31,3,te3         // 7/2:te3+s1&0xff
165         (p0)    and     te13=te13,maskff}       // 7/2:s3>>16&0xff
166 { .mmi; (p0)    ld4     te03=[te03]             // 7/3:te0[s3>>24]
167         (p0)    shladd  te32=te32,3,te3         // 7/3:te3+s2
168         (p0)    xor     t0=t0,te33      };;     // 7/0:
169 { .mmi; (p0)    ld4     te31=[te31]             // 8/2:te3[s1]
170         (p0)    shladd  te13=te13,3,te1         // 8/2:te1+s3>>16
171         (p0)    xor     t0=t0,te22      }       // 8/0:
172 { .mmi; (p0)    ld4     te32=[te32]             // 8/3:te3[s2]
173         (p0)    shladd  te10=te10,3,te1         // 8/3:te1+s0>>16
174         (p0)    xor     t1=t1,te30      };;     // 8/1:
175 { .mmi; (p0)    ld4     te13=[te13]             // 9/2:te1[s3>>16]
176         (p0)    ld4     te10=[te10]             // 9/3:te1[s0>>16]
177         (p0)    xor     t0=t0,te00      };;     // 9/0:         !L2 scheduling
178 { .mmi; (p0)    xor     t1=t1,te23              // 10[9]/1:
179         (p0)    xor     t2=t2,te20              // 10[9]/2:
180         (p0)    xor     t3=t3,te21      };;     // 10[9]/3:
181 { .mmi; (p0)    xor     t0=t0,te11              // 11[10]/0:done!
182         (p0)    xor     t1=t1,te01              // 11[10]/1:
183         (p0)    xor     t2=t2,te02      };;     // 11[10]/2:    !L2 scheduling
184 { .mmi; (p0)    xor     t3=t3,te03              // 12[10]/3:
185         (p16)   cmp.eq  p0,p17=r0,r0    };;     // 12[10]/clear (p17)
186 { .mmi; (p0)    xor     t1=t1,te12              // 13[11]/1:done!
187         (p0)    xor     t2=t2,te31              // 13[11]/2:
188         (p0)    xor     t3=t3,te32      }       // 13[11]/3:
189 { .mmi; (p17)   add     te0=2048,te0            // 13[11]/te0 -> Te4 (only when p17 survives)
190         (p17)   add     te1=2048+64-TE1,te1};;  // 13[11]/te1 -> Te4+64
191 { .mib; (p0)    xor     t2=t2,te13              // 14[12]/2:done!
192         (p17)   add     te2=2048+128-TE2,te2}   // 14[12]/te2 -> Te4+128
193 { .mib; (p0)    xor     t3=t3,te10              // 14[12]/3:done!
194         (p17)   add     te3=2048+192-TE3,te3    // 14[12]/te3 -> Te4+192
195         br.ctop.sptk    .Le_top         };;
196 .Le_end:
197 \f
198
199 { .mmi; ld8     te12=[te0]              // prefetch Te4
200         ld8     te31=[te1]      }
201 { .mmi; ld8     te10=[te2]
202         ld8     te32=[te3]      }
203
204 { .mmi; LDKEY   t0=[rk0],2*KSZ          // 0/0:rk[0]
205         and     te33=s3,maskff          // 0/0:s3&0xff
206         extr.u  te22=s2,8,8     }       // 0/0:s2>>8&0xff
207 { .mmi; LDKEY   t1=[rk1],2*KSZ          // 0/1:rk[1]
208         and     te30=s0,maskff          // 0/1:s0&0xff
209         shr.u   te00=s0,twenty4 };;     // 0/0:s0>>24
210 { .mmi; LDKEY   t2=[rk0],2*KSZ          // 1/2:rk[2]
211         add     te33=te33,te0           // 1/0:te0+s3&0xff
212         extr.u  te23=s3,8,8     }       // 1/1:s3>>8&0xff
213 { .mmi; LDKEY   t3=[rk1],2*KSZ          // 1/3:rk[3]
214         add     te30=te30,te0           // 1/1:te0+s0
215         shr.u   te01=s1,twenty4 };;     // 1/1:s1>>24
216 { .mmi; ld1     te33=[te33]             // 2/0:te0[s3&0xff]
217         add     te22=te22,te0           // 2/0:te0+s2>>8&0xff
218         extr.u  te20=s0,8,8     }       // 2/2:s0>>8&0xff
219 { .mmi; ld1     te30=[te30]             // 2/1:te0[s0]
220         add     te23=te23,te0           // 2/1:te0+s3>>8
221         shr.u   te02=s2,twenty4 };;     // 2/2:s2>>24
222 { .mmi; ld1     te22=[te22]             // 3/0:te0[s2>>8]
223         add     te20=te20,te0           // 3/2:te0+s0>>8
224         extr.u  te21=s1,8,8     }       // 3/3:s1>>8&0xff
225 { .mmi; ld1     te23=[te23]             // 3/1:te0[s3>>8]
226         add     te00=te00,te0           // 3/0:te0+s0>>24
227         shr.u   te03=s3,twenty4 };;     // 3/3:s3>>24
228 { .mmi; ld1     te20=[te20]             // 4/2:te0[s0>>8]
229         add     te21=te21,te0           // 4/3:te0+s1>>8
230         extr.u  te11=s1,16,8    }       // 4/0:s1>>16&0xff
231 { .mmi; ld1     te00=[te00]             // 4/0:te0[s0>>24]
232         add     te01=te01,te0           // 4/1:te0+s1>>24
233         shr.u   te13=s3,sixteen };;     // 4/2:s3>>16
234 { .mmi; ld1     te21=[te21]             // 5/3:te0[s1>>8]
235         add     te11=te11,te0           // 5/0:te0+s1>>16
236         extr.u  te12=s2,16,8    }       // 5/1:s2>>16&0xff
237 { .mmi; ld1     te01=[te01]             // 5/1:te0[s1>>24]
238         add     te02=te02,te0           // 5/2:te0+s2>>24
239         and     te31=s1,maskff  };;     // 5/2:s1&0xff
240 { .mmi; ld1     te11=[te11]             // 6/0:te0[s1>>16]
241         add     te12=te12,te0           // 6/1:te0+s2>>16
242         extr.u  te10=s0,16,8    }       // 6/3:s0>>16&0xff
243 { .mmi; ld1     te02=[te02]             // 6/2:te0[s2>>24]
244         add     te03=te03,te0           // 6/3:te0+s3>>24
245         and     te32=s2,maskff  };;     // 6/3:s2&0xff
246
247 { .mmi; ld1     te12=[te12]             // 7/1:te0[s2>>16]
248         add     te31=te31,te0           // 7/2:te0+s1&0xff
249         dep     te33=te22,te33,8,8}     // 7/0:
250 { .mmi; ld1     te03=[te03]             // 7/3:te0[s3>>24]
251         add     te32=te32,te0           // 7/3:te0+s2
252         and     te13=te13,maskff};;     // 7/2:s3>>16&0xff
253 { .mmi; ld1     te31=[te31]             // 8/2:te0[s1]
254         add     te13=te13,te0           // 8/2:te0+s3>>16
255         dep     te30=te23,te30,8,8}     // 8/1:
256 { .mmi; ld1     te32=[te32]             // 8/3:te0[s2]
257         add     te10=te10,te0           // 8/3:te0+s0>>16
258         shl     te00=te00,twenty4};;    // 8/0:
259 { .mii; ld1     te13=[te13]             // 9/2:te0[s3>>16]
260         dep     te33=te11,te33,16,8     // 9/0:
261         shl     te01=te01,twenty4};;    // 9/1:
262 { .mii; ld1     te10=[te10]             // 10/3:te0[s0>>16]
263         dep     te31=te20,te31,8,8      // 10/2:
264         shl     te02=te02,twenty4};;    // 10/2:
265 { .mii; xor     t0=t0,te33              // 11/0:
266         dep     te32=te21,te32,8,8      // 11/3:
267         shl     te12=te12,sixteen};;    // 11/1:
268 { .mii; xor     r16=t0,te00             // 12/0:done!
269         dep     te31=te13,te31,16,8     // 12/2:
270         shl     te03=te03,twenty4};;    // 12/3:
271 { .mmi; xor     t1=t1,te01              // 13/1:
272         xor     t2=t2,te02              // 13/2:
273         dep     te32=te10,te32,16,8};;  // 13/3:
274 { .mmi; xor     t1=t1,te30              // 14/1:
275         xor     r24=t2,te31             // 14/2:done!
276         xor     t3=t3,te32      };;     // 14/3:
277 { .mib; xor     r20=t1,te12             // 15/1:done!
278         xor     r28=t3,te03             // 15/3:done!
279         br.ret.sptk     b6      };;
280 .endp   _ia64_AES_encrypt#
281
282 // void AES_encrypt (const void *in,void *out,const AES_KEY *key);
283 .global AES_encrypt#
284 .proc   AES_encrypt#
285 .align  32
286 AES_encrypt:
287         .prologue
288         .save   ar.pfs,pfssave
289 { .mmi; alloc   pfssave=ar.pfs,3,1,12,0 // 3 in, 1 local, 12 out (the core's 12 inputs)
290         and     out0=3,in0              // in&3, tested for alignment on the HP-UX path
291         mov     r3=ip                   }       // base for ip-relative address of AES_Te
292 { .mmi; ADDP    in0=0,in0
293         mov     loc0=psr.um             // save user mask, restored before returning
294         ADDP    out11=KSZ*60,in2        };;     // &AES_KEY->rounds
295
296 { .mmi; ld4     out11=[out11]                   // AES_KEY->rounds
297         add     out8=(AES_Te#-AES_encrypt#),r3  // Te0 (reads r3 in the bundle that reuses it as prsave)
298         .save   pr,prsave
299         mov     prsave=pr               }
300 { .mmi; rum     1<<3                            // clear um.ac
301         .save   ar.lc,lcsave
302         mov     lcsave=ar.lc            };;
303
304         .body
305 #if defined(_HPUX_SOURCE)       // HPUX is big-endian, cut 15+15 cycles...
306 { .mib; cmp.ne  p6,p0=out0,r0           // p6 = input not 4-byte aligned
307         add     out0=4,in0
308 (p6)    br.dpnt.many    .Le_i_unaligned };;
309
310 { .mmi; ld4     out1=[in0],8            // s0
311         and     out9=3,in1              // out&3, tested just below
312         mov     twenty4=24              }
313 { .mmi; ld4     out3=[out0],8           // s1
314         ADDP    rk0=0,in2
315         mov     sixteen=16              };;
316 { .mmi; ld4     out5=[in0]              // s2
317         cmp.ne  p6,p0=out9,r0           // p6 = output not 4-byte aligned
318         mov     maskff=0xff             }
319 { .mmb; ld4     out7=[out0]             // s3
320         ADDP    rk1=KSZ,in2
321         br.call.sptk.many       b6=_ia64_AES_encrypt    };;
322
323 { .mib; ADDP    in0=4,in1
324         ADDP    in1=0,in1
325 (p6)    br.spnt .Le_o_unaligned         };;
326
327 { .mii; mov     psr.um=loc0             // restore user mask
328         mov     ar.pfs=pfssave
329         mov     ar.lc=lcsave            };;
330 { .mmi; st4     [in1]=r16,8             // s0
331         st4     [in0]=r20,8             // s1
332         mov     pr=prsave,0x1ffff       };;
333 { .mmb; st4     [in1]=r24               // s2
334         st4     [in0]=r28               // s3
335         br.ret.sptk.many        b0      };;
336 #endif
337
338 .align  32
339 .Le_i_unaligned:                        // the only input path unless _HPUX_SOURCE is defined
340 { .mmi; add     out0=1,in0              // four byte cursors: in+0 (in0), in+1, in+2, in+3
341         add     out2=2,in0
342         add     out4=3,in0      };;
343 { .mmi; ld1     r16=[in0],4
344         ld1     r17=[out0],4    }//;;
345 { .mmi; ld1     r18=[out2],4
346         ld1     out1=[out4],4   };;     // s0
347 { .mmi; ld1     r20=[in0],4
348         ld1     r21=[out0],4    }//;;
349 { .mmi; ld1     r22=[out2],4
350         ld1     out3=[out4],4   };;     // s1
351 { .mmi; ld1     r24=[in0],4
352         ld1     r25=[out0],4    }//;;
353 { .mmi; ld1     r26=[out2],4
354         ld1     out5=[out4],4   };;     // s2
355 { .mmi; ld1     r28=[in0]
356         ld1     r29=[out0]      }//;;
357 { .mmi; ld1     r30=[out2]
358         ld1     out7=[out4]     };;     // s3
359
360 { .mii;
361         dep     out1=r16,out1,24,8      //;; byte 0 of each word goes to bits 24-31
362         dep     out3=r20,out3,24,8      }//;;
363 { .mii; ADDP    rk0=0,in2
364         dep     out5=r24,out5,24,8      //;;
365         dep     out7=r28,out7,24,8      };;
366 { .mii; ADDP    rk1=KSZ,in2
367         dep     out1=r17,out1,16,8      //;; byte 1 goes to bits 16-23
368         dep     out3=r21,out3,16,8      }//;;
369 { .mii; mov     twenty4=24
370         dep     out5=r25,out5,16,8      //;;
371         dep     out7=r29,out7,16,8      };;
372 { .mii; mov     sixteen=16
373         dep     out1=r18,out1,8,8       //;; byte 2 goes to bits 8-15
374         dep     out3=r22,out3,8,8       }//;;
375 { .mii; mov     maskff=0xff
376         dep     out5=r26,out5,8,8       //;;
377         dep     out7=r30,out7,8,8       };;
378
379 { .mib; br.call.sptk.many       b6=_ia64_AES_encrypt    };;
380
381 .Le_o_unaligned:                        // byte-wise store of r16,r20,r24,r28, most significant byte first
382 { .mii; ADDP    out0=0,in1
383         extr.u  r17=r16,8,8                     // s0
384         shr.u   r19=r16,twenty4         }//;;
385 { .mii; ADDP    out1=1,in1
386         extr.u  r18=r16,16,8
387         shr.u   r23=r20,twenty4         }//;;   // s1
388 { .mii; ADDP    out2=2,in1
389         extr.u  r21=r20,8,8
390         shr.u   r22=r20,sixteen         }//;;
391 { .mii; ADDP    out3=3,in1
392         extr.u  r25=r24,8,8                     // s2
393         shr.u   r27=r24,twenty4         };;
394 { .mii; st1     [out3]=r16,4
395         extr.u  r26=r24,16,8
396         shr.u   r31=r28,twenty4         }//;;   // s3
397 { .mii; st1     [out2]=r17,4
398         extr.u  r29=r28,8,8
399         shr.u   r30=r28,sixteen         }//;;
400
401 { .mmi; st1     [out1]=r18,4
402         st1     [out0]=r19,4            };;
403 { .mmi; st1     [out3]=r20,4
404         st1     [out2]=r21,4            }//;;
405 { .mmi; st1     [out1]=r22,4
406         st1     [out0]=r23,4            };;
407 { .mmi; st1     [out3]=r24,4
408         st1     [out2]=r25,4
409         mov     pr=prsave,0x1ffff       }//;;
410 { .mmi; st1     [out1]=r26,4
411         st1     [out0]=r27,4
412         mov     ar.pfs=pfssave          };;
413 { .mmi; st1     [out3]=r28
414         st1     [out2]=r29
415         mov     ar.lc=lcsave            }//;;
416 { .mmi; st1     [out1]=r30
417         st1     [out0]=r31              }
418 { .mfb; mov     psr.um=loc0                     // restore user mask
419         br.ret.sptk.many        b0      };;
420 .endp   AES_encrypt#
421
422 // *AES_decrypt are autogenerated by the following script:
423 #if 0
424 #!/usr/bin/env perl
425 print "// *AES_decrypt are autogenerated by the following script:\n#if 0\n";
426 open(PROG,'<'.$0); while(<PROG>) { print; } close(PROG);        # emit a copy of this script
427 print "#endif\n";
428 while(<>) {
429         $process=1      if (/\.proc\s+_ia64_AES_encrypt/);      # start at the encrypt core
430         next            if (!$process);
431
432         #s/te00=s0/td00=s0/;    s/te00/td00/g;
433         s/te11=s1/td13=s3/;     s/te11/td13/g;  # first: retarget the extracting instruction, then rename
434         #s/te22=s2/td22=s2/;    s/te22/td22/g;
435         s/te33=s3/td31=s1/;     s/te33/td31/g;
436
437         #s/te01=s1/td01=s1/;    s/te01/td01/g;
438         s/te12=s2/td10=s0/;     s/te12/td10/g;
439         #s/te23=s3/td23=s3/;    s/te23/td23/g;
440         s/te30=s0/td32=s2/;     s/te30/td32/g;
441
442         #s/te02=s2/td02=s2/;    s/te02/td02/g;
443         s/te13=s3/td11=s1/;     s/te13/td11/g;
444         #s/te20=s0/td20=s0/;    s/te20/td20/g;
445         s/te31=s1/td33=s3/;     s/te31/td33/g;
446
447         #s/te03=s3/td03=s3/;    s/te03/td03/g;
448         s/te10=s0/td12=s2/;     s/te10/td12/g;
449         #s/te21=s1/td21=s1/;    s/te21/td21/g;
450         s/te32=s2/td30=s0/;     s/te32/td30/g;
451
452         s/td/te/g;      # td was a temporary prefix; presumably it keeps renamed registers from matching again
453
454         s/AES_encrypt/AES_decrypt/g;
455         s/\.Le_/.Ld_/g;
456         s/AES_Te#/AES_Td#/g;
457
458         print;
459
460         exit            if (/\.endp\s+AES_decrypt/);    # stop after the public decrypt routine
461 }
462 #endif
463 .proc   _ia64_AES_decrypt#      // core routine: decrypts one block held in registers (generated, hence te* names)
464 // Input:       rk0-rk1 pointing at key schedule words 0 and 1
465 //              te0     as table base (AES_Td in AES_decrypt)
466 //              te3     as AES_KEY->rounds!!!
467 //              s0-s3   as the four 32-bit input words
468 //              maskff,twenty4,sixteen preset to 0xff,24,16
469 // Output:      r16,r20,r24,r28 as s0-s3
470 // Clobber:     r16-r31,rk0-rk1,r32-r43
471 .align  32
472 _ia64_AES_decrypt:
473         .prologue
474         .altrp  b6
475         .body
476 { .mmi; alloc   r16=ar.pfs,12,0,0,8     // 12 inputs, 8 of them rotating
477         LDKEY   t0=[rk0],2*KSZ
478         mov     pr.rot=1<<16    }
479 { .mmi; LDKEY   t1=[rk1],2*KSZ
480         add     te1=TE1,te0
481         add     te3=-3,te3      };;     // rounds-3, becomes ar.lc below
482 { .mib; LDKEY   t2=[rk0],2*KSZ
483         mov     ar.ec=2         }
484 { .mib; LDKEY   t3=[rk1],2*KSZ
485         add     te2=TE2,te0
486         brp.loop.imp    .Ld_top,.Ld_end-16      };;
487
488 { .mmi; xor     s0=s0,t0                // initial AddRoundKey: s ^= rk[0..3]
489         xor     s1=s1,t1
490         mov     ar.lc=te3       }
491 { .mmi; xor     s2=s2,t2
492         xor     s3=s3,t3
493         add     te3=TE3,te0     };;     // te3 is a table pointer from here on
494 \f
495 .align  32
496 .Ld_top:
497 { .mmi; (p0)    LDKEY   t0=[rk0],2*KSZ          // 0/0:rk[0]
498         (p0)    and     te31=s1,maskff          // 0/0:s1&0xff
499         (p0)    extr.u  te22=s2,8,8     }       // 0/0:s2>>8&0xff
500 { .mmi; (p0)    LDKEY   t1=[rk1],2*KSZ          // 0/1:rk[1]
501         (p0)    and     te32=s2,maskff          // 0/1:s2&0xff
502         (p0)    shr.u   te00=s0,twenty4 };;     // 0/0:s0>>24
503 { .mmi; (p0)    LDKEY   t2=[rk0],2*KSZ          // 1/2:rk[2]
504         (p0)    shladd  te31=te31,3,te3         // 1/0:te3+s1&0xff
505         (p0)    extr.u  te23=s3,8,8     }       // 1/1:s3>>8&0xff
506 { .mmi; (p0)    LDKEY   t3=[rk1],2*KSZ          // 1/3:rk[3]
507         (p0)    shladd  te32=te32,3,te3         // 1/1:te3+s2
508         (p0)    shr.u   te01=s1,twenty4 };;     // 1/1:s1>>24
509 { .mmi; (p0)    ld4     te31=[te31]             // 2/0:te3[s1&0xff]
510         (p0)    shladd  te22=te22,3,te2         // 2/0:te2+s2>>8&0xff
511         (p0)    extr.u  te20=s0,8,8     }       // 2/2:s0>>8&0xff
512 { .mmi; (p0)    ld4     te32=[te32]             // 2/1:te3[s2]
513         (p0)    shladd  te23=te23,3,te2         // 2/1:te2+s3>>8
514         (p0)    shr.u   te02=s2,twenty4 };;     // 2/2:s2>>24
515 { .mmi; (p0)    ld4     te22=[te22]             // 3/0:te2[s2>>8]
516         (p0)    shladd  te20=te20,3,te2         // 3/2:te2+s0>>8
517         (p0)    extr.u  te21=s1,8,8     }       // 3/3:s1>>8&0xff
518 { .mmi; (p0)    ld4     te23=[te23]             // 3/1:te2[s3>>8]
519         (p0)    shladd  te00=te00,3,te0         // 3/0:te0+s0>>24
520         (p0)    shr.u   te03=s3,twenty4 };;     // 3/3:s3>>24
521 { .mmi; (p0)    ld4     te20=[te20]             // 4/2:te2[s0>>8]
522         (p0)    shladd  te21=te21,3,te2         // 4/3:te2+s1>>8
523         (p0)    extr.u  te13=s3,16,8    }       // 4/0:s3>>16&0xff
524 { .mmi; (p0)    ld4     te00=[te00]             // 4/0:te0[s0>>24]
525         (p0)    shladd  te01=te01,3,te0         // 4/1:te0+s1>>24
526         (p0)    shr.u   te11=s1,sixteen };;     // 4/2:s1>>16
527 { .mmi; (p0)    ld4     te21=[te21]             // 5/3:te2[s1>>8]
528         (p0)    shladd  te13=te13,3,te1         // 5/0:te1+s3>>16
529         (p0)    extr.u  te10=s0,16,8    }       // 5/1:s0>>16&0xff
530 { .mmi; (p0)    ld4     te01=[te01]             // 5/1:te0[s1>>24]
531         (p0)    shladd  te02=te02,3,te0         // 5/2:te0+s2>>24
532         (p0)    and     te33=s3,maskff  };;     // 5/2:s3&0xff
533 { .mmi; (p0)    ld4     te13=[te13]             // 6/0:te1[s3>>16]
534         (p0)    shladd  te10=te10,3,te1         // 6/1:te1+s0>>16
535         (p0)    extr.u  te12=s2,16,8    }       // 6/3:s2>>16&0xff
536 { .mmi; (p0)    ld4     te02=[te02]             // 6/2:te0[s2>>24]
537         (p0)    shladd  te03=te03,3,te0         // 6/3:te0+s3>>24
538         (p0)    and     te30=s0,maskff  };;     // 6/3:s0&0xff
539
540 { .mmi; (p0)    ld4     te10=[te10]             // 7/1:te1[s0>>16]
541         (p0)    shladd  te33=te33,3,te3         // 7/2:te3+s3&0xff
542         (p0)    and     te11=te11,maskff}       // 7/2:s1>>16&0xff
543 { .mmi; (p0)    ld4     te03=[te03]             // 7/3:te0[s3>>24]
544         (p0)    shladd  te30=te30,3,te3         // 7/3:te3+s0
545         (p0)    xor     t0=t0,te31      };;     // 7/0:
546 { .mmi; (p0)    ld4     te33=[te33]             // 8/2:te3[s3]
547         (p0)    shladd  te11=te11,3,te1         // 8/2:te1+s1>>16
548         (p0)    xor     t0=t0,te22      }       // 8/0:
549 { .mmi; (p0)    ld4     te30=[te30]             // 8/3:te3[s0]
550         (p0)    shladd  te12=te12,3,te1         // 8/3:te1+s2>>16
551         (p0)    xor     t1=t1,te32      };;     // 8/1:
552 { .mmi; (p0)    ld4     te11=[te11]             // 9/2:te1[s1>>16]
553         (p0)    ld4     te12=[te12]             // 9/3:te1[s2>>16]
554         (p0)    xor     t0=t0,te00      };;     // 9/0:         !L2 scheduling
555 { .mmi; (p0)    xor     t1=t1,te23              // 10[9]/1:
556         (p0)    xor     t2=t2,te20              // 10[9]/2:
557         (p0)    xor     t3=t3,te21      };;     // 10[9]/3:
558 { .mmi; (p0)    xor     t0=t0,te13              // 11[10]/0:done!
559         (p0)    xor     t1=t1,te01              // 11[10]/1:
560         (p0)    xor     t2=t2,te02      };;     // 11[10]/2:    !L2 scheduling
561 { .mmi; (p0)    xor     t3=t3,te03              // 12[10]/3:
562         (p16)   cmp.eq  p0,p17=r0,r0    };;     // 12[10]/clear (p17)
563 { .mmi; (p0)    xor     t1=t1,te10              // 13[11]/1:done!
564         (p0)    xor     t2=t2,te33              // 13[11]/2:
565         (p0)    xor     t3=t3,te30      }       // 13[11]/3:
566 { .mmi; (p17)   add     te0=2048,te0            // 13[11]/te0 -> Td4 (only when p17 survives)
567         (p17)   add     te1=2048+64-TE1,te1};;  // 13[11]/te1 -> Td4+64
568 { .mib; (p0)    xor     t2=t2,te11              // 14[12]/2:done!
569         (p17)   add     te2=2048+128-TE2,te2}   // 14[12]/te2 -> Td4+128
570 { .mib; (p0)    xor     t3=t3,te12              // 14[12]/3:done!
571         (p17)   add     te3=2048+192-TE3,te3    // 14[12]/te3 -> Td4+192
572         br.ctop.sptk    .Ld_top         };;
573 .Ld_end:
574 \f
575
576 { .mmi; ld8     te10=[te0]              // prefetch Td4
577         ld8     te33=[te1]      }
578 { .mmi; ld8     te12=[te2]
579         ld8     te30=[te3]      }
580
581 { .mmi; LDKEY   t0=[rk0],2*KSZ          // 0/0:rk[0]
582         and     te31=s1,maskff          // 0/0:s1&0xff
583         extr.u  te22=s2,8,8     }       // 0/0:s2>>8&0xff
584 { .mmi; LDKEY   t1=[rk1],2*KSZ          // 0/1:rk[1]
585         and     te32=s2,maskff          // 0/1:s2&0xff
586         shr.u   te00=s0,twenty4 };;     // 0/0:s0>>24
587 { .mmi; LDKEY   t2=[rk0],2*KSZ          // 1/2:rk[2]
588         add     te31=te31,te0           // 1/0:te0+s1&0xff
589         extr.u  te23=s3,8,8     }       // 1/1:s3>>8&0xff
590 { .mmi; LDKEY   t3=[rk1],2*KSZ          // 1/3:rk[3]
591         add     te32=te32,te0           // 1/1:te0+s2
592         shr.u   te01=s1,twenty4 };;     // 1/1:s1>>24
593 { .mmi; ld1     te31=[te31]             // 2/0:te0[s1&0xff]
594         add     te22=te22,te0           // 2/0:te0+s2>>8&0xff
595         extr.u  te20=s0,8,8     }       // 2/2:s0>>8&0xff
596 { .mmi; ld1     te32=[te32]             // 2/1:te0[s2]
597         add     te23=te23,te0           // 2/1:te0+s3>>8
598         shr.u   te02=s2,twenty4 };;     // 2/2:s2>>24
599 { .mmi; ld1     te22=[te22]             // 3/0:te0[s2>>8]
600         add     te20=te20,te0           // 3/2:te0+s0>>8
601         extr.u  te21=s1,8,8     }       // 3/3:s1>>8&0xff
602 { .mmi; ld1     te23=[te23]             // 3/1:te0[s3>>8]
603         add     te00=te00,te0           // 3/0:te0+s0>>24
604         shr.u   te03=s3,twenty4 };;     // 3/3:s3>>24
605 { .mmi; ld1     te20=[te20]             // 4/2:te0[s0>>8]
606         add     te21=te21,te0           // 4/3:te0+s1>>8
607         extr.u  te13=s3,16,8    }       // 4/0:s3>>16&0xff
608 { .mmi; ld1     te00=[te00]             // 4/0:te0[s0>>24]
609         add     te01=te01,te0           // 4/1:te0+s1>>24
610         shr.u   te11=s1,sixteen };;     // 4/2:s1>>16
611 { .mmi; ld1     te21=[te21]             // 5/3:te0[s1>>8]
612         add     te13=te13,te0           // 5/0:te0+s3>>16
613         extr.u  te10=s0,16,8    }       // 5/1:s0>>16&0xff
614 { .mmi; ld1     te01=[te01]             // 5/1:te0[s1>>24]
615         add     te02=te02,te0           // 5/2:te0+s2>>24
616         and     te33=s3,maskff  };;     // 5/2:s3&0xff
617 { .mmi; ld1     te13=[te13]             // 6/0:te0[s3>>16]
618         add     te10=te10,te0           // 6/1:te0+s0>>16
619         extr.u  te12=s2,16,8    }       // 6/3:s2>>16&0xff
620 { .mmi; ld1     te02=[te02]             // 6/2:te0[s2>>24]
621         add     te03=te03,te0           // 6/3:te0+s3>>24
622         and     te30=s0,maskff  };;     // 6/3:s0&0xff
623
624 { .mmi; ld1     te10=[te10]             // 7/1:te0[s0>>16]
625         add     te33=te33,te0           // 7/2:te0+s3&0xff
626         dep     te31=te22,te31,8,8}     // 7/0:
627 { .mmi; ld1     te03=[te03]             // 7/3:te0[s3>>24]
628         add     te30=te30,te0           // 7/3:te0+s0
629         and     te11=te11,maskff};;     // 7/2:s1>>16&0xff
630 { .mmi; ld1     te33=[te33]             // 8/2:te0[s3]
631         add     te11=te11,te0           // 8/2:te0+s1>>16
632         dep     te32=te23,te32,8,8}     // 8/1:
633 { .mmi; ld1     te30=[te30]             // 8/3:te0[s0]
634         add     te12=te12,te0           // 8/3:te0+s2>>16
635         shl     te00=te00,twenty4};;    // 8/0:
636 { .mii; ld1     te11=[te11]             // 9/2:te0[s1>>16]
637         dep     te31=te13,te31,16,8     // 9/0:
638         shl     te01=te01,twenty4};;    // 9/1:
639 { .mii; ld1     te12=[te12]             // 10/3:te0[s2>>16]
640         dep     te33=te20,te33,8,8      // 10/2:
641         shl     te02=te02,twenty4};;    // 10/2:
642 { .mii; xor     t0=t0,te31              // 11/0:
643         dep     te30=te21,te30,8,8      // 11/3:
644         shl     te10=te10,sixteen};;    // 11/1:
645 { .mii; xor     r16=t0,te00             // 12/0:done!
646         dep     te33=te11,te33,16,8     // 12/2:
647         shl     te03=te03,twenty4};;    // 12/3:
648 { .mmi; xor     t1=t1,te01              // 13/1:
649         xor     t2=t2,te02              // 13/2:
650         dep     te30=te12,te30,16,8};;  // 13/3:
651 { .mmi; xor     t1=t1,te32              // 14/1:
652         xor     r24=t2,te33             // 14/2:done!
653         xor     t3=t3,te30      };;     // 14/3:
654 { .mib; xor     r20=t1,te10             // 15/1:done!
655         xor     r28=t3,te03             // 15/3:done!
656         br.ret.sptk     b6      };;
657 .endp   _ia64_AES_decrypt#
658
659 // void AES_decrypt (const void *in,void *out,const AES_KEY *key);
//
// Public entry point. Reads one 16-byte block from `in`, passes it to
// _ia64_AES_decrypt as four 32-bit words s0..s3 and writes the four
// result words to `out`.
//
// Register roles visible in this procedure:
//   in0, in1, in2         in, out, key
//   out1,out3,out5,out7   input words s0..s3 handed to _ia64_AES_decrypt
//   out8                  address of AES_Td, computed relative to ip
//   out11                 32-bit value loaded from key+KSZ*60 (AES_KEY->rounds)
//   r16,r20,r24,r28       result words s0..s3 stored to `out` after the call
//   loc0                  saved psr.um, written back before returning
// rk0, rk1, twenty4, sixteen, maskff, pfssave, prsave, lcsave, KSZ and ADDP
// are names defined outside this part of the file.
//
// Two data paths exist:
//   * word-wise ld4/st4, assembled only when _HPUX_SOURCE is defined and
//     taken only when the respective pointer is 4-byte aligned;
//   * byte-wise ld1/st1 (.Ld_i_unaligned/.Ld_o_unaligned), which builds and
//     splits each word most-significant byte first.
// Without _HPUX_SOURCE the prologue falls straight through into
// .Ld_i_unaligned, so every block takes the byte-wise path.
660 .global AES_decrypt#
661 .proc   AES_decrypt#
662 .align  32
663 AES_decrypt:
664         .prologue
665         .save   ar.pfs,pfssave
666 { .mmi; alloc   pfssave=ar.pfs,3,1,12,0         // 3 inputs, 1 local, 12 outputs
667         and     out0=3,in0                      // in & 3, tested in the HPUX path below
668         mov     r3=ip                   }       // r3 = address of this bundle
669 { .mmi; ADDP    in0=0,in0
670         mov     loc0=psr.um                     // save user mask
671         ADDP    out11=KSZ*60,in2        };;     // &AES_KEY->rounds
672
673 { .mmi; ld4     out11=[out11]                   // AES_KEY->rounds
674         add     out8=(AES_Td#-AES_decrypt#),r3  // &AES_Td, relative to ip captured above
675         .save   pr,prsave
676         mov     prsave=pr               }
677 { .mmi; rum     1<<3                            // clear um.ac
678         .save   ar.lc,lcsave
679         mov     lcsave=ar.lc            };;
680
681         .body
682 #if defined(_HPUX_SOURCE)       // HPUX is big-endian, cut 15+15 cycles...
// Word-wise path: two pointers (in, in+4) each load two words with ld4.
683 { .mib; cmp.ne  p6,p0=out0,r0                   // p6 = (in & 3) != 0
684         add     out0=4,in0                      // second load pointer: in+4
685 (p6)    br.dpnt.many    .Ld_i_unaligned };;
686
687 { .mmi; ld4     out1=[in0],8            // s0
688         and     out9=3,in1              // out & 3
689         mov     twenty4=24              }
690 { .mmi; ld4     out3=[out0],8           // s1
691         ADDP    rk0=0,in2
692         mov     sixteen=16              };;
693 { .mmi; ld4     out5=[in0]              // s2
694         cmp.ne  p6,p0=out9,r0           // p6 = (out & 3) != 0
695         mov     maskff=0xff             }
696 { .mmb; ld4     out7=[out0]             // s3
697         ADDP    rk1=KSZ,in2
698         br.call.sptk.many       b6=_ia64_AES_decrypt    };;
699
// p6 was set before the call and is tested after it, so this relies on
// _ia64_AES_decrypt leaving p6 intact (callee body is not fully visible here).
700 { .mib; ADDP    in0=4,in1               // store pointer: out+4
701         ADDP    in1=0,in1               // store pointer: out
702 (p6)    br.spnt .Ld_o_unaligned         };;
703
704 { .mii; mov     psr.um=loc0             // restore user mask
705         mov     ar.pfs=pfssave
706         mov     ar.lc=lcsave            };;
707 { .mmi; st4     [in1]=r16,8             // s0
708         st4     [in0]=r20,8             // s1
709         mov     pr=prsave,0x1ffff       };;     // restore predicate registers
710 { .mmb; st4     [in1]=r24               // s2
711         st4     [in0]=r28               // s3
712         br.ret.sptk.many        b0      };;
713 #endif
714
715 .align  32
716 .Ld_i_unaligned:
// Byte-wise input: four pointers in+0..in+3, each post-incremented by 4, so
// one round of loads fetches the four bytes of one word.
717 { .mmi; add     out0=1,in0
718         add     out2=2,in0
719         add     out4=3,in0      };;
720 { .mmi; ld1     r16=[in0],4             // in[0]
721         ld1     r17=[out0],4    }//;;   // in[1]
722 { .mmi; ld1     r18=[out2],4            // in[2]
723         ld1     out1=[out4],4   };;     // s0
724 { .mmi; ld1     r20=[in0],4
725         ld1     r21=[out0],4    }//;;
726 { .mmi; ld1     r22=[out2],4
727         ld1     out3=[out4],4   };;     // s1
728 { .mmi; ld1     r24=[in0],4
729         ld1     r25=[out0],4    }//;;
730 { .mmi; ld1     r26=[out2],4
731         ld1     out5=[out4],4   };;     // s2
732 { .mmi; ld1     r28=[in0]
733         ld1     r29=[out0]      }//;;
734 { .mmi; ld1     r30=[out2]
735         ld1     out7=[out4]     };;     // s3
736
// Merge the bytes: the fourth byte of each word is already in bits 7..0 of
// out1/out3/out5/out7; deposit the first three at bits 31..24, 23..16, 15..8.
737 { .mii;
738         dep     out1=r16,out1,24,8      //;;
739         dep     out3=r20,out3,24,8      }//;;
740 { .mii; ADDP    rk0=0,in2
741         dep     out5=r24,out5,24,8      //;;
742         dep     out7=r28,out7,24,8      };;
743 { .mii; ADDP    rk1=KSZ,in2
744         dep     out1=r17,out1,16,8      //;;
745         dep     out3=r21,out3,16,8      }//;;
746 { .mii; mov     twenty4=24
747         dep     out5=r25,out5,16,8      //;;
748         dep     out7=r29,out7,16,8      };;
749 { .mii; mov     sixteen=16
750         dep     out1=r18,out1,8,8       //;;
751         dep     out3=r22,out3,8,8       }//;;
752 { .mii; mov     maskff=0xff
753         dep     out5=r26,out5,8,8       //;;
754         dep     out7=r30,out7,8,8       };;
755
756 { .mib; br.call.sptk.many       b6=_ia64_AES_decrypt    };;
757
758 .Ld_o_unaligned:
// Byte-wise output. Reached by fall-through after the call above, or by the
// (p6) branch in the HPUX path. Each result word is split into four bytes;
// st1 writes only the low byte of its source register. out0..out3 are reused
// here as the store pointers out+0..out+3. twenty4/sixteen are read after
// the call, so they are expected to still hold 24/16.
759 { .mii; ADDP    out0=0,in1
760         extr.u  r17=r16,8,8                     // s0
761         shr.u   r19=r16,twenty4         }//;;
762 { .mii; ADDP    out1=1,in1
763         extr.u  r18=r16,16,8
764         shr.u   r23=r20,twenty4         }//;;   // s1
765 { .mii; ADDP    out2=2,in1
766         extr.u  r21=r20,8,8
767         shr.u   r22=r20,sixteen         }//;;
768 { .mii; ADDP    out3=3,in1
769         extr.u  r25=r24,8,8                     // s2
770         shr.u   r27=r24,twenty4         };;
771 { .mii; st1     [out3]=r16,4                    // out[3] = low byte of s0
772         extr.u  r26=r24,16,8
773         shr.u   r31=r28,twenty4         }//;;   // s3
774 { .mii; st1     [out2]=r17,4                    // out[2] = bits 15..8 of s0
775         extr.u  r29=r28,8,8
776         shr.u   r30=r28,sixteen         }//;;
777
778 { .mmi; st1     [out1]=r18,4                    // out[1] = bits 23..16 of s0
779         st1     [out0]=r19,4            };;     // out[0] = s0>>24
780 { .mmi; st1     [out3]=r20,4
781         st1     [out2]=r21,4            }//;;
782 { .mmi; st1     [out1]=r22,4
783         st1     [out0]=r23,4            };;
784 { .mmi; st1     [out3]=r24,4
785         st1     [out2]=r25,4
786         mov     pr=prsave,0x1ffff       }//;;   // restore predicate registers
787 { .mmi; st1     [out1]=r26,4
788         st1     [out0]=r27,4
789         mov     ar.pfs=pfssave          };;
790 { .mmi; st1     [out3]=r28
791         st1     [out2]=r29
792         mov     ar.lc=lcsave            }//;;
793 { .mmi; st1     [out1]=r30
794         st1     [out0]=r31              }
795 { .mfb; mov     psr.um=loc0                     // restore user mask
796         br.ret.sptk.many        b0      };;
797 .endp   AES_decrypt#
798
799 // Leave the tables in the .text segment: AES_decrypt above locates AES_Td as an ip-relative offset (AES_Td#-AES_decrypt#).
800 .align  64
801 .global AES_Te#
802 .type   AES_Te#,@object
803 AES_Te: data4   0xc66363a5,0xc66363a5, 0xf87c7c84,0xf87c7c84
804         data4   0xee777799,0xee777799, 0xf67b7b8d,0xf67b7b8d
805         data4   0xfff2f20d,0xfff2f20d, 0xd66b6bbd,0xd66b6bbd
806         data4   0xde6f6fb1,0xde6f6fb1, 0x91c5c554,0x91c5c554
807         data4   0x60303050,0x60303050, 0x02010103,0x02010103
808         data4   0xce6767a9,0xce6767a9, 0x562b2b7d,0x562b2b7d
809         data4   0xe7fefe19,0xe7fefe19, 0xb5d7d762,0xb5d7d762
810         data4   0x4dababe6,0x4dababe6, 0xec76769a,0xec76769a
811         data4   0x8fcaca45,0x8fcaca45, 0x1f82829d,0x1f82829d
812         data4   0x89c9c940,0x89c9c940, 0xfa7d7d87,0xfa7d7d87
813         data4   0xeffafa15,0xeffafa15, 0xb25959eb,0xb25959eb
814         data4   0x8e4747c9,0x8e4747c9, 0xfbf0f00b,0xfbf0f00b
815         data4   0x41adadec,0x41adadec, 0xb3d4d467,0xb3d4d467
816         data4   0x5fa2a2fd,0x5fa2a2fd, 0x45afafea,0x45afafea
817         data4   0x239c9cbf,0x239c9cbf, 0x53a4a4f7,0x53a4a4f7
818         data4   0xe4727296,0xe4727296, 0x9bc0c05b,0x9bc0c05b
819         data4   0x75b7b7c2,0x75b7b7c2, 0xe1fdfd1c,0xe1fdfd1c
820         data4   0x3d9393ae,0x3d9393ae, 0x4c26266a,0x4c26266a
821         data4   0x6c36365a,0x6c36365a, 0x7e3f3f41,0x7e3f3f41
822         data4   0xf5f7f702,0xf5f7f702, 0x83cccc4f,0x83cccc4f
823         data4   0x6834345c,0x6834345c, 0x51a5a5f4,0x51a5a5f4
824         data4   0xd1e5e534,0xd1e5e534, 0xf9f1f108,0xf9f1f108
825         data4   0xe2717193,0xe2717193, 0xabd8d873,0xabd8d873
826         data4   0x62313153,0x62313153, 0x2a15153f,0x2a15153f
827         data4   0x0804040c,0x0804040c, 0x95c7c752,0x95c7c752
828         data4   0x46232365,0x46232365, 0x9dc3c35e,0x9dc3c35e
829         data4   0x30181828,0x30181828, 0x379696a1,0x379696a1
830         data4   0x0a05050f,0x0a05050f, 0x2f9a9ab5,0x2f9a9ab5
831         data4   0x0e070709,0x0e070709, 0x24121236,0x24121236
832         data4   0x1b80809b,0x1b80809b, 0xdfe2e23d,0xdfe2e23d
833         data4   0xcdebeb26,0xcdebeb26, 0x4e272769,0x4e272769
834         data4   0x7fb2b2cd,0x7fb2b2cd, 0xea75759f,0xea75759f
835         data4   0x1209091b,0x1209091b, 0x1d83839e,0x1d83839e
836         data4   0x582c2c74,0x582c2c74, 0x341a1a2e,0x341a1a2e
837         data4   0x361b1b2d,0x361b1b2d, 0xdc6e6eb2,0xdc6e6eb2
838         data4   0xb45a5aee,0xb45a5aee, 0x5ba0a0fb,0x5ba0a0fb
839         data4   0xa45252f6,0xa45252f6, 0x763b3b4d,0x763b3b4d
840         data4   0xb7d6d661,0xb7d6d661, 0x7db3b3ce,0x7db3b3ce
841         data4   0x5229297b,0x5229297b, 0xdde3e33e,0xdde3e33e
842         data4   0x5e2f2f71,0x5e2f2f71, 0x13848497,0x13848497
843         data4   0xa65353f5,0xa65353f5, 0xb9d1d168,0xb9d1d168
844         data4   0x00000000,0x00000000, 0xc1eded2c,0xc1eded2c
845         data4   0x40202060,0x40202060, 0xe3fcfc1f,0xe3fcfc1f
846         data4   0x79b1b1c8,0x79b1b1c8, 0xb65b5bed,0xb65b5bed
847         data4   0xd46a6abe,0xd46a6abe, 0x8dcbcb46,0x8dcbcb46
848         data4   0x67bebed9,0x67bebed9, 0x7239394b,0x7239394b
849         data4   0x944a4ade,0x944a4ade, 0x984c4cd4,0x984c4cd4
850         data4   0xb05858e8,0xb05858e8, 0x85cfcf4a,0x85cfcf4a
851         data4   0xbbd0d06b,0xbbd0d06b, 0xc5efef2a,0xc5efef2a
852         data4   0x4faaaae5,0x4faaaae5, 0xedfbfb16,0xedfbfb16
853         data4   0x864343c5,0x864343c5, 0x9a4d4dd7,0x9a4d4dd7
854         data4   0x66333355,0x66333355, 0x11858594,0x11858594
855         data4   0x8a4545cf,0x8a4545cf, 0xe9f9f910,0xe9f9f910
856         data4   0x04020206,0x04020206, 0xfe7f7f81,0xfe7f7f81
857         data4   0xa05050f0,0xa05050f0, 0x783c3c44,0x783c3c44
858         data4   0x259f9fba,0x259f9fba, 0x4ba8a8e3,0x4ba8a8e3
859         data4   0xa25151f3,0xa25151f3, 0x5da3a3fe,0x5da3a3fe
860         data4   0x804040c0,0x804040c0, 0x058f8f8a,0x058f8f8a
861         data4   0x3f9292ad,0x3f9292ad, 0x219d9dbc,0x219d9dbc
862         data4   0x70383848,0x70383848, 0xf1f5f504,0xf1f5f504
863         data4   0x63bcbcdf,0x63bcbcdf, 0x77b6b6c1,0x77b6b6c1
864         data4   0xafdada75,0xafdada75, 0x42212163,0x42212163
865         data4   0x20101030,0x20101030, 0xe5ffff1a,0xe5ffff1a
866         data4   0xfdf3f30e,0xfdf3f30e, 0xbfd2d26d,0xbfd2d26d
867         data4   0x81cdcd4c,0x81cdcd4c, 0x180c0c14,0x180c0c14
868         data4   0x26131335,0x26131335, 0xc3ecec2f,0xc3ecec2f
869         data4   0xbe5f5fe1,0xbe5f5fe1, 0x359797a2,0x359797a2
870         data4   0x884444cc,0x884444cc, 0x2e171739,0x2e171739
871         data4   0x93c4c457,0x93c4c457, 0x55a7a7f2,0x55a7a7f2
872         data4   0xfc7e7e82,0xfc7e7e82, 0x7a3d3d47,0x7a3d3d47
873         data4   0xc86464ac,0xc86464ac, 0xba5d5de7,0xba5d5de7
874         data4   0x3219192b,0x3219192b, 0xe6737395,0xe6737395
875         data4   0xc06060a0,0xc06060a0, 0x19818198,0x19818198
876         data4   0x9e4f4fd1,0x9e4f4fd1, 0xa3dcdc7f,0xa3dcdc7f
877         data4   0x44222266,0x44222266, 0x542a2a7e,0x542a2a7e
878         data4   0x3b9090ab,0x3b9090ab, 0x0b888883,0x0b888883
879         data4   0x8c4646ca,0x8c4646ca, 0xc7eeee29,0xc7eeee29
880         data4   0x6bb8b8d3,0x6bb8b8d3, 0x2814143c,0x2814143c
881         data4   0xa7dede79,0xa7dede79, 0xbc5e5ee2,0xbc5e5ee2
882         data4   0x160b0b1d,0x160b0b1d, 0xaddbdb76,0xaddbdb76
883         data4   0xdbe0e03b,0xdbe0e03b, 0x64323256,0x64323256
884         data4   0x743a3a4e,0x743a3a4e, 0x140a0a1e,0x140a0a1e
885         data4   0x924949db,0x924949db, 0x0c06060a,0x0c06060a
886         data4   0x4824246c,0x4824246c, 0xb85c5ce4,0xb85c5ce4
887         data4   0x9fc2c25d,0x9fc2c25d, 0xbdd3d36e,0xbdd3d36e
888         data4   0x43acacef,0x43acacef, 0xc46262a6,0xc46262a6
889         data4   0x399191a8,0x399191a8, 0x319595a4,0x319595a4
890         data4   0xd3e4e437,0xd3e4e437, 0xf279798b,0xf279798b
891         data4   0xd5e7e732,0xd5e7e732, 0x8bc8c843,0x8bc8c843
892         data4   0x6e373759,0x6e373759, 0xda6d6db7,0xda6d6db7
893         data4   0x018d8d8c,0x018d8d8c, 0xb1d5d564,0xb1d5d564
894         data4   0x9c4e4ed2,0x9c4e4ed2, 0x49a9a9e0,0x49a9a9e0
895         data4   0xd86c6cb4,0xd86c6cb4, 0xac5656fa,0xac5656fa
896         data4   0xf3f4f407,0xf3f4f407, 0xcfeaea25,0xcfeaea25
897         data4   0xca6565af,0xca6565af, 0xf47a7a8e,0xf47a7a8e
898         data4   0x47aeaee9,0x47aeaee9, 0x10080818,0x10080818
899         data4   0x6fbabad5,0x6fbabad5, 0xf0787888,0xf0787888
900         data4   0x4a25256f,0x4a25256f, 0x5c2e2e72,0x5c2e2e72
901         data4   0x381c1c24,0x381c1c24, 0x57a6a6f1,0x57a6a6f1
902         data4   0x73b4b4c7,0x73b4b4c7, 0x97c6c651,0x97c6c651
903         data4   0xcbe8e823,0xcbe8e823, 0xa1dddd7c,0xa1dddd7c
904         data4   0xe874749c,0xe874749c, 0x3e1f1f21,0x3e1f1f21
905         data4   0x964b4bdd,0x964b4bdd, 0x61bdbddc,0x61bdbddc
906         data4   0x0d8b8b86,0x0d8b8b86, 0x0f8a8a85,0x0f8a8a85
907         data4   0xe0707090,0xe0707090, 0x7c3e3e42,0x7c3e3e42
908         data4   0x71b5b5c4,0x71b5b5c4, 0xcc6666aa,0xcc6666aa
909         data4   0x904848d8,0x904848d8, 0x06030305,0x06030305
910         data4   0xf7f6f601,0xf7f6f601, 0x1c0e0e12,0x1c0e0e12
911         data4   0xc26161a3,0xc26161a3, 0x6a35355f,0x6a35355f
912         data4   0xae5757f9,0xae5757f9, 0x69b9b9d0,0x69b9b9d0
913         data4   0x17868691,0x17868691, 0x99c1c158,0x99c1c158
914         data4   0x3a1d1d27,0x3a1d1d27, 0x279e9eb9,0x279e9eb9
915         data4   0xd9e1e138,0xd9e1e138, 0xebf8f813,0xebf8f813
916         data4   0x2b9898b3,0x2b9898b3, 0x22111133,0x22111133
917         data4   0xd26969bb,0xd26969bb, 0xa9d9d970,0xa9d9d970
918         data4   0x078e8e89,0x078e8e89, 0x339494a7,0x339494a7
919         data4   0x2d9b9bb6,0x2d9b9bb6, 0x3c1e1e22,0x3c1e1e22
920         data4   0x15878792,0x15878792, 0xc9e9e920,0xc9e9e920
921         data4   0x87cece49,0x87cece49, 0xaa5555ff,0xaa5555ff
922         data4   0x50282878,0x50282878, 0xa5dfdf7a,0xa5dfdf7a
923         data4   0x038c8c8f,0x038c8c8f, 0x59a1a1f8,0x59a1a1f8
924         data4   0x09898980,0x09898980, 0x1a0d0d17,0x1a0d0d17
925         data4   0x65bfbfda,0x65bfbfda, 0xd7e6e631,0xd7e6e631
926         data4   0x844242c6,0x844242c6, 0xd06868b8,0xd06868b8
927         data4   0x824141c3,0x824141c3, 0x299999b0,0x299999b0
928         data4   0x5a2d2d77,0x5a2d2d77, 0x1e0f0f11,0x1e0f0f11
929         data4   0x7bb0b0cb,0x7bb0b0cb, 0xa85454fc,0xa85454fc
930         data4   0x6dbbbbd6,0x6dbbbbd6, 0x2c16163a,0x2c16163a
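931 // Te4: 256 one-byte entries; with the 2048 bytes of data4 words above they make up the 2048+256 given to .size below.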
932         data1   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
933         data1   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
934         data1   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
935         data1   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
936         data1   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
937         data1   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
938         data1   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
939         data1   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
940         data1   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
941         data1   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
942         data1   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
943         data1   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
944         data1   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
945         data1   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
946         data1   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
947         data1   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
948         data1   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
949         data1   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
950         data1   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
951         data1   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
952         data1   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
953         data1   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
954         data1   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
955         data1   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
956         data1   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
957         data1   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
958         data1   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
959         data1   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
960         data1   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
961         data1   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
962         data1   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
963         data1   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
964 .size   AES_Te#,2048+256        // HP-UX assembler fails to ".-AES_Te#"
965
966 .align  64
967 .global AES_Td#
968 .type   AES_Td#,@object
969 AES_Td: data4   0x51f4a750,0x51f4a750, 0x7e416553,0x7e416553
970         data4   0x1a17a4c3,0x1a17a4c3, 0x3a275e96,0x3a275e96
971         data4   0x3bab6bcb,0x3bab6bcb, 0x1f9d45f1,0x1f9d45f1
972         data4   0xacfa58ab,0xacfa58ab, 0x4be30393,0x4be30393
973         data4   0x2030fa55,0x2030fa55, 0xad766df6,0xad766df6
974         data4   0x88cc7691,0x88cc7691, 0xf5024c25,0xf5024c25
975         data4   0x4fe5d7fc,0x4fe5d7fc, 0xc52acbd7,0xc52acbd7
976         data4   0x26354480,0x26354480, 0xb562a38f,0xb562a38f
977         data4   0xdeb15a49,0xdeb15a49, 0x25ba1b67,0x25ba1b67
978         data4   0x45ea0e98,0x45ea0e98, 0x5dfec0e1,0x5dfec0e1
979         data4   0xc32f7502,0xc32f7502, 0x814cf012,0x814cf012
980         data4   0x8d4697a3,0x8d4697a3, 0x6bd3f9c6,0x6bd3f9c6
981         data4   0x038f5fe7,0x038f5fe7, 0x15929c95,0x15929c95
982         data4   0xbf6d7aeb,0xbf6d7aeb, 0x955259da,0x955259da
983         data4   0xd4be832d,0xd4be832d, 0x587421d3,0x587421d3
984         data4   0x49e06929,0x49e06929, 0x8ec9c844,0x8ec9c844
985         data4   0x75c2896a,0x75c2896a, 0xf48e7978,0xf48e7978
986         data4   0x99583e6b,0x99583e6b, 0x27b971dd,0x27b971dd
987         data4   0xbee14fb6,0xbee14fb6, 0xf088ad17,0xf088ad17
988         data4   0xc920ac66,0xc920ac66, 0x7dce3ab4,0x7dce3ab4
989         data4   0x63df4a18,0x63df4a18, 0xe51a3182,0xe51a3182
990         data4   0x97513360,0x97513360, 0x62537f45,0x62537f45
991         data4   0xb16477e0,0xb16477e0, 0xbb6bae84,0xbb6bae84
992         data4   0xfe81a01c,0xfe81a01c, 0xf9082b94,0xf9082b94
993         data4   0x70486858,0x70486858, 0x8f45fd19,0x8f45fd19
994         data4   0x94de6c87,0x94de6c87, 0x527bf8b7,0x527bf8b7
995         data4   0xab73d323,0xab73d323, 0x724b02e2,0x724b02e2
996         data4   0xe31f8f57,0xe31f8f57, 0x6655ab2a,0x6655ab2a
997         data4   0xb2eb2807,0xb2eb2807, 0x2fb5c203,0x2fb5c203
998         data4   0x86c57b9a,0x86c57b9a, 0xd33708a5,0xd33708a5
999         data4   0x302887f2,0x302887f2, 0x23bfa5b2,0x23bfa5b2
1000         data4   0x02036aba,0x02036aba, 0xed16825c,0xed16825c
1001         data4   0x8acf1c2b,0x8acf1c2b, 0xa779b492,0xa779b492
1002         data4   0xf307f2f0,0xf307f2f0, 0x4e69e2a1,0x4e69e2a1
1003         data4   0x65daf4cd,0x65daf4cd, 0x0605bed5,0x0605bed5
1004         data4   0xd134621f,0xd134621f, 0xc4a6fe8a,0xc4a6fe8a
1005         data4   0x342e539d,0x342e539d, 0xa2f355a0,0xa2f355a0
1006         data4   0x058ae132,0x058ae132, 0xa4f6eb75,0xa4f6eb75
1007         data4   0x0b83ec39,0x0b83ec39, 0x4060efaa,0x4060efaa
1008         data4   0x5e719f06,0x5e719f06, 0xbd6e1051,0xbd6e1051
1009         data4   0x3e218af9,0x3e218af9, 0x96dd063d,0x96dd063d
1010         data4   0xdd3e05ae,0xdd3e05ae, 0x4de6bd46,0x4de6bd46
1011         data4   0x91548db5,0x91548db5, 0x71c45d05,0x71c45d05
1012         data4   0x0406d46f,0x0406d46f, 0x605015ff,0x605015ff
1013         data4   0x1998fb24,0x1998fb24, 0xd6bde997,0xd6bde997
1014         data4   0x894043cc,0x894043cc, 0x67d99e77,0x67d99e77
1015         data4   0xb0e842bd,0xb0e842bd, 0x07898b88,0x07898b88
1016         data4   0xe7195b38,0xe7195b38, 0x79c8eedb,0x79c8eedb
1017         data4   0xa17c0a47,0xa17c0a47, 0x7c420fe9,0x7c420fe9
1018         data4   0xf8841ec9,0xf8841ec9, 0x00000000,0x00000000
1019         data4   0x09808683,0x09808683, 0x322bed48,0x322bed48
1020         data4   0x1e1170ac,0x1e1170ac, 0x6c5a724e,0x6c5a724e
1021         data4   0xfd0efffb,0xfd0efffb, 0x0f853856,0x0f853856
1022         data4   0x3daed51e,0x3daed51e, 0x362d3927,0x362d3927
1023         data4   0x0a0fd964,0x0a0fd964, 0x685ca621,0x685ca621
1024         data4   0x9b5b54d1,0x9b5b54d1, 0x24362e3a,0x24362e3a
1025         data4   0x0c0a67b1,0x0c0a67b1, 0x9357e70f,0x9357e70f
1026         data4   0xb4ee96d2,0xb4ee96d2, 0x1b9b919e,0x1b9b919e
1027         data4   0x80c0c54f,0x80c0c54f, 0x61dc20a2,0x61dc20a2
1028         data4   0x5a774b69,0x5a774b69, 0x1c121a16,0x1c121a16
1029         data4   0xe293ba0a,0xe293ba0a, 0xc0a02ae5,0xc0a02ae5
1030         data4   0x3c22e043,0x3c22e043, 0x121b171d,0x121b171d
1031         data4   0x0e090d0b,0x0e090d0b, 0xf28bc7ad,0xf28bc7ad
1032         data4   0x2db6a8b9,0x2db6a8b9, 0x141ea9c8,0x141ea9c8
1033         data4   0x57f11985,0x57f11985, 0xaf75074c,0xaf75074c
1034         data4   0xee99ddbb,0xee99ddbb, 0xa37f60fd,0xa37f60fd
1035         data4   0xf701269f,0xf701269f, 0x5c72f5bc,0x5c72f5bc
1036         data4   0x44663bc5,0x44663bc5, 0x5bfb7e34,0x5bfb7e34
1037         data4   0x8b432976,0x8b432976, 0xcb23c6dc,0xcb23c6dc
1038         data4   0xb6edfc68,0xb6edfc68, 0xb8e4f163,0xb8e4f163
1039         data4   0xd731dcca,0xd731dcca, 0x42638510,0x42638510
1040         data4   0x13972240,0x13972240, 0x84c61120,0x84c61120
1041         data4   0x854a247d,0x854a247d, 0xd2bb3df8,0xd2bb3df8
1042         data4   0xaef93211,0xaef93211, 0xc729a16d,0xc729a16d
1043         data4   0x1d9e2f4b,0x1d9e2f4b, 0xdcb230f3,0xdcb230f3
1044         data4   0x0d8652ec,0x0d8652ec, 0x77c1e3d0,0x77c1e3d0
1045         data4   0x2bb3166c,0x2bb3166c, 0xa970b999,0xa970b999
1046         data4   0x119448fa,0x119448fa, 0x47e96422,0x47e96422
1047         data4   0xa8fc8cc4,0xa8fc8cc4, 0xa0f03f1a,0xa0f03f1a
1048         data4   0x567d2cd8,0x567d2cd8, 0x223390ef,0x223390ef
1049         data4   0x87494ec7,0x87494ec7, 0xd938d1c1,0xd938d1c1
1050         data4   0x8ccaa2fe,0x8ccaa2fe, 0x98d40b36,0x98d40b36
1051         data4   0xa6f581cf,0xa6f581cf, 0xa57ade28,0xa57ade28
1052         data4   0xdab78e26,0xdab78e26, 0x3fadbfa4,0x3fadbfa4
1053         data4   0x2c3a9de4,0x2c3a9de4, 0x5078920d,0x5078920d
1054         data4   0x6a5fcc9b,0x6a5fcc9b, 0x547e4662,0x547e4662
1055         data4   0xf68d13c2,0xf68d13c2, 0x90d8b8e8,0x90d8b8e8
1056         data4   0x2e39f75e,0x2e39f75e, 0x82c3aff5,0x82c3aff5
1057         data4   0x9f5d80be,0x9f5d80be, 0x69d0937c,0x69d0937c
1058         data4   0x6fd52da9,0x6fd52da9, 0xcf2512b3,0xcf2512b3
1059         data4   0xc8ac993b,0xc8ac993b, 0x10187da7,0x10187da7
1060         data4   0xe89c636e,0xe89c636e, 0xdb3bbb7b,0xdb3bbb7b
1061         data4   0xcd267809,0xcd267809, 0x6e5918f4,0x6e5918f4
1062         data4   0xec9ab701,0xec9ab701, 0x834f9aa8,0x834f9aa8
1063         data4   0xe6956e65,0xe6956e65, 0xaaffe67e,0xaaffe67e
1064         data4   0x21bccf08,0x21bccf08, 0xef15e8e6,0xef15e8e6
1065         data4   0xbae79bd9,0xbae79bd9, 0x4a6f36ce,0x4a6f36ce
1066         data4   0xea9f09d4,0xea9f09d4, 0x29b07cd6,0x29b07cd6
1067         data4   0x31a4b2af,0x31a4b2af, 0x2a3f2331,0x2a3f2331
1068         data4   0xc6a59430,0xc6a59430, 0x35a266c0,0x35a266c0
1069         data4   0x744ebc37,0x744ebc37, 0xfc82caa6,0xfc82caa6
1070         data4   0xe090d0b0,0xe090d0b0, 0x33a7d815,0x33a7d815
1071         data4   0xf104984a,0xf104984a, 0x41ecdaf7,0x41ecdaf7
1072         data4   0x7fcd500e,0x7fcd500e, 0x1791f62f,0x1791f62f
1073         data4   0x764dd68d,0x764dd68d, 0x43efb04d,0x43efb04d
1074         data4   0xccaa4d54,0xccaa4d54, 0xe49604df,0xe49604df
1075         data4   0x9ed1b5e3,0x9ed1b5e3, 0x4c6a881b,0x4c6a881b
1076         data4   0xc12c1fb8,0xc12c1fb8, 0x4665517f,0x4665517f
1077         data4   0x9d5eea04,0x9d5eea04, 0x018c355d,0x018c355d
1078         data4   0xfa877473,0xfa877473, 0xfb0b412e,0xfb0b412e
1079         data4   0xb3671d5a,0xb3671d5a, 0x92dbd252,0x92dbd252
1080         data4   0xe9105633,0xe9105633, 0x6dd64713,0x6dd64713
1081         data4   0x9ad7618c,0x9ad7618c, 0x37a10c7a,0x37a10c7a
1082         data4   0x59f8148e,0x59f8148e, 0xeb133c89,0xeb133c89
1083         data4   0xcea927ee,0xcea927ee, 0xb761c935,0xb761c935
1084         data4   0xe11ce5ed,0xe11ce5ed, 0x7a47b13c,0x7a47b13c
1085         data4   0x9cd2df59,0x9cd2df59, 0x55f2733f,0x55f2733f
1086         data4   0x1814ce79,0x1814ce79, 0x73c737bf,0x73c737bf
1087         data4   0x53f7cdea,0x53f7cdea, 0x5ffdaa5b,0x5ffdaa5b
1088         data4   0xdf3d6f14,0xdf3d6f14, 0x7844db86,0x7844db86
1089         data4   0xcaaff381,0xcaaff381, 0xb968c43e,0xb968c43e
1090         data4   0x3824342c,0x3824342c, 0xc2a3405f,0xc2a3405f
1091         data4   0x161dc372,0x161dc372, 0xbce2250c,0xbce2250c
1092         data4   0x283c498b,0x283c498b, 0xff0d9541,0xff0d9541
1093         data4   0x39a80171,0x39a80171, 0x080cb3de,0x080cb3de
1094         data4   0xd8b4e49c,0xd8b4e49c, 0x6456c190,0x6456c190
1095         data4   0x7bcb8461,0x7bcb8461, 0xd532b670,0xd532b670
1096         data4   0x486c5c74,0x486c5c74, 0xd0b85742,0xd0b85742
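1097 // Td4: 256 one-byte entries; with the 2048 bytes of data4 words above they make up the 2048+256 given to .size below.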
1098         data1   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
1099         data1   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1100         data1   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1101         data1   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1102         data1   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1103         data1   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1104         data1   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1105         data1   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1106         data1   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1107         data1   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1108         data1   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1109         data1   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1110         data1   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1111         data1   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1112         data1   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1113         data1   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1114         data1   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1115         data1   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1116         data1   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1117         data1   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1118         data1   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1119         data1   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1120         data1   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1121         data1   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1122         data1   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1123         data1   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1124         data1   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1125         data1   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1126         data1   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1127         data1   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1128         data1   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1129         data1   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1130 .size   AES_Td#,2048+256        // HP-UX assembler fails to ".-AES_Td#"