This commit was manufactured by cvs2svn to create branch
[openssl.git] / crypto / aes / asm / aes-ia64.S
1 // ====================================================================
2 // Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
3 // project. Rights for redistribution and usage in source and binary
4 // forms are granted according to the OpenSSL license.
5 // ====================================================================
6 //
7 // What's wrong with compiler-generated code? The compiler never uses
8 // the variable 'shr', which is pairable with 'extr'/'dep' instructions.
9 // It also uses 'zxt', which is an I-type, but can be replaced with
10 // 'and', which in turn can be assigned to an M-port [there are twice
11 // as many M-ports as I-ports on Itanium 2]. By sacrificing a few
12 // registers for small constants (255, 24 and 16) to be used with
13 // 'shr' and 'and' instructions I can achieve better ILP, Instruction
14 // Level Parallelism, and performance. This code outperforms GCC 3.3
15 // generated code by over a factor of 2 (two), GCC 3.4 by 70% and
16 // HP C by 40%. The measured best-case scenario, i.e. aligned
17 // big-endian input, ECB timing on Itanium 2, is (18 + 13*rounds)
18 // ticks per block, or 9.25 CPU cycles per byte for a 128-bit key.
19
20 // Version 1.2 mitigates the hazard of cache-timing attacks by
21 // a) compressing S-boxes from 8KB to 2KB+256B, b) scheduling
22 // references to S-boxes for L2 cache latency, c) prefetching T[ed]4
23 // prior to the last round. As a result performance dropped to
24 // (26 + 15*rounds) ticks per block or 11 cycles per byte processed
25 // with a 128-bit key. This is ~16% deterioration. For reference,
26 // Itanium 2 L1 cache has 64 bytes line size and L2 - 128 bytes...
27
28 .ident  "aes-ia64.S, version 1.2"
29 .ident  "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
30 .explicit
31 .text
32
// Key-schedule pointers read by LDKEY. The callers start rk1 KSZ bytes
// after rk0 and every LDKEY post-increments by 2*KSZ, so rk0 fetches
// the even-numbered and rk1 the odd-numbered round-key words.
33 rk0=r8;     rk1=r9;
34
// Registers in which the public entry points keep the caller's ar.pfs,
// ar.lc and pr, followed by registers preloaded with the constants
// 0xff, 24 and 16 so that 'and'/'shr' can take them as register operands.
35 pfssave=r2;
36 lcsave=r10;
37 prsave=r3;
38 maskff=r11;
39 twenty4=r14;
40 sixteen=r15;
41
// teXY is the temporary for the lookup through table pointer teX that
// is indexed by a byte of state word sY. In the encrypt routine it
// holds, in turn, the byte index, the entry address and the loaded value.
42 te00=r16;   te11=r17;   te22=r18;   te33=r19;
43 te01=r20;   te12=r21;   te23=r22;   te30=r23;
44 te02=r24;   te13=r25;   te20=r26;   te31=r27;
45 te03=r28;   te10=r29;   te21=r30;   te32=r31;
46
47 // these are rotating... (r32-r39, the 8 rotating registers requested
// by the inner routines' alloc; a value written to tN in one loop pass
// is read back as sN after the register rotation done by br.ctop)
48 t0=r32;     s0=r33;
49 t1=r34;     s1=r35;
50 t2=r36;     s2=r37;
51 t3=r38;     s3=r39;
52
// Table pointers: te0 is the table base supplied by the caller, te1-te3
// are derived from it by adding the TE1-TE3 byte offsets defined below.
// On entry to the inner routines te3 temporarily carries the round count.
53 te0=r40;    te1=r41;    te2=r42;    te3=r43;
54
// ADDP forms a usable address from a pointer argument: addp4 when
// building for HP-UX without _LP64 (32-bit pointers), plain add otherwise.
55 #if defined(_HPUX_SOURCE) && !defined(_LP64)
56 # define ADDP   addp4
57 #else
58 # define ADDP   add
59 #endif
60
61 // Offsets from Te0
// (byte offsets; TE1 and TE3 trade places on HP-UX / B_ENDIAN builds.
// NOTE(review): this presumably depends on the 8-byte table entry layout
// defined alongside AES_Te/AES_Td, which is outside this view -- confirm.)
62 #define TE0     0
63 #define TE2     2
64 #if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
65 #define TE1     3
66 #define TE3     1
67 #else
68 #define TE1     1
69 #define TE3     3
70 #endif
71
72 // This implies that AES_KEY comprises 32-bit key schedule elements
73 // even on LP64 platforms.
// KSZ is the size in bytes of one key schedule element and LDKEY the
// matching load instruction; both can be overridden by predefining KSZ.
74 #ifndef KSZ
75 # define KSZ    4
76 # define LDKEY  ld4
77 #endif
78
79 .proc   _ia64_AES_encrypt#
// Inner block-encryption routine used by AES_encrypt. It is entered
// with br.call b6=... and returns through b6 (see .altrp below).
// All rounds but the last run in the rotating-register loop at .Le_top;
// the last round is the straight-line code that follows .Le_end.
80 // Input:       rk0-rk1
81 //              te0
82 //              te3     as AES_KEY->rounds!!!
83 //              s0-s3
84 //              maskff,twenty4,sixteen
85 // Output:      r16,r20,r24,r28 as s0-s3
86 // Clobber:     r16-r31,rk0-rk1,r32-r43
// Trailing comments read "<n>/<w>:<what>". <n> appears to be the
// instruction-group (cycle) number within the round and <w> the output
// word being worked on -- NOTE(review): inferred from usage, confirm.
87 .align  32
88 _ia64_AES_encrypt:
89         .prologue
90         .altrp  b6
91         .body
// Frame: 12 inputs (r32-r43), of which r32-r39 rotate. Load round key 0,
// derive te1/te2 from te0 and turn the round count in te3 into ar.lc.
92 { .mmi; alloc   r16=ar.pfs,12,0,0,8
93         LDKEY   t0=[rk0],2*KSZ
94         mov     pr.rot=1<<16    }
95 { .mmi; LDKEY   t1=[rk1],2*KSZ
96         add     te1=TE1,te0
97         add     te3=-3,te3      };;
98 { .mib; LDKEY   t2=[rk0],2*KSZ
99         mov     ar.ec=2         }
100 { .mib; LDKEY   t3=[rk1],2*KSZ
101         add     te2=TE2,te0
102         brp.loop.imp    .Le_top,.Le_end-16      };;
103
// Initial AddRoundKey: s ^= rk[0..3]. te3 is free once ar.lc has been
// loaded from it, so it becomes the fourth table pointer here.
104 { .mmi; xor     s0=s0,t0
105         xor     s1=s1,t1
106         mov     ar.lc=te3       }
107 { .mmi; xor     s2=s2,t2
108         xor     s3=s3,t3
109         add     te3=TE3,te0     };;
110 \f
// Loop control: ar.lc=rounds-3 together with ar.ec=2 makes br.ctop run
// the body rounds-1 times. While ar.lc is still counting p16 is set and
// keeps p17 clear; on the final pass p16 has rotated into p17, which
// enables the four (p17) adds that move te0-te3 from the 8-byte-entry
// table onto the byte table used by the last round. Each br.ctop also
// rotates r32-r39, turning the freshly computed t0-t3 into s0-s3.
// Table entries are 8 bytes apart (shladd ...,3,...) and read with ld4.
111 .align  32
112 .Le_top:
113 { .mmi; (p0)    LDKEY   t0=[rk0],2*KSZ          // 0/0:rk[0]
114         (p0)    and     te33=s3,maskff          // 0/0:s3&0xff
115         (p0)    extr.u  te22=s2,8,8     }       // 0/0:s2>>8&0xff
116 { .mmi; (p0)    LDKEY   t1=[rk1],2*KSZ          // 0/1:rk[1]
117         (p0)    and     te30=s0,maskff          // 0/1:s0&0xff
118         (p0)    shr.u   te00=s0,twenty4 };;     // 0/0:s0>>24
119 { .mmi; (p0)    LDKEY   t2=[rk0],2*KSZ          // 1/2:rk[2]
120         (p0)    shladd  te33=te33,3,te3         // 1/0:te3+s3&0xff
121         (p0)    extr.u  te23=s3,8,8     }       // 1/1:s3>>8&0xff
122 { .mmi; (p0)    LDKEY   t3=[rk1],2*KSZ          // 1/3:rk[3]
123         (p0)    shladd  te30=te30,3,te3         // 1/1:te3+s0&0xff
124         (p0)    shr.u   te01=s1,twenty4 };;     // 1/1:s1>>24
125 { .mmi; (p0)    ld4     te33=[te33]             // 2/0:te3[s3&0xff]
126         (p0)    shladd  te22=te22,3,te2         // 2/0:te2+s2>>8&0xff
127         (p0)    extr.u  te20=s0,8,8     }       // 2/2:s0>>8&0xff
128 { .mmi; (p0)    ld4     te30=[te30]             // 2/1:te3[s0&0xff]
129         (p0)    shladd  te23=te23,3,te2         // 2/1:te2+s3>>8
130         (p0)    shr.u   te02=s2,twenty4 };;     // 2/2:s2>>24
131 { .mmi; (p0)    ld4     te22=[te22]             // 3/0:te2[s2>>8]
132         (p0)    shladd  te20=te20,3,te2         // 3/2:te2+s0>>8
133         (p0)    extr.u  te21=s1,8,8     }       // 3/3:s1>>8&0xff
134 { .mmi; (p0)    ld4     te23=[te23]             // 3/1:te2[s3>>8]
135         (p0)    shladd  te00=te00,3,te0         // 3/0:te0+s0>>24
136         (p0)    shr.u   te03=s3,twenty4 };;     // 3/3:s3>>24
137 { .mmi; (p0)    ld4     te20=[te20]             // 4/2:te2[s0>>8]
138         (p0)    shladd  te21=te21,3,te2         // 4/3:te2+s1>>8
139         (p0)    extr.u  te11=s1,16,8    }       // 4/0:s1>>16&0xff
140 { .mmi; (p0)    ld4     te00=[te00]             // 4/0:te0[s0>>24]
141         (p0)    shladd  te01=te01,3,te0         // 4/1:te0+s1>>24
142         (p0)    shr.u   te13=s3,sixteen };;     // 4/2:s3>>16
143 { .mmi; (p0)    ld4     te21=[te21]             // 5/3:te2[s1>>8]
144         (p0)    shladd  te11=te11,3,te1         // 5/0:te1+s1>>16
145         (p0)    extr.u  te12=s2,16,8    }       // 5/1:s2>>16&0xff
146 { .mmi; (p0)    ld4     te01=[te01]             // 5/1:te0[s1>>24]
147         (p0)    shladd  te02=te02,3,te0         // 5/2:te0+s2>>24
148         (p0)    and     te31=s1,maskff  };;     // 5/2:s1&0xff
149 { .mmi; (p0)    ld4     te11=[te11]             // 6/0:te1[s1>>16]
150         (p0)    shladd  te12=te12,3,te1         // 6/1:te1+s2>>16
151         (p0)    extr.u  te10=s0,16,8    }       // 6/3:s0>>16&0xff
152 { .mmi; (p0)    ld4     te02=[te02]             // 6/2:te0[s2>>24]
153         (p0)    shladd  te03=te03,3,te0         // 6/3:te0+s3>>24
154         (p0)    and     te32=s2,maskff  };;     // 6/3:s2&0xff
155
156 { .mmi; (p0)    ld4     te12=[te12]             // 7/1:te1[s2>>16]
157         (p0)    shladd  te31=te31,3,te3         // 7/2:te3+s1&0xff
158         (p0)    and     te13=te13,maskff}       // 7/2:s3>>16&0xff
159 { .mmi; (p0)    ld4     te03=[te03]             // 7/3:te0[s3>>24]
160         (p0)    shladd  te32=te32,3,te3         // 7/3:te3+s2
161         (p0)    xor     t0=t0,te33      };;     // 7/0:
162 { .mmi; (p0)    ld4     te31=[te31]             // 8/2:te3[s1]
163         (p0)    shladd  te13=te13,3,te1         // 8/2:te1+s3>>16
164         (p0)    xor     t0=t0,te22      }       // 8/0:
165 { .mmi; (p0)    ld4     te32=[te32]             // 8/3:te3[s2]
166         (p0)    shladd  te10=te10,3,te1         // 8/3:te1+s0>>16
167         (p0)    xor     t1=t1,te30      };;     // 8/1:
168 { .mmi; (p0)    ld4     te13=[te13]             // 9/2:te1[s3>>16]
169         (p0)    ld4     te10=[te10]             // 9/3:te1[s0>>16]
170         (p0)    xor     t0=t0,te00      };;     // 9/0:         !L2 scheduling
171 { .mmi; (p0)    xor     t1=t1,te23              // 10[9]/1:
172         (p0)    xor     t2=t2,te20              // 10[9]/2:
173         (p0)    xor     t3=t3,te21      };;     // 10[9]/3:
174 { .mmi; (p0)    xor     t0=t0,te11              // 11[10]/0:done!
175         (p0)    xor     t1=t1,te01              // 11[10]/1:
176         (p0)    xor     t2=t2,te02      };;     // 11[10]/2:    !L2 scheduling
177 { .mmi; (p0)    xor     t3=t3,te03              // 12[10]/3:
178         (p16)   cmp.eq  p0,p17=r0,r0    };;     // 12[10]/clear (p17)
179 { .mmi; (p0)    xor     t1=t1,te12              // 13[11]/1:done!
180         (p0)    xor     t2=t2,te31              // 13[11]/2:
181         (p0)    xor     t3=t3,te32      }       // 13[11]/3:
// Final pass only: step past the 2048 bytes of 8-byte entries and point
// te0-te3 at offsets 0, 64, 128 and 192 of the table that follows.
182 { .mmi; (p17)   add     te0=2048,te0            // 13[11]/
183         (p17)   add     te1=2048+64-TE1,te1};;  // 13[11]/
184 { .mib; (p0)    xor     t2=t2,te13              // 14[12]/2:done!
185         (p17)   add     te2=2048+128-TE2,te2}   // 14[12]/
186 { .mib; (p0)    xor     t3=t3,te10              // 14[12]/3:done!
187         (p17)   add     te3=2048+192-TE3,te3    // 14[12]/
188         br.ctop.sptk    .Le_top         };;
189 .Le_end:
190 \f
191
// Touch all four 64-byte quarters of the byte table before the
// data-dependent ld1 lookups below; the loaded values are discarded
// (te12/te31/te10/te32 are overwritten later in the round).
192 { .mmi; ld8     te12=[te0]              // prefetch Te4
193         ld8     te31=[te1]      }
194 { .mmi; ld8     te10=[te2]
195         ld8     te32=[te3]      }
196
// Last round: every lookup is a single byte (ld1) indexed from te0; the
// bytes are merged with dep/shl and xor-ed with the final round key.
197 { .mmi; LDKEY   t0=[rk0],2*KSZ          // 0/0:rk[0]
198         and     te33=s3,maskff          // 0/0:s3&0xff
199         extr.u  te22=s2,8,8     }       // 0/0:s2>>8&0xff
200 { .mmi; LDKEY   t1=[rk1],2*KSZ          // 0/1:rk[1]
201         and     te30=s0,maskff          // 0/1:s0&0xff
202         shr.u   te00=s0,twenty4 };;     // 0/0:s0>>24
203 { .mmi; LDKEY   t2=[rk0],2*KSZ          // 1/2:rk[2]
204         add     te33=te33,te0           // 1/0:te0+s3&0xff
205         extr.u  te23=s3,8,8     }       // 1/1:s3>>8&0xff
206 { .mmi; LDKEY   t3=[rk1],2*KSZ          // 1/3:rk[3]
207         add     te30=te30,te0           // 1/1:te0+s0
208         shr.u   te01=s1,twenty4 };;     // 1/1:s1>>24
209 { .mmi; ld1     te33=[te33]             // 2/0:te0[s3&0xff]
210         add     te22=te22,te0           // 2/0:te0+s2>>8&0xff
211         extr.u  te20=s0,8,8     }       // 2/2:s0>>8&0xff
212 { .mmi; ld1     te30=[te30]             // 2/1:te0[s0]
213         add     te23=te23,te0           // 2/1:te0+s3>>8
214         shr.u   te02=s2,twenty4 };;     // 2/2:s2>>24
215 { .mmi; ld1     te22=[te22]             // 3/0:te0[s2>>8]
216         add     te20=te20,te0           // 3/2:te0+s0>>8
217         extr.u  te21=s1,8,8     }       // 3/3:s1>>8&0xff
218 { .mmi; ld1     te23=[te23]             // 3/1:te0[s3>>8]
219         add     te00=te00,te0           // 3/0:te0+s0>>24
220         shr.u   te03=s3,twenty4 };;     // 3/3:s3>>24
221 { .mmi; ld1     te20=[te20]             // 4/2:te0[s0>>8]
222         add     te21=te21,te0           // 4/3:te0+s1>>8
223         extr.u  te11=s1,16,8    }       // 4/0:s1>>16&0xff
224 { .mmi; ld1     te00=[te00]             // 4/0:te0[s0>>24]
225         add     te01=te01,te0           // 4/1:te0+s1>>24
226         shr.u   te13=s3,sixteen };;     // 4/2:s3>>16
227 { .mmi; ld1     te21=[te21]             // 5/3:te0[s1>>8]
228         add     te11=te11,te0           // 5/0:te0+s1>>16
229         extr.u  te12=s2,16,8    }       // 5/1:s2>>16&0xff
230 { .mmi; ld1     te01=[te01]             // 5/1:te0[s1>>24]
231         add     te02=te02,te0           // 5/2:te0+s2>>24
232         and     te31=s1,maskff  };;     // 5/2:s1&0xff
233 { .mmi; ld1     te11=[te11]             // 6/0:te0[s1>>16]
234         add     te12=te12,te0           // 6/1:te0+s2>>16
235         extr.u  te10=s0,16,8    }       // 6/3:s0>>16&0xff
236 { .mmi; ld1     te02=[te02]             // 6/2:te0[s2>>24]
237         add     te03=te03,te0           // 6/3:te0+s3>>24
238         and     te32=s2,maskff  };;     // 6/3:s2&0xff
239
240 { .mmi; ld1     te12=[te12]             // 7/1:te0[s2>>16]
241         add     te31=te31,te0           // 7/2:te0+s1&0xff
242         dep     te33=te22,te33,8,8}     // 7/0:
243 { .mmi; ld1     te03=[te03]             // 7/3:te0[s3>>24]
244         add     te32=te32,te0           // 7/3:te0+s2
245         and     te13=te13,maskff};;     // 7/2:s3>>16&0xff
246 { .mmi; ld1     te31=[te31]             // 8/2:te0[s1]
247         add     te13=te13,te0           // 8/2:te0+s3>>16
248         dep     te30=te23,te30,8,8}     // 8/1:
249 { .mmi; ld1     te32=[te32]             // 8/3:te0[s2]
250         add     te10=te10,te0           // 8/3:te0+s0>>16
251         shl     te00=te00,twenty4};;    // 8/0:
252 { .mii; ld1     te13=[te13]             // 9/2:te0[s3>>16]
253         dep     te33=te11,te33,16,8     // 9/0:
254         shl     te01=te01,twenty4};;    // 9/1:
255 { .mii; ld1     te10=[te10]             // 10/3:te0[s0>>16]
256         dep     te31=te20,te31,8,8      // 10/2:
257         shl     te02=te02,twenty4};;    // 10/2:
258 { .mii; xor     t0=t0,te33              // 11/0:
259         dep     te32=te21,te32,8,8      // 11/3:
260         shl     te12=te12,sixteen};;    // 11/1:
// Results are written to the static registers r16/r20/r24/r28 because
// the caller cannot see this frame's rotating registers after return.
261 { .mii; xor     r16=t0,te00             // 12/0:done!
262         dep     te31=te13,te31,16,8     // 12/2:
263         shl     te03=te03,twenty4};;    // 12/3:
264 { .mmi; xor     t1=t1,te01              // 13/1:
265         xor     t2=t2,te02              // 13/2:
266         dep     te32=te10,te32,16,8};;  // 13/3:
267 { .mmi; xor     t1=t1,te30              // 14/1:
268         xor     r24=t2,te31             // 14/2:done!
269         xor     t3=t3,te32      };;     // 14/3:
270 { .mib; xor     r20=t1,te12             // 15/1:done!
271         xor     r28=t3,te03             // 15/3:done!
272         br.ret.sptk     b6      };;
273 .endp   _ia64_AES_encrypt#
274
275 // void AES_encrypt (const void *in,void *out,const AES_KEY *key);
// Public entry point: in0=in, in1=out, in2=key. It loads the 16-byte
// block as four big-endian 32-bit words, calls _ia64_AES_encrypt and
// stores r16/r20/r24/r28 back in the same byte order.
// The 12 output registers allocated here become r32-r43 of the callee,
// so out1/out3/out5/out7 are its s0-s3, out8 its te0 and out11 its te3
// (which carries the round count on entry).
276 .global AES_encrypt#
277 .proc   AES_encrypt#
278 .align  32
279 AES_encrypt:
280         .prologue
281         .save   ar.pfs,pfssave
282 { .mmi; alloc   pfssave=ar.pfs,3,1,12,0
283         and     out0=3,in0
284         mov     r3=ip                   }
285 { .mmi; ADDP    in0=0,in0
286         mov     loc0=psr.um
287         ADDP    out11=KSZ*60,in2        };;     // &AES_KEY->rounds
288
// r3 still holds the address of the first bundle (== AES_encrypt) when
// the add below reads it; prsave, which is also r3, is written in the
// same instruction group.
289 { .mmi; ld4     out11=[out11]                   // AES_KEY->rounds
290         add     out8=(AES_Te#-AES_encrypt#),r3  // Te0
291         .save   pr,prsave
292         mov     prsave=pr               }
293 { .mmi; rum     1<<3                            // clear um.ac
294         .save   ar.lc,lcsave
295         mov     lcsave=ar.lc            };;
296
297         .body
// Fast path, compiled for HP-UX only: when 'in' is 4-byte aligned the
// words are fetched with ld4, and when 'out' is aligned as well (p6
// clear after the call) they are stored with st4.
298 #if defined(_HPUX_SOURCE)       // HPUX is big-endian, cut 15+15 cycles...
299 { .mib; cmp.ne  p6,p0=out0,r0
300         add     out0=4,in0
301 (p6)    br.dpnt.many    .Le_i_unaligned };;
302
303 { .mmi; ld4     out1=[in0],8            // s0
304         and     out9=3,in1
305         mov     twenty4=24              }
306 { .mmi; ld4     out3=[out0],8           // s1
307         ADDP    rk0=0,in2
308         mov     sixteen=16              };;
309 { .mmi; ld4     out5=[in0]              // s2
310         cmp.ne  p6,p0=out9,r0
311         mov     maskff=0xff             }
312 { .mmb; ld4     out7=[out0]             // s3
313         ADDP    rk1=KSZ,in2
314         br.call.sptk.many       b6=_ia64_AES_encrypt    };;
315
316 { .mib; ADDP    in0=4,in1
317         ADDP    in1=0,in1
318 (p6)    br.spnt .Le_o_unaligned         };;
319
320 { .mii; mov     psr.um=loc0
321         mov     ar.pfs=pfssave
322         mov     ar.lc=lcsave            };;
323 { .mmi; st4     [in1]=r16,8             // s0
324         st4     [in0]=r20,8             // s1
325         mov     pr=prsave,0x1ffff       };;
326 { .mmb; st4     [in1]=r24               // s2
327         st4     [in0]=r28               // s3
328         br.ret.sptk.many        b0      };;
329 #endif
330
// Byte-wise input path: taken on HP-UX when 'in' is misaligned and
// unconditionally on every other target. Sixteen ld1 loads through four
// pointers (in+0..in+3, each stepping by 4) fetch the block.
331 .align  32
332 .Le_i_unaligned:
333 { .mmi; add     out0=1,in0
334         add     out2=2,in0
335         add     out4=3,in0      };;
336 { .mmi; ld1     r16=[in0],4
337         ld1     r17=[out0],4    }//;;
338 { .mmi; ld1     r18=[out2],4
339         ld1     out1=[out4],4   };;     // s0
340 { .mmi; ld1     r20=[in0],4
341         ld1     r21=[out0],4    }//;;
342 { .mmi; ld1     r22=[out2],4
343         ld1     out3=[out4],4   };;     // s1
344 { .mmi; ld1     r24=[in0],4
345         ld1     r25=[out0],4    }//;;
346 { .mmi; ld1     r26=[out2],4
347         ld1     out5=[out4],4   };;     // s2
348 { .mmi; ld1     r28=[in0]
349         ld1     r29=[out0]      }//;;
350 { .mmi; ld1     r30=[out2]
351         ld1     out7=[out4]     };;     // s3
352
// Assemble each word big-endian: byte 0 goes to bits 24-31, byte 1 to
// bits 16-23, byte 2 to bits 8-15; byte 3 is already in bits 0-7.
// The key pointers and the three constants are set up in the free slots.
353 { .mii;
354         dep     out1=r16,out1,24,8      //;;
355         dep     out3=r20,out3,24,8      }//;;
356 { .mii; ADDP    rk0=0,in2
357         dep     out5=r24,out5,24,8      //;;
358         dep     out7=r28,out7,24,8      };;
359 { .mii; ADDP    rk1=KSZ,in2
360         dep     out1=r17,out1,16,8      //;;
361         dep     out3=r21,out3,16,8      }//;;
362 { .mii; mov     twenty4=24
363         dep     out5=r25,out5,16,8      //;;
364         dep     out7=r29,out7,16,8      };;
365 { .mii; mov     sixteen=16
366         dep     out1=r18,out1,8,8       //;;
367         dep     out3=r22,out3,8,8       }//;;
368 { .mii; mov     maskff=0xff
369         dep     out5=r26,out5,8,8       //;;
370         dep     out7=r30,out7,8,8       };;
371
372 { .mib; br.call.sptk.many       b6=_ia64_AES_encrypt    };;
373
// Byte-wise output path: reached by falling through from the call above
// or, on HP-UX, by branching here when 'out' is misaligned. Each result
// word is split into four bytes and stored most significant byte first;
// st1 writes only the low byte, so the unmasked shr.u results are fine.
// ar.pfs, ar.lc, pr and the user mask are restored along the way.
374 .Le_o_unaligned:
375 { .mii; ADDP    out0=0,in1
376         extr.u  r17=r16,8,8                     // s0
377         shr.u   r19=r16,twenty4         }//;;
378 { .mii; ADDP    out1=1,in1
379         extr.u  r18=r16,16,8
380         shr.u   r23=r20,twenty4         }//;;   // s1
381 { .mii; ADDP    out2=2,in1
382         extr.u  r21=r20,8,8
383         shr.u   r22=r20,sixteen         }//;;
384 { .mii; ADDP    out3=3,in1
385         extr.u  r25=r24,8,8                     // s2
386         shr.u   r27=r24,twenty4         };;
387 { .mii; st1     [out3]=r16,4
388         extr.u  r26=r24,16,8
389         shr.u   r31=r28,twenty4         }//;;   // s3
390 { .mii; st1     [out2]=r17,4
391         extr.u  r29=r28,8,8
392         shr.u   r30=r28,sixteen         }//;;
393
394 { .mmi; st1     [out1]=r18,4
395         st1     [out0]=r19,4            };;
396 { .mmi; st1     [out3]=r20,4
397         st1     [out2]=r21,4            }//;;
398 { .mmi; st1     [out1]=r22,4
399         st1     [out0]=r23,4            };;
400 { .mmi; st1     [out3]=r24,4
401         st1     [out2]=r25,4
402         mov     pr=prsave,0x1ffff       }//;;
403 { .mmi; st1     [out1]=r26,4
404         st1     [out0]=r27,4
405         mov     ar.pfs=pfssave          };;
406 { .mmi; st1     [out3]=r28
407         st1     [out2]=r29
408         mov     ar.lc=lcsave            }//;;
409 { .mmi; st1     [out1]=r30
410         st1     [out0]=r31              }
411 { .mfb; mov     psr.um=loc0                     // restore user mask
412         br.ret.sptk.many        b0      };;
413 .endp   AES_encrypt#
414
// Generator notes. The text between "#if 0" and "#endif" below is the
// Perl script's own source, which the script prints ahead of its output;
// it is kept verbatim here. The script copies its input starting at the
// ".proc _ia64_AES_encrypt" line and exits after ".endp AES_decrypt".
// On every copied line it
//   - swaps the state word that feeds the te1x and te3x lookups
//     (te11<->te13, te33<->te31, te12<->te10, te30<->te32), going through
//     temporary td.. names so that the renames do not collide;
//   - renames AES_encrypt to AES_decrypt, .Le_ labels to .Ld_ and
//     AES_Te# to AES_Td#.
// The commented-out substitutions are the te0x/te2x lookups, which keep
// their source word. Only names are rewritten, so trailing comments in
// the generated code are carried over from the encrypt routine.
415 // *AES_decrypt are autogenerated by the following script:
416 #if 0
417 #!/usr/bin/env perl
418 print "// *AES_decrypt are autogenerated by the following script:\n#if 0\n";
419 open(PROG,'<'.$0); while(<PROG>) { print; } close(PROG);
420 print "#endif\n";
421 while(<>) {
422         $process=1      if (/\.proc\s+_ia64_AES_encrypt/);
423         next            if (!$process);
424
425         #s/te00=s0/td00=s0/;    s/te00/td00/g;
426         s/te11=s1/td13=s3/;     s/te11/td13/g;
427         #s/te22=s2/td22=s2/;    s/te22/td22/g;
428         s/te33=s3/td31=s1/;     s/te33/td31/g;
429
430         #s/te01=s1/td01=s1/;    s/te01/td01/g;
431         s/te12=s2/td10=s0/;     s/te12/td10/g;
432         #s/te23=s3/td23=s3/;    s/te23/td23/g;
433         s/te30=s0/td32=s2/;     s/te30/td32/g;
434
435         #s/te02=s2/td02=s2/;    s/te02/td02/g;
436         s/te13=s3/td11=s1/;     s/te13/td11/g;
437         #s/te20=s0/td20=s0/;    s/te20/td20/g;
438         s/te31=s1/td33=s3/;     s/te31/td33/g;
439
440         #s/te03=s3/td03=s3/;    s/te03/td03/g;
441         s/te10=s0/td12=s2/;     s/te10/td12/g;
442         #s/te21=s1/td21=s1/;    s/te21/td21/g;
443         s/te32=s2/td30=s0/;     s/te32/td30/g;
444
445         s/td/te/g;
446
447         s/AES_encrypt/AES_decrypt/g;
448         s/\.Le_/.Ld_/g;
449         s/AES_Te#/AES_Td#/g;
450
451         print;
452
453         exit            if (/\.endp\s+AES_decrypt/);
454 }
455 #endif
456 .proc   _ia64_AES_decrypt#
// Inner block-decryption routine, the generated counterpart of
// _ia64_AES_encrypt: same register interface, same loop structure
// (rotating-register loop at .Ld_top, straight-line last round after
// .Ld_end), entered via br.call b6=... and returning through b6.
// It differs from the encrypt routine in which state word feeds the
// te1x and te3x lookups (for example te13 is taken from s3 here).
//
// CAUTION: most trailing comments are inherited unchanged from the
// encrypt routine. On lines that compute, address or load a te1x/te3x
// value the comment may name the wrong state word (s1 and s3, and s0 and
// s2, trade places relative to the comment); a few shladd/add comments
// also name the wrong table. The instruction operands are authoritative.
457 // Input:       rk0-rk1
458 //              te0
459 //              te3     as AES_KEY->rounds!!!
460 //              s0-s3
461 //              maskff,twenty4,sixteen
462 // Output:      r16,r20,r24,r28 as s0-s3
463 // Clobber:     r16-r31,rk0-rk1,r32-r43
464 .align  32
465 _ia64_AES_decrypt:
466         .prologue
467         .altrp  b6
468         .body
469 { .mmi; alloc   r16=ar.pfs,12,0,0,8
470         LDKEY   t0=[rk0],2*KSZ
471         mov     pr.rot=1<<16    }
472 { .mmi; LDKEY   t1=[rk1],2*KSZ
473         add     te1=TE1,te0
474         add     te3=-3,te3      };;
475 { .mib; LDKEY   t2=[rk0],2*KSZ
476         mov     ar.ec=2         }
477 { .mib; LDKEY   t3=[rk1],2*KSZ
478         add     te2=TE2,te0
479         brp.loop.imp    .Ld_top,.Ld_end-16      };;
480
481 { .mmi; xor     s0=s0,t0
482         xor     s1=s1,t1
483         mov     ar.lc=te3       }
484 { .mmi; xor     s2=s2,t2
485         xor     s3=s3,t3
486         add     te3=TE3,te0     };;
487 \f
488 .align  32
489 .Ld_top:
490 { .mmi; (p0)    LDKEY   t0=[rk0],2*KSZ          // 0/0:rk[0]
491         (p0)    and     te31=s1,maskff          // 0/0:s3&0xff
492         (p0)    extr.u  te22=s2,8,8     }       // 0/0:s2>>8&0xff
493 { .mmi; (p0)    LDKEY   t1=[rk1],2*KSZ          // 0/1:rk[1]
494         (p0)    and     te32=s2,maskff          // 0/1:s0&0xff
495         (p0)    shr.u   te00=s0,twenty4 };;     // 0/0:s0>>24
496 { .mmi; (p0)    LDKEY   t2=[rk0],2*KSZ          // 1/2:rk[2]
497         (p0)    shladd  te31=te31,3,te3         // 1/0:te0+s0>>24
498         (p0)    extr.u  te23=s3,8,8     }       // 1/1:s3>>8&0xff
499 { .mmi; (p0)    LDKEY   t3=[rk1],2*KSZ          // 1/3:rk[3]
500         (p0)    shladd  te32=te32,3,te3         // 1/1:te3+s0
501         (p0)    shr.u   te01=s1,twenty4 };;     // 1/1:s1>>24
502 { .mmi; (p0)    ld4     te31=[te31]             // 2/0:te3[s3&0xff]
503         (p0)    shladd  te22=te22,3,te2         // 2/0:te2+s2>>8&0xff
504         (p0)    extr.u  te20=s0,8,8     }       // 2/2:s0>>8&0xff
505 { .mmi; (p0)    ld4     te32=[te32]             // 2/1:te3[s0]
506         (p0)    shladd  te23=te23,3,te2         // 2/1:te2+s3>>8
507         (p0)    shr.u   te02=s2,twenty4 };;     // 2/2:s2>>24
508 { .mmi; (p0)    ld4     te22=[te22]             // 3/0:te2[s2>>8]
509         (p0)    shladd  te20=te20,3,te2         // 3/2:te2+s0>>8
510         (p0)    extr.u  te21=s1,8,8     }       // 3/3:s1>>8&0xff
511 { .mmi; (p0)    ld4     te23=[te23]             // 3/1:te2[s3>>8]
512         (p0)    shladd  te00=te00,3,te0         // 3/0:te0+s0>>24
513         (p0)    shr.u   te03=s3,twenty4 };;     // 3/3:s3>>24
514 { .mmi; (p0)    ld4     te20=[te20]             // 4/2:te2[s0>>8]
515         (p0)    shladd  te21=te21,3,te2         // 4/3:te3+s2
516         (p0)    extr.u  te13=s3,16,8    }       // 4/0:s1>>16&0xff
517 { .mmi; (p0)    ld4     te00=[te00]             // 4/0:te0[s0>>24]
518         (p0)    shladd  te01=te01,3,te0         // 4/1:te0+s1>>24
519         (p0)    shr.u   te11=s1,sixteen };;     // 4/2:s3>>16
520 { .mmi; (p0)    ld4     te21=[te21]             // 5/3:te2[s1>>8]
521         (p0)    shladd  te13=te13,3,te1         // 5/0:te1+s1>>16
522         (p0)    extr.u  te10=s0,16,8    }       // 5/1:s2>>16&0xff
523 { .mmi; (p0)    ld4     te01=[te01]             // 5/1:te0[s1>>24]
524         (p0)    shladd  te02=te02,3,te0         // 5/2:te0+s2>>24
525         (p0)    and     te33=s3,maskff  };;     // 5/2:s1&0xff
526 { .mmi; (p0)    ld4     te13=[te13]             // 6/0:te1[s1>>16]
527         (p0)    shladd  te10=te10,3,te1         // 6/1:te1+s2>>16
528         (p0)    extr.u  te12=s2,16,8    }       // 6/3:s0>>16&0xff
529 { .mmi; (p0)    ld4     te02=[te02]             // 6/2:te0[s2>>24]
530         (p0)    shladd  te03=te03,3,te0         // 6/3:te1+s0>>16
531         (p0)    and     te30=s0,maskff  };;     // 6/3:s2&0xff
532
533 { .mmi; (p0)    ld4     te10=[te10]             // 7/1:te1[s2>>16]
534         (p0)    shladd  te33=te33,3,te3         // 7/2:te3+s1&0xff
535         (p0)    and     te11=te11,maskff}       // 7/2:s3>>16&0xff
536 { .mmi; (p0)    ld4     te03=[te03]             // 7/3:te0[s3>>24]
537         (p0)    shladd  te30=te30,3,te3         // 7/3:te3+s2
538         (p0)    xor     t0=t0,te31      };;     // 7/0:
539 { .mmi; (p0)    ld4     te33=[te33]             // 8/2:te3[s1]
540         (p0)    shladd  te11=te11,3,te1         // 8/2:te1+s3>>16
541         (p0)    xor     t0=t0,te22      }       // 8/0:
542 { .mmi; (p0)    ld4     te30=[te30]             // 8/3:te3[s2]
543         (p0)    shladd  te12=te12,3,te1         // 8/3:te1+s0>>16
544         (p0)    xor     t1=t1,te32      };;     // 8/1:
545 { .mmi; (p0)    ld4     te11=[te11]             // 9/2:te1[s3>>16]
546         (p0)    ld4     te12=[te12]             // 9/3:te1[s0>>16]
547         (p0)    xor     t0=t0,te00      };;     // 9/0:         !L2 scheduling
548 { .mmi; (p0)    xor     t1=t1,te23              // 10[9]/1:
549         (p0)    xor     t2=t2,te20              // 10[9]/2:
550         (p0)    xor     t3=t3,te21      };;     // 10[9]/3:
551 { .mmi; (p0)    xor     t0=t0,te13              // 11[10]/0:done!
552         (p0)    xor     t1=t1,te01              // 11[10]/1:
553         (p0)    xor     t2=t2,te02      };;     // 11[10]/2:    !L2 scheduling
554 { .mmi; (p0)    xor     t3=t3,te03              // 12[10]/3:
555         (p16)   cmp.eq  p0,p17=r0,r0    };;     // 12[10]/clear (p17)
556 { .mmi; (p0)    xor     t1=t1,te10              // 13[11]/1:done!
557         (p0)    xor     t2=t2,te33              // 13[11]/2:
558         (p0)    xor     t3=t3,te30      }       // 13[11]/3:
559 { .mmi; (p17)   add     te0=2048,te0            // 13[11]/
560         (p17)   add     te1=2048+64-TE1,te1};;  // 13[11]/
561 { .mib; (p0)    xor     t2=t2,te11              // 14[12]/2:done!
562         (p17)   add     te2=2048+128-TE2,te2}   // 14[12]/
563 { .mib; (p0)    xor     t3=t3,te12              // 14[12]/3:done!
564         (p17)   add     te3=2048+192-TE3,te3    // 14[12]/
565         br.ctop.sptk    .Ld_top         };;
566 .Ld_end:
567 \f
568
569 { .mmi; ld8     te10=[te0]              // prefetch Td4
570         ld8     te33=[te1]      }
571 { .mmi; ld8     te12=[te2]
572         ld8     te30=[te3]      }
573
574 { .mmi; LDKEY   t0=[rk0],2*KSZ          // 0/0:rk[0]
575         and     te31=s1,maskff          // 0/0:s3&0xff
576         extr.u  te22=s2,8,8     }       // 0/0:s2>>8&0xff
577 { .mmi; LDKEY   t1=[rk1],2*KSZ          // 0/1:rk[1]
578         and     te32=s2,maskff          // 0/1:s0&0xff
579         shr.u   te00=s0,twenty4 };;     // 0/0:s0>>24
580 { .mmi; LDKEY   t2=[rk0],2*KSZ          // 1/2:rk[2]
581         add     te31=te31,te0           // 1/0:te0+s0>>24
582         extr.u  te23=s3,8,8     }       // 1/1:s3>>8&0xff
583 { .mmi; LDKEY   t3=[rk1],2*KSZ          // 1/3:rk[3]
584         add     te32=te32,te0           // 1/1:te0+s0
585         shr.u   te01=s1,twenty4 };;     // 1/1:s1>>24
586 { .mmi; ld1     te31=[te31]             // 2/0:te0[s3&0xff]
587         add     te22=te22,te0           // 2/0:te0+s2>>8&0xff
588         extr.u  te20=s0,8,8     }       // 2/2:s0>>8&0xff
589 { .mmi; ld1     te32=[te32]             // 2/1:te0[s0]
590         add     te23=te23,te0           // 2/1:te0+s3>>8
591         shr.u   te02=s2,twenty4 };;     // 2/2:s2>>24
592 { .mmi; ld1     te22=[te22]             // 3/0:te0[s2>>8]
593         add     te20=te20,te0           // 3/2:te0+s0>>8
594         extr.u  te21=s1,8,8     }       // 3/3:s1>>8&0xff
595 { .mmi; ld1     te23=[te23]             // 3/1:te0[s3>>8]
596         add     te00=te00,te0           // 3/0:te0+s0>>24
597         shr.u   te03=s3,twenty4 };;     // 3/3:s3>>24
598 { .mmi; ld1     te20=[te20]             // 4/2:te0[s0>>8]
599         add     te21=te21,te0           // 4/3:te0+s2
600         extr.u  te13=s3,16,8    }       // 4/0:s1>>16&0xff
601 { .mmi; ld1     te00=[te00]             // 4/0:te0[s0>>24]
602         add     te01=te01,te0           // 4/1:te0+s1>>24
603         shr.u   te11=s1,sixteen };;     // 4/2:s3>>16
604 { .mmi; ld1     te21=[te21]             // 5/3:te0[s1>>8]
605         add     te13=te13,te0           // 5/0:te0+s1>>16
606         extr.u  te10=s0,16,8    }       // 5/1:s2>>16&0xff
607 { .mmi; ld1     te01=[te01]             // 5/1:te0[s1>>24]
608         add     te02=te02,te0           // 5/2:te0+s2>>24
609         and     te33=s3,maskff  };;     // 5/2:s1&0xff
610 { .mmi; ld1     te13=[te13]             // 6/0:te0[s1>>16]
611         add     te10=te10,te0           // 6/1:te0+s2>>16
612         extr.u  te12=s2,16,8    }       // 6/3:s0>>16&0xff
613 { .mmi; ld1     te02=[te02]             // 6/2:te0[s2>>24]
614         add     te03=te03,te0           // 6/3:te0+s0>>16
615         and     te30=s0,maskff  };;     // 6/3:s2&0xff
616
617 { .mmi; ld1     te10=[te10]             // 7/1:te0[s2>>16]
618         add     te33=te33,te0           // 7/2:te0+s1&0xff
619         dep     te31=te22,te31,8,8}     // 7/0:
620 { .mmi; ld1     te03=[te03]             // 7/3:te0[s3>>24]
621         add     te30=te30,te0           // 7/3:te0+s2
622         and     te11=te11,maskff};;     // 7/2:s3>>16&0xff
623 { .mmi; ld1     te33=[te33]             // 8/2:te0[s1]
624         add     te11=te11,te0           // 8/2:te0+s3>>16
625         dep     te32=te23,te32,8,8}     // 8/1:
626 { .mmi; ld1     te30=[te30]             // 8/3:te0[s2]
627         add     te12=te12,te0           // 8/3:te0+s0>>16
628         shl     te00=te00,twenty4};;    // 8/0:
629 { .mii; ld1     te11=[te11]             // 9/2:te0[s3>>16]
630         dep     te31=te13,te31,16,8     // 9/0:
631         shl     te01=te01,twenty4};;    // 9/1:
632 { .mii; ld1     te12=[te12]             // 10/3:te0[s0>>16]
633         dep     te33=te20,te33,8,8      // 10/2:
634         shl     te02=te02,twenty4};;    // 10/2:
635 { .mii; xor     t0=t0,te31              // 11/0:
636         dep     te30=te21,te30,8,8      // 11/3:
637         shl     te10=te10,sixteen};;    // 11/1:
638 { .mii; xor     r16=t0,te00             // 12/0:done!
639         dep     te33=te11,te33,16,8     // 12/2:
640         shl     te03=te03,twenty4};;    // 12/3:
641 { .mmi; xor     t1=t1,te01              // 13/1:
642         xor     t2=t2,te02              // 13/2:
643         dep     te30=te12,te30,16,8};;  // 13/3:
644 { .mmi; xor     t1=t1,te32              // 14/1:
645         xor     r24=t2,te33             // 14/2:done!
646         xor     t3=t3,te30      };;     // 14/3:
647 { .mib; xor     r20=t1,te10             // 15/1:done!
648         xor     r28=t3,te03             // 15/3:done!
649         br.ret.sptk     b6      };;
650 .endp   _ia64_AES_decrypt#
651
652 // void AES_decrypt (const void *in,void *out,const AES_KEY *key);
653 .global AES_decrypt#            // in0=in, in1=out, in2=key; handles one 16-byte block
654 .proc   AES_decrypt#            // loads s0..s3 from in, calls _ia64_AES_decrypt,
655 .align  32                      // then stores its r16/r20/r24/r28 results to out
656 AES_decrypt:
657         .prologue
658         .save   ar.pfs,pfssave
659 { .mmi; alloc   pfssave=ar.pfs,3,1,12,0         // 3 inputs, 1 local, 12 outputs, 0 rotating
660         and     out0=3,in0                      // out0 = in & 3, tested under _HPUX_SOURCE below
661         mov     r3=ip                   }       // r3 = ip, base for the AES_Td# offset below
662 { .mmi; ADDP    in0=0,in0                       // ADDP is defined outside this view; NOTE(review): presumably a pointer-normalizing add - confirm
663         mov     loc0=psr.um                     // save user mask, restored before return
664         ADDP    out11=KSZ*60,in2        };;     // &AES_KEY->rounds
665
666 { .mmi; ld4     out11=[out11]                   // AES_KEY->rounds
667         add     out8=(AES_Td#-AES_decrypt#),r3  // &AES_Td# (ip-relative via r3)
668         .save   pr,prsave
669         mov     prsave=pr               }       // prsave is r3: overwritten only after the add above has read it
670 { .mmi; rum     1<<3                            // clear um.ac
671         .save   ar.lc,lcsave
672         mov     lcsave=ar.lc            };;     // save loop counter, restored before return
673
674         .body
675 #if defined(_HPUX_SOURCE)       // HPUX is big-endian, cut 15+15 cycles...
676 { .mib; cmp.ne  p6,p0=out0,r0                   // p6 = input pointer not 4-byte aligned
677         add     out0=4,in0                      // out0 = in+4, second load pointer
678 (p6)    br.dpnt.many    .Ld_i_unaligned };;
679
680 { .mmi; ld4     out1=[in0],8            // s0
681         and     out9=3,in1              // out9 = out & 3
682         mov     twenty4=24              }
683 { .mmi; ld4     out3=[out0],8           // s1
684         ADDP    rk0=0,in2               // rk0 = key
685         mov     sixteen=16              };;
686 { .mmi; ld4     out5=[in0]              // s2
687         cmp.ne  p6,p0=out9,r0           // p6 = output pointer not 4-byte aligned
688         mov     maskff=0xff             }
689 { .mmb; ld4     out7=[out0]             // s3
690         ADDP    rk1=KSZ,in2             // rk1 = key+KSZ
691         br.call.sptk.many       b6=_ia64_AES_decrypt    };;     // return link in b6; results come back in r16/r20/r24/r28
692
693 { .mib; ADDP    in0=4,in1               // in0 = out+4
694         ADDP    in1=0,in1               // in1 = out
695 (p6)    br.spnt .Ld_o_unaligned         };;     // p6 from the output-alignment compare above
696
697 { .mii; mov     psr.um=loc0             // restore user mask
698         mov     ar.pfs=pfssave
699         mov     ar.lc=lcsave            };;
700 { .mmi; st4     [in1]=r16,8             // s0
701         st4     [in0]=r20,8             // s1
702         mov     pr=prsave,0x1ffff       };;     // restore saved predicates
703 { .mmb; st4     [in1]=r24               // s2
704         st4     [in0]=r28               // s3
705         br.ret.sptk.many        b0      };;
706 #endif
707
708 .align  32
709 .Ld_i_unaligned:                        // byte-wise input load; the only load path when _HPUX_SOURCE is undefined
710 { .mmi; add     out0=1,in0              // out0 = in+1
711         add     out2=2,in0              // out2 = in+2
712         add     out4=3,in0      };;     // out4 = in+3
713 { .mmi; ld1     r16=[in0],4             // four byte pointers, each stepping by 4 per word
714         ld1     r17=[out0],4    }//;;
715 { .mmi; ld1     r18=[out2],4
716         ld1     out1=[out4],4   };;     // s0
717 { .mmi; ld1     r20=[in0],4
718         ld1     r21=[out0],4    }//;;
719 { .mmi; ld1     r22=[out2],4
720         ld1     out3=[out4],4   };;     // s1
721 { .mmi; ld1     r24=[in0],4
722         ld1     r25=[out0],4    }//;;
723 { .mmi; ld1     r26=[out2],4
724         ld1     out5=[out4],4   };;     // s2
725 { .mmi; ld1     r28=[in0]
726         ld1     r29=[out0]      }//;;
727 { .mmi; ld1     r30=[out2]
728         ld1     out7=[out4]     };;     // s3
729
730 { .mii;                                 // merge bytes: byte at offset 0 -> bits 31..24, offset 1 -> 23..16, offset 2 -> 15..8
731         dep     out1=r16,out1,24,8      //;;
732         dep     out3=r20,out3,24,8      }//;;
733 { .mii; ADDP    rk0=0,in2               // rk0 = key
734         dep     out5=r24,out5,24,8      //;;
735         dep     out7=r28,out7,24,8      };;
736 { .mii; ADDP    rk1=KSZ,in2             // rk1 = key+KSZ
737         dep     out1=r17,out1,16,8      //;;
738         dep     out3=r21,out3,16,8      }//;;
739 { .mii; mov     twenty4=24
740         dep     out5=r25,out5,16,8      //;;
741         dep     out7=r29,out7,16,8      };;
742 { .mii; mov     sixteen=16
743         dep     out1=r18,out1,8,8       //;;
744         dep     out3=r22,out3,8,8       }//;;
745 { .mii; mov     maskff=0xff
746         dep     out5=r26,out5,8,8       //;;
747         dep     out7=r30,out7,8,8       };;
748
749 { .mib; br.call.sptk.many       b6=_ia64_AES_decrypt    };;     // return link in b6; results come back in r16/r20/r24/r28
750
751 .Ld_o_unaligned:                        // byte-wise output store; reached by fall-through and by the (p6) branch
752 { .mii; ADDP    out0=0,in1              // out0..out3 = out+0..out+3
753         extr.u  r17=r16,8,8                     // s0
754         shr.u   r19=r16,twenty4         }//;;
755 { .mii; ADDP    out1=1,in1
756         extr.u  r18=r16,16,8
757         shr.u   r23=r20,twenty4         }//;;   // s1
758 { .mii; ADDP    out2=2,in1
759         extr.u  r21=r20,8,8
760         shr.u   r22=r20,sixteen         }//;;
761 { .mii; ADDP    out3=3,in1
762         extr.u  r25=r24,8,8                     // s2
763         shr.u   r27=r24,twenty4         };;
764 { .mii; st1     [out3]=r16,4            // st1 writes the low byte; each pointer steps by 4 to the next word
765         extr.u  r26=r24,16,8
766         shr.u   r31=r28,twenty4         }//;;   // s3
767 { .mii; st1     [out2]=r17,4
768         extr.u  r29=r28,8,8
769         shr.u   r30=r28,sixteen         }//;;
770
771 { .mmi; st1     [out1]=r18,4
772         st1     [out0]=r19,4            };;
773 { .mmi; st1     [out3]=r20,4
774         st1     [out2]=r21,4            }//;;
775 { .mmi; st1     [out1]=r22,4
776         st1     [out0]=r23,4            };;
777 { .mmi; st1     [out3]=r24,4
778         st1     [out2]=r25,4
779         mov     pr=prsave,0x1ffff       }//;;
780 { .mmi; st1     [out1]=r26,4
781         st1     [out0]=r27,4
782         mov     ar.pfs=pfssave          };;
783 { .mmi; st1     [out3]=r28
784         st1     [out2]=r29
785         mov     ar.lc=lcsave            }//;;
786 { .mmi; st1     [out1]=r30
787         st1     [out0]=r31              }
788 { .mfb; mov     psr.um=loc0                     // restore user mask
789         br.ret.sptk.many        b0      };;
790 .endp   AES_decrypt#
791
792 // Leave the tables in the .text segment: AES_decrypt addresses AES_Td# as ip+(AES_Td#-AES_decrypt#).
793 .align  64
794 .global AES_Te#
795 .type   AES_Te#,@object
796 AES_Te: data4   0xc66363a5,0xc66363a5, 0xf87c7c84,0xf87c7c84
797         data4   0xee777799,0xee777799, 0xf67b7b8d,0xf67b7b8d
798         data4   0xfff2f20d,0xfff2f20d, 0xd66b6bbd,0xd66b6bbd
799         data4   0xde6f6fb1,0xde6f6fb1, 0x91c5c554,0x91c5c554
800         data4   0x60303050,0x60303050, 0x02010103,0x02010103
801         data4   0xce6767a9,0xce6767a9, 0x562b2b7d,0x562b2b7d
802         data4   0xe7fefe19,0xe7fefe19, 0xb5d7d762,0xb5d7d762
803         data4   0x4dababe6,0x4dababe6, 0xec76769a,0xec76769a
804         data4   0x8fcaca45,0x8fcaca45, 0x1f82829d,0x1f82829d
805         data4   0x89c9c940,0x89c9c940, 0xfa7d7d87,0xfa7d7d87
806         data4   0xeffafa15,0xeffafa15, 0xb25959eb,0xb25959eb
807         data4   0x8e4747c9,0x8e4747c9, 0xfbf0f00b,0xfbf0f00b
808         data4   0x41adadec,0x41adadec, 0xb3d4d467,0xb3d4d467
809         data4   0x5fa2a2fd,0x5fa2a2fd, 0x45afafea,0x45afafea
810         data4   0x239c9cbf,0x239c9cbf, 0x53a4a4f7,0x53a4a4f7
811         data4   0xe4727296,0xe4727296, 0x9bc0c05b,0x9bc0c05b
812         data4   0x75b7b7c2,0x75b7b7c2, 0xe1fdfd1c,0xe1fdfd1c
813         data4   0x3d9393ae,0x3d9393ae, 0x4c26266a,0x4c26266a
814         data4   0x6c36365a,0x6c36365a, 0x7e3f3f41,0x7e3f3f41
815         data4   0xf5f7f702,0xf5f7f702, 0x83cccc4f,0x83cccc4f
816         data4   0x6834345c,0x6834345c, 0x51a5a5f4,0x51a5a5f4
817         data4   0xd1e5e534,0xd1e5e534, 0xf9f1f108,0xf9f1f108
818         data4   0xe2717193,0xe2717193, 0xabd8d873,0xabd8d873
819         data4   0x62313153,0x62313153, 0x2a15153f,0x2a15153f
820         data4   0x0804040c,0x0804040c, 0x95c7c752,0x95c7c752
821         data4   0x46232365,0x46232365, 0x9dc3c35e,0x9dc3c35e
822         data4   0x30181828,0x30181828, 0x379696a1,0x379696a1
823         data4   0x0a05050f,0x0a05050f, 0x2f9a9ab5,0x2f9a9ab5
824         data4   0x0e070709,0x0e070709, 0x24121236,0x24121236
825         data4   0x1b80809b,0x1b80809b, 0xdfe2e23d,0xdfe2e23d
826         data4   0xcdebeb26,0xcdebeb26, 0x4e272769,0x4e272769
827         data4   0x7fb2b2cd,0x7fb2b2cd, 0xea75759f,0xea75759f
828         data4   0x1209091b,0x1209091b, 0x1d83839e,0x1d83839e
829         data4   0x582c2c74,0x582c2c74, 0x341a1a2e,0x341a1a2e
830         data4   0x361b1b2d,0x361b1b2d, 0xdc6e6eb2,0xdc6e6eb2
831         data4   0xb45a5aee,0xb45a5aee, 0x5ba0a0fb,0x5ba0a0fb
832         data4   0xa45252f6,0xa45252f6, 0x763b3b4d,0x763b3b4d
833         data4   0xb7d6d661,0xb7d6d661, 0x7db3b3ce,0x7db3b3ce
834         data4   0x5229297b,0x5229297b, 0xdde3e33e,0xdde3e33e
835         data4   0x5e2f2f71,0x5e2f2f71, 0x13848497,0x13848497
836         data4   0xa65353f5,0xa65353f5, 0xb9d1d168,0xb9d1d168
837         data4   0x00000000,0x00000000, 0xc1eded2c,0xc1eded2c
838         data4   0x40202060,0x40202060, 0xe3fcfc1f,0xe3fcfc1f
839         data4   0x79b1b1c8,0x79b1b1c8, 0xb65b5bed,0xb65b5bed
840         data4   0xd46a6abe,0xd46a6abe, 0x8dcbcb46,0x8dcbcb46
841         data4   0x67bebed9,0x67bebed9, 0x7239394b,0x7239394b
842         data4   0x944a4ade,0x944a4ade, 0x984c4cd4,0x984c4cd4
843         data4   0xb05858e8,0xb05858e8, 0x85cfcf4a,0x85cfcf4a
844         data4   0xbbd0d06b,0xbbd0d06b, 0xc5efef2a,0xc5efef2a
845         data4   0x4faaaae5,0x4faaaae5, 0xedfbfb16,0xedfbfb16
846         data4   0x864343c5,0x864343c5, 0x9a4d4dd7,0x9a4d4dd7
847         data4   0x66333355,0x66333355, 0x11858594,0x11858594
848         data4   0x8a4545cf,0x8a4545cf, 0xe9f9f910,0xe9f9f910
849         data4   0x04020206,0x04020206, 0xfe7f7f81,0xfe7f7f81
850         data4   0xa05050f0,0xa05050f0, 0x783c3c44,0x783c3c44
851         data4   0x259f9fba,0x259f9fba, 0x4ba8a8e3,0x4ba8a8e3
852         data4   0xa25151f3,0xa25151f3, 0x5da3a3fe,0x5da3a3fe
853         data4   0x804040c0,0x804040c0, 0x058f8f8a,0x058f8f8a
854         data4   0x3f9292ad,0x3f9292ad, 0x219d9dbc,0x219d9dbc
855         data4   0x70383848,0x70383848, 0xf1f5f504,0xf1f5f504
856         data4   0x63bcbcdf,0x63bcbcdf, 0x77b6b6c1,0x77b6b6c1
857         data4   0xafdada75,0xafdada75, 0x42212163,0x42212163
858         data4   0x20101030,0x20101030, 0xe5ffff1a,0xe5ffff1a
859         data4   0xfdf3f30e,0xfdf3f30e, 0xbfd2d26d,0xbfd2d26d
860         data4   0x81cdcd4c,0x81cdcd4c, 0x180c0c14,0x180c0c14
861         data4   0x26131335,0x26131335, 0xc3ecec2f,0xc3ecec2f
862         data4   0xbe5f5fe1,0xbe5f5fe1, 0x359797a2,0x359797a2
863         data4   0x884444cc,0x884444cc, 0x2e171739,0x2e171739
864         data4   0x93c4c457,0x93c4c457, 0x55a7a7f2,0x55a7a7f2
865         data4   0xfc7e7e82,0xfc7e7e82, 0x7a3d3d47,0x7a3d3d47
866         data4   0xc86464ac,0xc86464ac, 0xba5d5de7,0xba5d5de7
867         data4   0x3219192b,0x3219192b, 0xe6737395,0xe6737395
868         data4   0xc06060a0,0xc06060a0, 0x19818198,0x19818198
869         data4   0x9e4f4fd1,0x9e4f4fd1, 0xa3dcdc7f,0xa3dcdc7f
870         data4   0x44222266,0x44222266, 0x542a2a7e,0x542a2a7e
871         data4   0x3b9090ab,0x3b9090ab, 0x0b888883,0x0b888883
872         data4   0x8c4646ca,0x8c4646ca, 0xc7eeee29,0xc7eeee29
873         data4   0x6bb8b8d3,0x6bb8b8d3, 0x2814143c,0x2814143c
874         data4   0xa7dede79,0xa7dede79, 0xbc5e5ee2,0xbc5e5ee2
875         data4   0x160b0b1d,0x160b0b1d, 0xaddbdb76,0xaddbdb76
876         data4   0xdbe0e03b,0xdbe0e03b, 0x64323256,0x64323256
877         data4   0x743a3a4e,0x743a3a4e, 0x140a0a1e,0x140a0a1e
878         data4   0x924949db,0x924949db, 0x0c06060a,0x0c06060a
879         data4   0x4824246c,0x4824246c, 0xb85c5ce4,0xb85c5ce4
880         data4   0x9fc2c25d,0x9fc2c25d, 0xbdd3d36e,0xbdd3d36e
881         data4   0x43acacef,0x43acacef, 0xc46262a6,0xc46262a6
882         data4   0x399191a8,0x399191a8, 0x319595a4,0x319595a4
883         data4   0xd3e4e437,0xd3e4e437, 0xf279798b,0xf279798b
884         data4   0xd5e7e732,0xd5e7e732, 0x8bc8c843,0x8bc8c843
885         data4   0x6e373759,0x6e373759, 0xda6d6db7,0xda6d6db7
886         data4   0x018d8d8c,0x018d8d8c, 0xb1d5d564,0xb1d5d564
887         data4   0x9c4e4ed2,0x9c4e4ed2, 0x49a9a9e0,0x49a9a9e0
888         data4   0xd86c6cb4,0xd86c6cb4, 0xac5656fa,0xac5656fa
889         data4   0xf3f4f407,0xf3f4f407, 0xcfeaea25,0xcfeaea25
890         data4   0xca6565af,0xca6565af, 0xf47a7a8e,0xf47a7a8e
891         data4   0x47aeaee9,0x47aeaee9, 0x10080818,0x10080818
892         data4   0x6fbabad5,0x6fbabad5, 0xf0787888,0xf0787888
893         data4   0x4a25256f,0x4a25256f, 0x5c2e2e72,0x5c2e2e72
894         data4   0x381c1c24,0x381c1c24, 0x57a6a6f1,0x57a6a6f1
895         data4   0x73b4b4c7,0x73b4b4c7, 0x97c6c651,0x97c6c651
896         data4   0xcbe8e823,0xcbe8e823, 0xa1dddd7c,0xa1dddd7c
897         data4   0xe874749c,0xe874749c, 0x3e1f1f21,0x3e1f1f21
898         data4   0x964b4bdd,0x964b4bdd, 0x61bdbddc,0x61bdbddc
899         data4   0x0d8b8b86,0x0d8b8b86, 0x0f8a8a85,0x0f8a8a85
900         data4   0xe0707090,0xe0707090, 0x7c3e3e42,0x7c3e3e42
901         data4   0x71b5b5c4,0x71b5b5c4, 0xcc6666aa,0xcc6666aa
902         data4   0x904848d8,0x904848d8, 0x06030305,0x06030305
903         data4   0xf7f6f601,0xf7f6f601, 0x1c0e0e12,0x1c0e0e12
904         data4   0xc26161a3,0xc26161a3, 0x6a35355f,0x6a35355f
905         data4   0xae5757f9,0xae5757f9, 0x69b9b9d0,0x69b9b9d0
906         data4   0x17868691,0x17868691, 0x99c1c158,0x99c1c158
907         data4   0x3a1d1d27,0x3a1d1d27, 0x279e9eb9,0x279e9eb9
908         data4   0xd9e1e138,0xd9e1e138, 0xebf8f813,0xebf8f813
909         data4   0x2b9898b3,0x2b9898b3, 0x22111133,0x22111133
910         data4   0xd26969bb,0xd26969bb, 0xa9d9d970,0xa9d9d970
911         data4   0x078e8e89,0x078e8e89, 0x339494a7,0x339494a7
912         data4   0x2d9b9bb6,0x2d9b9bb6, 0x3c1e1e22,0x3c1e1e22
913         data4   0x15878792,0x15878792, 0xc9e9e920,0xc9e9e920
914         data4   0x87cece49,0x87cece49, 0xaa5555ff,0xaa5555ff
915         data4   0x50282878,0x50282878, 0xa5dfdf7a,0xa5dfdf7a
916         data4   0x038c8c8f,0x038c8c8f, 0x59a1a1f8,0x59a1a1f8
917         data4   0x09898980,0x09898980, 0x1a0d0d17,0x1a0d0d17
918         data4   0x65bfbfda,0x65bfbfda, 0xd7e6e631,0xd7e6e631
919         data4   0x844242c6,0x844242c6, 0xd06868b8,0xd06868b8
920         data4   0x824141c3,0x824141c3, 0x299999b0,0x299999b0
921         data4   0x5a2d2d77,0x5a2d2d77, 0x1e0f0f11,0x1e0f0f11
922         data4   0x7bb0b0cb,0x7bb0b0cb, 0xa85454fc,0xa85454fc
923         data4   0x6dbbbbd6,0x6dbbbbd6, 0x2c16163a,0x2c16163a
924 // Te4: 256 one-byte entries, placed right after the 2048 bytes of data4 entries above
925         data1   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
926         data1   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
927         data1   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
928         data1   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
929         data1   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
930         data1   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
931         data1   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
932         data1   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
933         data1   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
934         data1   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
935         data1   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
936         data1   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
937         data1   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
938         data1   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
939         data1   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
940         data1   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
941         data1   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
942         data1   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
943         data1   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
944         data1   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
945         data1   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
946         data1   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
947         data1   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
948         data1   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
949         data1   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
950         data1   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
951         data1   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
952         data1   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
953         data1   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
954         data1   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
955         data1   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
956         data1   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
957 .size   AES_Te#,2048+256        // HP-UX assembler fails to ".-AES_Te#"
958
959 .align  64
960 .global AES_Td#
961 .type   AES_Td#,@object
962 AES_Td: data4   0x51f4a750,0x51f4a750, 0x7e416553,0x7e416553
963         data4   0x1a17a4c3,0x1a17a4c3, 0x3a275e96,0x3a275e96
964         data4   0x3bab6bcb,0x3bab6bcb, 0x1f9d45f1,0x1f9d45f1
965         data4   0xacfa58ab,0xacfa58ab, 0x4be30393,0x4be30393
966         data4   0x2030fa55,0x2030fa55, 0xad766df6,0xad766df6
967         data4   0x88cc7691,0x88cc7691, 0xf5024c25,0xf5024c25
968         data4   0x4fe5d7fc,0x4fe5d7fc, 0xc52acbd7,0xc52acbd7
969         data4   0x26354480,0x26354480, 0xb562a38f,0xb562a38f
970         data4   0xdeb15a49,0xdeb15a49, 0x25ba1b67,0x25ba1b67
971         data4   0x45ea0e98,0x45ea0e98, 0x5dfec0e1,0x5dfec0e1
972         data4   0xc32f7502,0xc32f7502, 0x814cf012,0x814cf012
973         data4   0x8d4697a3,0x8d4697a3, 0x6bd3f9c6,0x6bd3f9c6
974         data4   0x038f5fe7,0x038f5fe7, 0x15929c95,0x15929c95
975         data4   0xbf6d7aeb,0xbf6d7aeb, 0x955259da,0x955259da
976         data4   0xd4be832d,0xd4be832d, 0x587421d3,0x587421d3
977         data4   0x49e06929,0x49e06929, 0x8ec9c844,0x8ec9c844
978         data4   0x75c2896a,0x75c2896a, 0xf48e7978,0xf48e7978
979         data4   0x99583e6b,0x99583e6b, 0x27b971dd,0x27b971dd
980         data4   0xbee14fb6,0xbee14fb6, 0xf088ad17,0xf088ad17
981         data4   0xc920ac66,0xc920ac66, 0x7dce3ab4,0x7dce3ab4
982         data4   0x63df4a18,0x63df4a18, 0xe51a3182,0xe51a3182
983         data4   0x97513360,0x97513360, 0x62537f45,0x62537f45
984         data4   0xb16477e0,0xb16477e0, 0xbb6bae84,0xbb6bae84
985         data4   0xfe81a01c,0xfe81a01c, 0xf9082b94,0xf9082b94
986         data4   0x70486858,0x70486858, 0x8f45fd19,0x8f45fd19
987         data4   0x94de6c87,0x94de6c87, 0x527bf8b7,0x527bf8b7
988         data4   0xab73d323,0xab73d323, 0x724b02e2,0x724b02e2
989         data4   0xe31f8f57,0xe31f8f57, 0x6655ab2a,0x6655ab2a
990         data4   0xb2eb2807,0xb2eb2807, 0x2fb5c203,0x2fb5c203
991         data4   0x86c57b9a,0x86c57b9a, 0xd33708a5,0xd33708a5
992         data4   0x302887f2,0x302887f2, 0x23bfa5b2,0x23bfa5b2
993         data4   0x02036aba,0x02036aba, 0xed16825c,0xed16825c
994         data4   0x8acf1c2b,0x8acf1c2b, 0xa779b492,0xa779b492
995         data4   0xf307f2f0,0xf307f2f0, 0x4e69e2a1,0x4e69e2a1
996         data4   0x65daf4cd,0x65daf4cd, 0x0605bed5,0x0605bed5
997         data4   0xd134621f,0xd134621f, 0xc4a6fe8a,0xc4a6fe8a
998         data4   0x342e539d,0x342e539d, 0xa2f355a0,0xa2f355a0
999         data4   0x058ae132,0x058ae132, 0xa4f6eb75,0xa4f6eb75
1000         data4   0x0b83ec39,0x0b83ec39, 0x4060efaa,0x4060efaa
1001         data4   0x5e719f06,0x5e719f06, 0xbd6e1051,0xbd6e1051
1002         data4   0x3e218af9,0x3e218af9, 0x96dd063d,0x96dd063d
1003         data4   0xdd3e05ae,0xdd3e05ae, 0x4de6bd46,0x4de6bd46
1004         data4   0x91548db5,0x91548db5, 0x71c45d05,0x71c45d05
1005         data4   0x0406d46f,0x0406d46f, 0x605015ff,0x605015ff
1006         data4   0x1998fb24,0x1998fb24, 0xd6bde997,0xd6bde997
1007         data4   0x894043cc,0x894043cc, 0x67d99e77,0x67d99e77
1008         data4   0xb0e842bd,0xb0e842bd, 0x07898b88,0x07898b88
1009         data4   0xe7195b38,0xe7195b38, 0x79c8eedb,0x79c8eedb
1010         data4   0xa17c0a47,0xa17c0a47, 0x7c420fe9,0x7c420fe9
1011         data4   0xf8841ec9,0xf8841ec9, 0x00000000,0x00000000
1012         data4   0x09808683,0x09808683, 0x322bed48,0x322bed48
1013         data4   0x1e1170ac,0x1e1170ac, 0x6c5a724e,0x6c5a724e
1014         data4   0xfd0efffb,0xfd0efffb, 0x0f853856,0x0f853856
1015         data4   0x3daed51e,0x3daed51e, 0x362d3927,0x362d3927
1016         data4   0x0a0fd964,0x0a0fd964, 0x685ca621,0x685ca621
1017         data4   0x9b5b54d1,0x9b5b54d1, 0x24362e3a,0x24362e3a
1018         data4   0x0c0a67b1,0x0c0a67b1, 0x9357e70f,0x9357e70f
1019         data4   0xb4ee96d2,0xb4ee96d2, 0x1b9b919e,0x1b9b919e
1020         data4   0x80c0c54f,0x80c0c54f, 0x61dc20a2,0x61dc20a2
1021         data4   0x5a774b69,0x5a774b69, 0x1c121a16,0x1c121a16
1022         data4   0xe293ba0a,0xe293ba0a, 0xc0a02ae5,0xc0a02ae5
1023         data4   0x3c22e043,0x3c22e043, 0x121b171d,0x121b171d
1024         data4   0x0e090d0b,0x0e090d0b, 0xf28bc7ad,0xf28bc7ad
1025         data4   0x2db6a8b9,0x2db6a8b9, 0x141ea9c8,0x141ea9c8
1026         data4   0x57f11985,0x57f11985, 0xaf75074c,0xaf75074c
1027         data4   0xee99ddbb,0xee99ddbb, 0xa37f60fd,0xa37f60fd
1028         data4   0xf701269f,0xf701269f, 0x5c72f5bc,0x5c72f5bc
1029         data4   0x44663bc5,0x44663bc5, 0x5bfb7e34,0x5bfb7e34
1030         data4   0x8b432976,0x8b432976, 0xcb23c6dc,0xcb23c6dc
1031         data4   0xb6edfc68,0xb6edfc68, 0xb8e4f163,0xb8e4f163
1032         data4   0xd731dcca,0xd731dcca, 0x42638510,0x42638510
1033         data4   0x13972240,0x13972240, 0x84c61120,0x84c61120
1034         data4   0x854a247d,0x854a247d, 0xd2bb3df8,0xd2bb3df8
1035         data4   0xaef93211,0xaef93211, 0xc729a16d,0xc729a16d
1036         data4   0x1d9e2f4b,0x1d9e2f4b, 0xdcb230f3,0xdcb230f3
1037         data4   0x0d8652ec,0x0d8652ec, 0x77c1e3d0,0x77c1e3d0
1038         data4   0x2bb3166c,0x2bb3166c, 0xa970b999,0xa970b999
1039         data4   0x119448fa,0x119448fa, 0x47e96422,0x47e96422
1040         data4   0xa8fc8cc4,0xa8fc8cc4, 0xa0f03f1a,0xa0f03f1a
1041         data4   0x567d2cd8,0x567d2cd8, 0x223390ef,0x223390ef
1042         data4   0x87494ec7,0x87494ec7, 0xd938d1c1,0xd938d1c1
1043         data4   0x8ccaa2fe,0x8ccaa2fe, 0x98d40b36,0x98d40b36
1044         data4   0xa6f581cf,0xa6f581cf, 0xa57ade28,0xa57ade28
1045         data4   0xdab78e26,0xdab78e26, 0x3fadbfa4,0x3fadbfa4
1046         data4   0x2c3a9de4,0x2c3a9de4, 0x5078920d,0x5078920d
1047         data4   0x6a5fcc9b,0x6a5fcc9b, 0x547e4662,0x547e4662
1048         data4   0xf68d13c2,0xf68d13c2, 0x90d8b8e8,0x90d8b8e8
1049         data4   0x2e39f75e,0x2e39f75e, 0x82c3aff5,0x82c3aff5
1050         data4   0x9f5d80be,0x9f5d80be, 0x69d0937c,0x69d0937c
1051         data4   0x6fd52da9,0x6fd52da9, 0xcf2512b3,0xcf2512b3
1052         data4   0xc8ac993b,0xc8ac993b, 0x10187da7,0x10187da7
1053         data4   0xe89c636e,0xe89c636e, 0xdb3bbb7b,0xdb3bbb7b
1054         data4   0xcd267809,0xcd267809, 0x6e5918f4,0x6e5918f4
1055         data4   0xec9ab701,0xec9ab701, 0x834f9aa8,0x834f9aa8
1056         data4   0xe6956e65,0xe6956e65, 0xaaffe67e,0xaaffe67e
1057         data4   0x21bccf08,0x21bccf08, 0xef15e8e6,0xef15e8e6
1058         data4   0xbae79bd9,0xbae79bd9, 0x4a6f36ce,0x4a6f36ce
1059         data4   0xea9f09d4,0xea9f09d4, 0x29b07cd6,0x29b07cd6
1060         data4   0x31a4b2af,0x31a4b2af, 0x2a3f2331,0x2a3f2331
1061         data4   0xc6a59430,0xc6a59430, 0x35a266c0,0x35a266c0
1062         data4   0x744ebc37,0x744ebc37, 0xfc82caa6,0xfc82caa6
1063         data4   0xe090d0b0,0xe090d0b0, 0x33a7d815,0x33a7d815
1064         data4   0xf104984a,0xf104984a, 0x41ecdaf7,0x41ecdaf7
1065         data4   0x7fcd500e,0x7fcd500e, 0x1791f62f,0x1791f62f
1066         data4   0x764dd68d,0x764dd68d, 0x43efb04d,0x43efb04d
1067         data4   0xccaa4d54,0xccaa4d54, 0xe49604df,0xe49604df
1068         data4   0x9ed1b5e3,0x9ed1b5e3, 0x4c6a881b,0x4c6a881b
1069         data4   0xc12c1fb8,0xc12c1fb8, 0x4665517f,0x4665517f
1070         data4   0x9d5eea04,0x9d5eea04, 0x018c355d,0x018c355d
1071         data4   0xfa877473,0xfa877473, 0xfb0b412e,0xfb0b412e
1072         data4   0xb3671d5a,0xb3671d5a, 0x92dbd252,0x92dbd252
1073         data4   0xe9105633,0xe9105633, 0x6dd64713,0x6dd64713
1074         data4   0x9ad7618c,0x9ad7618c, 0x37a10c7a,0x37a10c7a
1075         data4   0x59f8148e,0x59f8148e, 0xeb133c89,0xeb133c89
1076         data4   0xcea927ee,0xcea927ee, 0xb761c935,0xb761c935
1077         data4   0xe11ce5ed,0xe11ce5ed, 0x7a47b13c,0x7a47b13c
1078         data4   0x9cd2df59,0x9cd2df59, 0x55f2733f,0x55f2733f
1079         data4   0x1814ce79,0x1814ce79, 0x73c737bf,0x73c737bf
1080         data4   0x53f7cdea,0x53f7cdea, 0x5ffdaa5b,0x5ffdaa5b
1081         data4   0xdf3d6f14,0xdf3d6f14, 0x7844db86,0x7844db86
1082         data4   0xcaaff381,0xcaaff381, 0xb968c43e,0xb968c43e
1083         data4   0x3824342c,0x3824342c, 0xc2a3405f,0xc2a3405f
1084         data4   0x161dc372,0x161dc372, 0xbce2250c,0xbce2250c
1085         data4   0x283c498b,0x283c498b, 0xff0d9541,0xff0d9541
1086         data4   0x39a80171,0x39a80171, 0x080cb3de,0x080cb3de
1087         data4   0xd8b4e49c,0xd8b4e49c, 0x6456c190,0x6456c190
1088         data4   0x7bcb8461,0x7bcb8461, 0xd532b670,0xd532b670
1089         data4   0x486c5c74,0x486c5c74, 0xd0b85742,0xd0b85742
1090 // Td4: 256 one-byte entries, placed right after the 2048 bytes of data4 entries above
1091         data1   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
1092         data1   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1093         data1   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1094         data1   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1095         data1   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1096         data1   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1097         data1   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1098         data1   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1099         data1   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1100         data1   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1101         data1   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1102         data1   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1103         data1   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1104         data1   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1105         data1   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1106         data1   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1107         data1   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1108         data1   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1109         data1   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1110         data1   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1111         data1   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1112         data1   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1113         data1   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1114         data1   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1115         data1   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1116         data1   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1117         data1   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1118         data1   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1119         data1   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1120         data1   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1121         data1   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1122         data1   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1123 .size   AES_Td#,2048+256        // HP-UX assembler fails to ".-AES_Td#"