Clean-up *_DEBUG options.
[openssl.git] / crypto / aes / aes_core.c
1 /**
2  * rijndael-alg-fst.c
3  *
4  * @version 3.0 (December 2000)
5  *
6  * Optimised ANSI C code for the Rijndael cipher (now AES)
7  *
8  * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
9  * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
10  * @author Paulo Barreto <paulo.barreto@terra.com.br>
11  *
12  * This code is hereby placed in the public domain.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
15  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
18  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
21  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
23  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 /* Note: rewritten a little bit to provide error control and an OpenSSL-
28    compatible API */
29
30 #include <assert.h>
31
32 #include <stdlib.h>
33 #include <openssl/crypto.h>
34 #include <openssl/aes.h>
35 #include "aes_locl.h"
36
37 #ifndef AES_ASM
38 /*-
39 Te0[x] = S [x].[02, 01, 01, 03];
40 Te1[x] = S [x].[03, 02, 01, 01];
41 Te2[x] = S [x].[01, 03, 02, 01];
42 Te3[x] = S [x].[01, 01, 03, 02];
43
44 Td0[x] = Si[x].[0e, 09, 0d, 0b];
45 Td1[x] = Si[x].[0b, 0e, 09, 0d];
46 Td2[x] = Si[x].[0d, 0b, 0e, 09];
47 Td3[x] = Si[x].[09, 0d, 0b, 0e];
48 Td4[x] = Si[x].[01];
49 */
50
/*
 * Te0: 256-entry table of 32-bit words indexed by a byte value x.
 * Per the legend preceding the tables, Te0[x] = S[x].[02, 01, 01, 03]:
 * the four bytes of each word, most significant first, are the S-box
 * output S[x] multiplied by 02, 01, 01 and 03 (e.g. Te0[0] = 0xc66363a5,
 * whose middle bytes give S[0] = 0x63).
 * NOTE(review): presumably consumed by the encryption rounds and key
 * setup; the users of this table are outside this view -- confirm.
 */
51 static const u32 Te0[256] = {
52     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
53     0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
54     0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
55     0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
56     0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
57     0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
58     0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
59     0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
60     0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
61     0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
62     0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
63     0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
64     0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
65     0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
66     0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
67     0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
68     0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
69     0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
70     0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
71     0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
72     0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
73     0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
74     0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
75     0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
76     0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
77     0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
78     0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
79     0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
80     0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
81     0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
82     0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
83     0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
84     0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
85     0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
86     0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
87     0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
88     0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
89     0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
90     0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
91     0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
92     0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
93     0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
94     0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
95     0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
96     0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
97     0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
98     0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
99     0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
100     0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
101     0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
102     0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
103     0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
104     0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
105     0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
106     0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
107     0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
108     0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
109     0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
110     0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
111     0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
112     0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
113     0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
114     0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
115     0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
116 };
/*
 * Te1: 256-entry table of 32-bit words indexed by a byte value x.
 * Per the legend preceding the tables, Te1[x] = S[x].[03, 02, 01, 01]:
 * the four bytes of each word, most significant first, are S[x]
 * multiplied by 03, 02, 01 and 01 (e.g. Te1[0] = 0xa5c66363).
 * The coefficient order is that of Te0 moved one byte position to the
 * right, with the last byte wrapping to the front.
 */
117 static const u32 Te1[256] = {
118     0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
119     0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
120     0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
121     0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
122     0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
123     0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
124     0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
125     0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
126     0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
127     0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
128     0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
129     0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
130     0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
131     0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
132     0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
133     0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
134     0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
135     0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
136     0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
137     0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
138     0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
139     0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
140     0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
141     0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
142     0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
143     0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
144     0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
145     0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
146     0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
147     0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
148     0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
149     0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
150     0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
151     0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
152     0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
153     0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
154     0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
155     0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
156     0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
157     0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
158     0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
159     0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
160     0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
161     0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
162     0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
163     0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
164     0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
165     0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
166     0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
167     0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
168     0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
169     0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
170     0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
171     0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
172     0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
173     0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
174     0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
175     0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
176     0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
177     0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
178     0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
179     0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
180     0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
181     0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
182 };
/*
 * Te2: 256-entry table of 32-bit words indexed by a byte value x.
 * Per the legend preceding the tables, Te2[x] = S[x].[01, 03, 02, 01]:
 * the four bytes of each word, most significant first, are S[x]
 * multiplied by 01, 03, 02 and 01 (e.g. Te2[0] = 0x63a5c663).
 * The coefficient order is that of Te0 moved two byte positions to the
 * right, wrapping around.
 */
183 static const u32 Te2[256] = {
184     0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
185     0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
186     0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
187     0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
188     0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
189     0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
190     0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
191     0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
192     0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
193     0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
194     0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
195     0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
196     0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
197     0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
198     0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
199     0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
200     0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
201     0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
202     0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
203     0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
204     0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
205     0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
206     0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
207     0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
208     0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
209     0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
210     0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
211     0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
212     0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
213     0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
214     0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
215     0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
216     0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
217     0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
218     0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
219     0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
220     0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
221     0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
222     0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
223     0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
224     0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
225     0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
226     0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
227     0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
228     0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
229     0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
230     0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
231     0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
232     0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
233     0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
234     0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
235     0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
236     0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
237     0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
238     0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
239     0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
240     0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
241     0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
242     0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
243     0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
244     0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
245     0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
246     0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
247     0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
248 };
/*
 * Te3: 256-entry table of 32-bit words indexed by a byte value x.
 * Per the legend preceding the tables, Te3[x] = S[x].[01, 01, 03, 02]:
 * the four bytes of each word, most significant first, are S[x]
 * multiplied by 01, 01, 03 and 02 (e.g. Te3[0] = 0x6363a5c6).
 * The coefficient order is that of Te0 moved three byte positions to the
 * right, wrapping around.
 */
249 static const u32 Te3[256] = {
250     0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
251     0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
252     0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
253     0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
254     0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
255     0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
256     0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
257     0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
258     0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
259     0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
260     0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
261     0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
262     0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
263     0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
264     0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
265     0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
266     0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
267     0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
268     0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
269     0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
270     0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
271     0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
272     0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
273     0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
274     0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
275     0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
276     0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
277     0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
278     0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
279     0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
280     0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
281     0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
282     0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
283     0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
284     0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
285     0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
286     0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
287     0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
288     0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
289     0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
290     0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
291     0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
292     0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
293     0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
294     0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
295     0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
296     0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
297     0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
298     0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
299     0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
300     0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
301     0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
302     0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
303     0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
304     0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
305     0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
306     0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
307     0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
308     0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
309     0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
310     0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
311     0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
312     0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
313     0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
314 };
315
/*
 * Td0: 256-entry table of 32-bit words indexed by a byte value x.
 * Per the legend preceding the tables, Td0[x] = Si[x].[0e, 09, 0d, 0b]:
 * the four bytes of each word, most significant first, are the inverse
 * S-box output Si[x] multiplied by 0e, 09, 0d and 0b (e.g. Td0[0] =
 * 0x51f4a750 for Si[0] = 0x52).
 * NOTE(review): presumably consumed by the decryption rounds and the
 * decryption key setup; the users are outside this view -- confirm.
 */
316 static const u32 Td0[256] = {
317     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
318     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
319     0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
320     0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
321     0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
322     0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
323     0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
324     0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
325     0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
326     0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
327     0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
328     0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
329     0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
330     0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
331     0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
332     0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
333     0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
334     0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
335     0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
336     0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
337     0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
338     0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
339     0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
340     0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
341     0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
342     0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
343     0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
344     0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
345     0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
346     0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
347     0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
348     0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
349     0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
350     0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
351     0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
352     0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
353     0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
354     0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
355     0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
356     0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
357     0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
358     0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
359     0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
360     0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
361     0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
362     0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
363     0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
364     0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
365     0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
366     0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
367     0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
368     0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
369     0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
370     0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
371     0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
372     0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
373     0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
374     0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
375     0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
376     0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
377     0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
378     0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
379     0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
380     0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
381 };
/*
 * Td1: 256-entry table of 32-bit words indexed by a byte value x.
 * Per the legend preceding the tables, Td1[x] = Si[x].[0b, 0e, 09, 0d]:
 * the four bytes of each word, most significant first, are Si[x]
 * multiplied by 0b, 0e, 09 and 0d (e.g. Td1[0] = 0x5051f4a7).
 * The coefficient order is that of Td0 moved one byte position to the
 * right, with the last byte wrapping to the front.
 */
382 static const u32 Td1[256] = {
383     0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
384     0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
385     0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
386     0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
387     0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
388     0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
389     0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
390     0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
391     0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
392     0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
393     0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
394     0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
395     0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
396     0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
397     0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
398     0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
399     0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
400     0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
401     0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
402     0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
403     0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
404     0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
405     0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
406     0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
407     0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
408     0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
409     0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
410     0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
411     0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
412     0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
413     0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
414     0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
415     0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
416     0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
417     0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
418     0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
419     0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
420     0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
421     0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
422     0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
423     0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
424     0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
425     0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
426     0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
427     0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
428     0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
429     0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
430     0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
431     0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
432     0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
433     0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
434     0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
435     0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
436     0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
437     0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
438     0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
439     0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
440     0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
441     0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
442     0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
443     0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
444     0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
445     0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
446     0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
447 };
/*
 * Td2: 256-entry table of 32-bit words indexed by a byte value x.
 * Per the legend preceding the tables, Td2[x] = Si[x].[0d, 0b, 0e, 09]:
 * the four bytes of each word, most significant first, are Si[x]
 * multiplied by 0d, 0b, 0e and 09 (e.g. Td2[0] = 0xa75051f4).
 * The coefficient order is that of Td0 moved two byte positions to the
 * right, wrapping around.
 */
448 static const u32 Td2[256] = {
449     0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
450     0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
451     0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
452     0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
453     0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
454     0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
455     0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
456     0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
457     0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
458     0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
459     0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
460     0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
461     0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
462     0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
463     0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
464     0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
465     0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
466     0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
467     0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
468     0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
469     0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
470     0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
471     0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
472     0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
473     0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
474     0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
475     0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
476     0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
477     0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
478     0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
479     0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
480     0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
481     0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
482     0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
483     0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
484     0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
485     0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
486     0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
487     0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
488     0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
489     0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
490     0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
491     0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
492     0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
493     0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
494     0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
495     0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
496     0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
497     0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
498     0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
499     0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
500     0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
501     0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
502     0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
503     0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
504     0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
505     0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
506     0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
507     0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
508     0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
509     0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
510     0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
511     0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
512     0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
513 };
/*
 * Td3: 256-entry table of 32-bit words indexed by a byte value x.
 * Per the legend preceding the tables, Td3[x] = Si[x].[09, 0d, 0b, 0e]:
 * the four bytes of each word, most significant first, are Si[x]
 * multiplied by 09, 0d, 0b and 0e (e.g. Td3[0] = 0xf4a75051).
 * The coefficient order is that of Td0 moved three byte positions to the
 * right, wrapping around.
 */
514 static const u32 Td3[256] = {
515     0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
516     0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
517     0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
518     0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
519     0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
520     0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
521     0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
522     0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
523     0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
524     0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
525     0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
526     0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
527     0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
528     0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
529     0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
530     0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
531     0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
532     0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
533     0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
534     0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
535     0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
536     0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
537     0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
538     0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
539     0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
540     0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
541     0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
542     0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
543     0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
544     0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
545     0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
546     0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
547     0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
548     0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
549     0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
550     0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
551     0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
552     0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
553     0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
554     0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
555     0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
556     0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
557     0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
558     0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
559     0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
560     0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
561     0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
562     0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
563     0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
564     0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
565     0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
566     0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
567     0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
568     0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
569     0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
570     0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
571     0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
572     0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
573     0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
574     0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
575     0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
576     0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
577     0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
578     0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
579 };
580 static const u8 Td4[256] = {
581     0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
582     0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
583     0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
584     0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
585     0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
586     0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
587     0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
588     0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
589     0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
590     0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
591     0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
592     0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
593     0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
594     0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
595     0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
596     0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
597     0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
598     0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
599     0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
600     0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
601     0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
602     0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
603     0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
604     0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
605     0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
606     0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
607     0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
608     0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
609     0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
610     0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
611     0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
612     0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
613 };
614 static const u32 rcon[] = {
615     0x01000000, 0x02000000, 0x04000000, 0x08000000,
616     0x10000000, 0x20000000, 0x40000000, 0x80000000,
617     0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
618 };
619
620 /**
621  * Expand the cipher key into the encryption key schedule.
622  */
623 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
624                         AES_KEY *key)
625 {
626
627     u32 *rk;
628     int i = 0;
629     u32 temp;
630
631     if (!userKey || !key)
632         return -1;
633     if (bits != 128 && bits != 192 && bits != 256)
634         return -2;
635
636     rk = key->rd_key;
637
638     if (bits==128)
639         key->rounds = 10;
640     else if (bits==192)
641         key->rounds = 12;
642     else
643         key->rounds = 14;
644
645     rk[0] = GETU32(userKey     );
646     rk[1] = GETU32(userKey +  4);
647     rk[2] = GETU32(userKey +  8);
648     rk[3] = GETU32(userKey + 12);
649     if (bits == 128) {
650         while (1) {
651             temp  = rk[3];
652             rk[4] = rk[0] ^
653                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
654                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
655                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
656                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
657                 rcon[i];
658             rk[5] = rk[1] ^ rk[4];
659             rk[6] = rk[2] ^ rk[5];
660             rk[7] = rk[3] ^ rk[6];
661             if (++i == 10) {
662                 return 0;
663             }
664             rk += 4;
665         }
666     }
667     rk[4] = GETU32(userKey + 16);
668     rk[5] = GETU32(userKey + 20);
669     if (bits == 192) {
670         while (1) {
671             temp = rk[ 5];
672             rk[ 6] = rk[ 0] ^
673                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
674                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
675                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
676                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
677                 rcon[i];
678             rk[ 7] = rk[ 1] ^ rk[ 6];
679             rk[ 8] = rk[ 2] ^ rk[ 7];
680             rk[ 9] = rk[ 3] ^ rk[ 8];
681             if (++i == 8) {
682                 return 0;
683             }
684             rk[10] = rk[ 4] ^ rk[ 9];
685             rk[11] = rk[ 5] ^ rk[10];
686             rk += 6;
687         }
688     }
689     rk[6] = GETU32(userKey + 24);
690     rk[7] = GETU32(userKey + 28);
691     if (bits == 256) {
692         while (1) {
693             temp = rk[ 7];
694             rk[ 8] = rk[ 0] ^
695                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
696                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
697                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
698                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
699                 rcon[i];
700             rk[ 9] = rk[ 1] ^ rk[ 8];
701             rk[10] = rk[ 2] ^ rk[ 9];
702             rk[11] = rk[ 3] ^ rk[10];
703             if (++i == 7) {
704                 return 0;
705             }
706             temp = rk[11];
707             rk[12] = rk[ 4] ^
708                 (Te2[(temp >> 24)       ] & 0xff000000) ^
709                 (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^
710                 (Te0[(temp >>  8) & 0xff] & 0x0000ff00) ^
711                 (Te1[(temp      ) & 0xff] & 0x000000ff);
712             rk[13] = rk[ 5] ^ rk[12];
713             rk[14] = rk[ 6] ^ rk[13];
714             rk[15] = rk[ 7] ^ rk[14];
715
716             rk += 8;
717             }
718     }
719     return 0;
720 }
721
722 /**
723  * Expand the cipher key into the decryption key schedule.
724  */
725 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
726                         AES_KEY *key)
727 {
728
729     u32 *rk;
730     int i, j, status;
731     u32 temp;
732
733     /* first, start with an encryption schedule */
734     status = AES_set_encrypt_key(userKey, bits, key);
735     if (status < 0)
736         return status;
737
738     rk = key->rd_key;
739
740     /* invert the order of the round keys: */
741     for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
742         temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
743         temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
744         temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
745         temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
746     }
747     /* apply the inverse MixColumn transform to all round keys but the first and the last: */
748     for (i = 1; i < (key->rounds); i++) {
749         rk += 4;
750         rk[0] =
751             Td0[Te1[(rk[0] >> 24)       ] & 0xff] ^
752             Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
753             Td2[Te1[(rk[0] >>  8) & 0xff] & 0xff] ^
754             Td3[Te1[(rk[0]      ) & 0xff] & 0xff];
755         rk[1] =
756             Td0[Te1[(rk[1] >> 24)       ] & 0xff] ^
757             Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
758             Td2[Te1[(rk[1] >>  8) & 0xff] & 0xff] ^
759             Td3[Te1[(rk[1]      ) & 0xff] & 0xff];
760         rk[2] =
761             Td0[Te1[(rk[2] >> 24)       ] & 0xff] ^
762             Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
763             Td2[Te1[(rk[2] >>  8) & 0xff] & 0xff] ^
764             Td3[Te1[(rk[2]      ) & 0xff] & 0xff];
765         rk[3] =
766             Td0[Te1[(rk[3] >> 24)       ] & 0xff] ^
767             Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
768             Td2[Te1[(rk[3] >>  8) & 0xff] & 0xff] ^
769             Td3[Te1[(rk[3]      ) & 0xff] & 0xff];
770     }
771     return 0;
772 }
773
774 /*
775  * Encrypt a single block
776  * in and out can overlap
777  */
778 void AES_encrypt(const unsigned char *in, unsigned char *out,
779                  const AES_KEY *key) {
780
781     const u32 *rk;
782     u32 s0, s1, s2, s3, t0, t1, t2, t3;
783 #ifndef FULL_UNROLL
784     int r;
785 #endif /* ?FULL_UNROLL */
786
787     assert(in && out && key);
788     rk = key->rd_key;
789
790     /*
791      * map byte array block to cipher state
792      * and add initial round key:
793      */
794     s0 = GETU32(in     ) ^ rk[0];
795     s1 = GETU32(in +  4) ^ rk[1];
796     s2 = GETU32(in +  8) ^ rk[2];
797     s3 = GETU32(in + 12) ^ rk[3];
798 #ifdef FULL_UNROLL
799     /* round 1: */
800     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
801     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
802     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
803     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
804     /* round 2: */
805     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
806     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
807     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
808     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
809     /* round 3: */
810     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
811     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
812     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
813     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
814     /* round 4: */
815     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
816     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
817     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
818     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
819     /* round 5: */
820     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
821     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
822     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
823     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
824     /* round 6: */
825     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
826     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
827     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
828     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
829     /* round 7: */
830     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
831     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
832     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
833     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
834     /* round 8: */
835     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
836     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
837     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
838     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
839     /* round 9: */
840     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
841     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
842     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
843     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
844     if (key->rounds > 10) {
845         /* round 10: */
846         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
847         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
848         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
849         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
850         /* round 11: */
851         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
852         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
853         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
854         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
855         if (key->rounds > 12) {
856             /* round 12: */
857             s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
858             s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
859             s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
860             s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
861             /* round 13: */
862             t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
863             t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
864             t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
865             t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
866         }
867     }
868     rk += key->rounds << 2;
869 #else  /* !FULL_UNROLL */
870     /*
871      * Nr - 1 full rounds:
872      */
873     r = key->rounds >> 1;
874     for (;;) {
875         t0 =
876             Te0[(s0 >> 24)       ] ^
877             Te1[(s1 >> 16) & 0xff] ^
878             Te2[(s2 >>  8) & 0xff] ^
879             Te3[(s3      ) & 0xff] ^
880             rk[4];
881         t1 =
882             Te0[(s1 >> 24)       ] ^
883             Te1[(s2 >> 16) & 0xff] ^
884             Te2[(s3 >>  8) & 0xff] ^
885             Te3[(s0      ) & 0xff] ^
886             rk[5];
887         t2 =
888             Te0[(s2 >> 24)       ] ^
889             Te1[(s3 >> 16) & 0xff] ^
890             Te2[(s0 >>  8) & 0xff] ^
891             Te3[(s1      ) & 0xff] ^
892             rk[6];
893         t3 =
894             Te0[(s3 >> 24)       ] ^
895             Te1[(s0 >> 16) & 0xff] ^
896             Te2[(s1 >>  8) & 0xff] ^
897             Te3[(s2      ) & 0xff] ^
898             rk[7];
899
900         rk += 8;
901         if (--r == 0) {
902             break;
903         }
904
905         s0 =
906             Te0[(t0 >> 24)       ] ^
907             Te1[(t1 >> 16) & 0xff] ^
908             Te2[(t2 >>  8) & 0xff] ^
909             Te3[(t3      ) & 0xff] ^
910             rk[0];
911         s1 =
912             Te0[(t1 >> 24)       ] ^
913             Te1[(t2 >> 16) & 0xff] ^
914             Te2[(t3 >>  8) & 0xff] ^
915             Te3[(t0      ) & 0xff] ^
916             rk[1];
917         s2 =
918             Te0[(t2 >> 24)       ] ^
919             Te1[(t3 >> 16) & 0xff] ^
920             Te2[(t0 >>  8) & 0xff] ^
921             Te3[(t1      ) & 0xff] ^
922             rk[2];
923         s3 =
924             Te0[(t3 >> 24)       ] ^
925             Te1[(t0 >> 16) & 0xff] ^
926             Te2[(t1 >>  8) & 0xff] ^
927             Te3[(t2      ) & 0xff] ^
928             rk[3];
929     }
930 #endif /* ?FULL_UNROLL */
931     /*
932      * apply last round and
933      * map cipher state to byte array block:
934      */
935     s0 =
936         (Te2[(t0 >> 24)       ] & 0xff000000) ^
937         (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
938         (Te0[(t2 >>  8) & 0xff] & 0x0000ff00) ^
939         (Te1[(t3      ) & 0xff] & 0x000000ff) ^
940         rk[0];
941     PUTU32(out     , s0);
942     s1 =
943         (Te2[(t1 >> 24)       ] & 0xff000000) ^
944         (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
945         (Te0[(t3 >>  8) & 0xff] & 0x0000ff00) ^
946         (Te1[(t0      ) & 0xff] & 0x000000ff) ^
947         rk[1];
948     PUTU32(out +  4, s1);
949     s2 =
950         (Te2[(t2 >> 24)       ] & 0xff000000) ^
951         (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
952         (Te0[(t0 >>  8) & 0xff] & 0x0000ff00) ^
953         (Te1[(t1      ) & 0xff] & 0x000000ff) ^
954         rk[2];
955     PUTU32(out +  8, s2);
956     s3 =
957         (Te2[(t3 >> 24)       ] & 0xff000000) ^
958         (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
959         (Te0[(t1 >>  8) & 0xff] & 0x0000ff00) ^
960         (Te1[(t2      ) & 0xff] & 0x000000ff) ^
961         rk[3];
962     PUTU32(out + 12, s3);
963 }
964
965 /*
966  * Decrypt a single block
967  * in and out can overlap
968  */
969 void AES_decrypt(const unsigned char *in, unsigned char *out,
970                  const AES_KEY *key)
971 {
972
973     const u32 *rk;
974     u32 s0, s1, s2, s3, t0, t1, t2, t3;
975 #ifndef FULL_UNROLL
976     int r;
977 #endif /* ?FULL_UNROLL */
978
979     assert(in && out && key);
980     rk = key->rd_key;
981
982     /*
983      * map byte array block to cipher state
984      * and add initial round key:
985      */
986     s0 = GETU32(in     ) ^ rk[0];
987     s1 = GETU32(in +  4) ^ rk[1];
988     s2 = GETU32(in +  8) ^ rk[2];
989     s3 = GETU32(in + 12) ^ rk[3];
990 #ifdef FULL_UNROLL
991     /* round 1: */
992     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
993     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
994     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
995     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
996     /* round 2: */
997     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
998     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
999     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
1000     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
1001     /* round 3: */
1002     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
1003     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
1004     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
1005     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
1006     /* round 4: */
1007     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
1008     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
1009     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
1010     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
1011     /* round 5: */
1012     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
1013     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
1014     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
1015     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
1016     /* round 6: */
1017     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
1018     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
1019     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
1020     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
1021     /* round 7: */
1022     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
1023     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
1024     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
1025     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
1026     /* round 8: */
1027     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
1028     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
1029     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
1030     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
1031     /* round 9: */
1032     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
1033     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
1034     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
1035     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
1036     if (key->rounds > 10) {
1037         /* round 10: */
1038         s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
1039         s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
1040         s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
1041         s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
1042         /* round 11: */
1043         t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
1044         t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
1045         t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
1046         t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
1047         if (key->rounds > 12) {
1048             /* round 12: */
1049             s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
1050             s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
1051             s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
1052             s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
1053             /* round 13: */
1054             t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
1055             t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
1056             t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
1057             t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
1058         }
1059     }
1060     rk += key->rounds << 2;
1061 #else  /* !FULL_UNROLL */
1062     /*
1063      * Nr - 1 full rounds:
1064      */
1065     r = key->rounds >> 1;
1066     for (;;) {
1067         t0 =
1068             Td0[(s0 >> 24)       ] ^
1069             Td1[(s3 >> 16) & 0xff] ^
1070             Td2[(s2 >>  8) & 0xff] ^
1071             Td3[(s1      ) & 0xff] ^
1072             rk[4];
1073         t1 =
1074             Td0[(s1 >> 24)       ] ^
1075             Td1[(s0 >> 16) & 0xff] ^
1076             Td2[(s3 >>  8) & 0xff] ^
1077             Td3[(s2      ) & 0xff] ^
1078             rk[5];
1079         t2 =
1080             Td0[(s2 >> 24)       ] ^
1081             Td1[(s1 >> 16) & 0xff] ^
1082             Td2[(s0 >>  8) & 0xff] ^
1083             Td3[(s3      ) & 0xff] ^
1084             rk[6];
1085         t3 =
1086             Td0[(s3 >> 24)       ] ^
1087             Td1[(s2 >> 16) & 0xff] ^
1088             Td2[(s1 >>  8) & 0xff] ^
1089             Td3[(s0      ) & 0xff] ^
1090             rk[7];
1091
1092         rk += 8;
1093         if (--r == 0) {
1094             break;
1095         }
1096
1097         s0 =
1098             Td0[(t0 >> 24)       ] ^
1099             Td1[(t3 >> 16) & 0xff] ^
1100             Td2[(t2 >>  8) & 0xff] ^
1101             Td3[(t1      ) & 0xff] ^
1102             rk[0];
1103         s1 =
1104             Td0[(t1 >> 24)       ] ^
1105             Td1[(t0 >> 16) & 0xff] ^
1106             Td2[(t3 >>  8) & 0xff] ^
1107             Td3[(t2      ) & 0xff] ^
1108             rk[1];
1109         s2 =
1110             Td0[(t2 >> 24)       ] ^
1111             Td1[(t1 >> 16) & 0xff] ^
1112             Td2[(t0 >>  8) & 0xff] ^
1113             Td3[(t3      ) & 0xff] ^
1114             rk[2];
1115         s3 =
1116             Td0[(t3 >> 24)       ] ^
1117             Td1[(t2 >> 16) & 0xff] ^
1118             Td2[(t1 >>  8) & 0xff] ^
1119             Td3[(t0      ) & 0xff] ^
1120             rk[3];
1121     }
1122 #endif /* ?FULL_UNROLL */
1123     /*
1124      * apply last round and
1125      * map cipher state to byte array block:
1126      */
1127     s0 =
1128         ((u32)Td4[(t0 >> 24)       ] << 24) ^
1129         ((u32)Td4[(t3 >> 16) & 0xff] << 16) ^
1130         ((u32)Td4[(t2 >>  8) & 0xff] <<  8) ^
1131         ((u32)Td4[(t1      ) & 0xff])       ^
1132         rk[0];
1133     PUTU32(out     , s0);
1134     s1 =
1135         ((u32)Td4[(t1 >> 24)       ] << 24) ^
1136         ((u32)Td4[(t0 >> 16) & 0xff] << 16) ^
1137         ((u32)Td4[(t3 >>  8) & 0xff] <<  8) ^
1138         ((u32)Td4[(t2      ) & 0xff])       ^
1139         rk[1];
1140     PUTU32(out +  4, s1);
1141     s2 =
1142         ((u32)Td4[(t2 >> 24)       ] << 24) ^
1143         ((u32)Td4[(t1 >> 16) & 0xff] << 16) ^
1144         ((u32)Td4[(t0 >>  8) & 0xff] <<  8) ^
1145         ((u32)Td4[(t3      ) & 0xff])       ^
1146         rk[2];
1147     PUTU32(out +  8, s2);
1148     s3 =
1149         ((u32)Td4[(t3 >> 24)       ] << 24) ^
1150         ((u32)Td4[(t2 >> 16) & 0xff] << 16) ^
1151         ((u32)Td4[(t1 >>  8) & 0xff] <<  8) ^
1152         ((u32)Td4[(t0      ) & 0xff])       ^
1153         rk[3];
1154     PUTU32(out + 12, s3);
1155 }
1156
1157 #else /* AES_ASM */
1158
1159 static const u8 Te4[256] = {
1160     0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
1161     0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
1162     0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
1163     0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
1164     0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
1165     0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
1166     0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
1167     0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
1168     0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
1169     0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
1170     0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
1171     0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
1172     0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
1173     0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
1174     0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
1175     0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
1176     0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
1177     0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
1178     0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
1179     0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
1180     0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
1181     0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
1182     0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
1183     0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
1184     0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
1185     0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
1186     0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
1187     0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
1188     0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
1189     0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
1190     0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
1191     0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
1192 };
1193 static const u32 rcon[] = {
1194     0x01000000, 0x02000000, 0x04000000, 0x08000000,
1195     0x10000000, 0x20000000, 0x40000000, 0x80000000,
1196     0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
1197 };
1198
1199 /**
1200  * Expand the cipher key into the encryption key schedule.
1201  */
1202 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
1203                         AES_KEY *key)
1204 {
1205     u32 *rk;
1206     int i = 0;
1207     u32 temp;
1208
1209     if (!userKey || !key)
1210         return -1;
1211     if (bits != 128 && bits != 192 && bits != 256)
1212         return -2;
1213
1214     rk = key->rd_key;
1215
1216     if (bits==128)
1217         key->rounds = 10;
1218     else if (bits==192)
1219         key->rounds = 12;
1220     else
1221         key->rounds = 14;
1222
1223     rk[0] = GETU32(userKey     );
1224     rk[1] = GETU32(userKey +  4);
1225     rk[2] = GETU32(userKey +  8);
1226     rk[3] = GETU32(userKey + 12);
1227     if (bits == 128) {
1228         while (1) {
1229             temp  = rk[3];
1230             rk[4] = rk[0] ^
1231                 ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
1232                 ((u32)Te4[(temp >>  8) & 0xff] << 16) ^
1233                 ((u32)Te4[(temp      ) & 0xff] << 8) ^
1234                 ((u32)Te4[(temp >> 24)       ]) ^
1235                 rcon[i];
1236             rk[5] = rk[1] ^ rk[4];
1237             rk[6] = rk[2] ^ rk[5];
1238             rk[7] = rk[3] ^ rk[6];
1239             if (++i == 10) {
1240                 return 0;
1241             }
1242             rk += 4;
1243         }
1244     }
1245     rk[4] = GETU32(userKey + 16);
1246     rk[5] = GETU32(userKey + 20);
1247     if (bits == 192) {
1248         while (1) {
1249             temp = rk[ 5];
1250             rk[ 6] = rk[ 0] ^
1251                 ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
1252                 ((u32)Te4[(temp >>  8) & 0xff] << 16) ^
1253                 ((u32)Te4[(temp      ) & 0xff] << 8) ^
1254                 ((u32)Te4[(temp >> 24)       ]) ^
1255                 rcon[i];
1256             rk[ 7] = rk[ 1] ^ rk[ 6];
1257             rk[ 8] = rk[ 2] ^ rk[ 7];
1258             rk[ 9] = rk[ 3] ^ rk[ 8];
1259             if (++i == 8) {
1260                 return 0;
1261             }
1262             rk[10] = rk[ 4] ^ rk[ 9];
1263             rk[11] = rk[ 5] ^ rk[10];
1264             rk += 6;
1265         }
1266     }
1267     rk[6] = GETU32(userKey + 24);
1268     rk[7] = GETU32(userKey + 28);
1269     if (bits == 256) {
1270         while (1) {
1271             temp = rk[ 7];
1272             rk[ 8] = rk[ 0] ^
1273                 ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
1274                 ((u32)Te4[(temp >>  8) & 0xff] << 16) ^
1275                 ((u32)Te4[(temp      ) & 0xff] << 8) ^
1276                 ((u32)Te4[(temp >> 24)       ]) ^
1277                 rcon[i];
1278             rk[ 9] = rk[ 1] ^ rk[ 8];
1279             rk[10] = rk[ 2] ^ rk[ 9];
1280             rk[11] = rk[ 3] ^ rk[10];
1281             if (++i == 7) {
1282                 return 0;
1283             }
1284             temp = rk[11];
1285             rk[12] = rk[ 4] ^
1286                 ((u32)Te4[(temp >> 24)       ] << 24) ^
1287                 ((u32)Te4[(temp >> 16) & 0xff] << 16) ^
1288                 ((u32)Te4[(temp >>  8) & 0xff] << 8) ^
1289                 ((u32)Te4[(temp      ) & 0xff]);
1290             rk[13] = rk[ 5] ^ rk[12];
1291             rk[14] = rk[ 6] ^ rk[13];
1292             rk[15] = rk[ 7] ^ rk[14];
1293
1294             rk += 8;
1295         }
1296     }
1297     return 0;
1298 }
1299
1300 /**
1301  * Expand the cipher key into the decryption key schedule.
1302  */
1303 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
1304                         AES_KEY *key)
1305 {
1306
1307     u32 *rk;
1308     int i, j, status;
1309     u32 temp;
1310
1311     /* first, start with an encryption schedule */
1312     status = AES_set_encrypt_key(userKey, bits, key);
1313     if (status < 0)
1314         return status;
1315
1316     rk = key->rd_key;
1317
1318     /* invert the order of the round keys: */
1319     for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
1320         temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
1321         temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
1322         temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
1323         temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
1324     }
1325     /* apply the inverse MixColumn transform to all round keys but the first and the last: */
1326     for (i = 1; i < (key->rounds); i++) {
1327         rk += 4;
1328         for (j = 0; j < 4; j++) {
1329             u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
1330
1331             tp1 = rk[j];
1332             m = tp1 & 0x80808080;
1333             tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
1334                 ((m - (m >> 7)) & 0x1b1b1b1b);
1335             m = tp2 & 0x80808080;
1336             tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
1337                 ((m - (m >> 7)) & 0x1b1b1b1b);
1338             m = tp4 & 0x80808080;
1339             tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
1340                 ((m - (m >> 7)) & 0x1b1b1b1b);
1341             tp9 = tp8 ^ tp1;
1342             tpb = tp9 ^ tp2;
1343             tpd = tp9 ^ tp4;
1344             tpe = tp8 ^ tp4 ^ tp2;
1345 #if defined(ROTATE)
1346             rk[j] = tpe ^ ROTATE(tpd,16) ^
1347                 ROTATE(tp9,24) ^ ROTATE(tpb,8);
1348 #else
1349             rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ 
1350                 (tp9 >> 8) ^ (tp9 << 24) ^
1351                 (tpb >> 24) ^ (tpb << 8);
1352 #endif
1353         }
1354     }
1355     return 0;
1356 }
1357
1358 #endif /* AES_ASM */