AArch64 assembly pack: authenticate return addresses.
[openssl.git] / crypto / aes / aes_core.c
1 /*
2  * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the OpenSSL license (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9
10 /**
11  * rijndael-alg-fst.c
12  *
13  * @version 3.0 (December 2000)
14  *
15  * Optimised ANSI C code for the Rijndael cipher (now AES)
16  *
17  * @author Vincent Rijmen
18  * @author Antoon Bosselaers
19  * @author Paulo Barreto
20  *
21  * This code is hereby placed in the public domain.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
24  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
32  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
33  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /* Note: rewritten a little bit to provide error control and an OpenSSL-
37    compatible API */
38
39 #include <assert.h>
40
41 #include <stdlib.h>
42 #include <openssl/crypto.h>
43 #include <openssl/aes.h>
44 #include "aes_locl.h"
45
46 #ifndef AES_ASM
47 /*- Entries: S[x] (S-box) or Si[x] (inverse S-box) times GF(2^8) constants, MSB first
48 Te0[x] = S [x].[02, 01, 01, 03];
49 Te1[x] = S [x].[03, 02, 01, 01];
50 Te2[x] = S [x].[01, 03, 02, 01];
51 Te3[x] = S [x].[01, 01, 03, 02];
52
53 Td0[x] = Si[x].[0e, 09, 0d, 0b];
54 Td1[x] = Si[x].[0b, 0e, 09, 0d];
55 Td2[x] = Si[x].[0d, 0b, 0e, 09];
56 Td3[x] = Si[x].[09, 0d, 0b, 0e];
57 Td4[x] = Si[x].[01];
58 */
59
/*
 * Te0: 256-entry table of 32-bit words, indexed by a byte value x.
 * Each word holds the S-box output S[x] multiplied in GF(2^8) by the
 * constants 02, 01, 01, 03, stored from most to least significant byte
 * (e.g. Te0[0] = 0xc66363a5 with S[0] = 0x63).
 * Presumably consumed by the encryption rounds later in this file (not
 * visible here). Pure constant data: do not alter or reorder entries.
 */
60 static const u32 Te0[256] = {
61     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
62     0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
63     0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
64     0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
65     0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
66     0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
67     0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
68     0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
69     0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
70     0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
71     0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
72     0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
73     0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
74     0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
75     0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
76     0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
77     0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
78     0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
79     0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
80     0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
81     0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
82     0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
83     0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
84     0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
85     0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
86     0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
87     0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
88     0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
89     0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
90     0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
91     0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
92     0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
93     0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
94     0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
95     0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
96     0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
97     0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
98     0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
99     0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
100     0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
101     0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
102     0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
103     0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
104     0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
105     0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
106     0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
107     0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
108     0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
109     0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
110     0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
111     0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
112     0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
113     0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
114     0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
115     0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
116     0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
117     0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
118     0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
119     0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
120     0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
121     0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
122     0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
123     0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
124     0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
125 };
/*
 * Te1: 256-entry table of 32-bit words, indexed by a byte value x.
 * Each word holds the S-box output S[x] multiplied in GF(2^8) by the
 * constants 03, 02, 01, 01, stored from most to least significant byte
 * (e.g. Te1[0] = 0xa5c66363 with S[0] = 0x63), i.e. the byte pattern of
 * Te0 rotated right by one byte.
 * Pure constant data: do not alter or reorder entries.
 */
126 static const u32 Te1[256] = {
127     0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
128     0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
129     0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
130     0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
131     0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
132     0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
133     0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
134     0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
135     0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
136     0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
137     0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
138     0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
139     0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
140     0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
141     0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
142     0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
143     0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
144     0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
145     0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
146     0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
147     0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
148     0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
149     0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
150     0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
151     0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
152     0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
153     0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
154     0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
155     0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
156     0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
157     0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
158     0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
159     0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
160     0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
161     0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
162     0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
163     0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
164     0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
165     0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
166     0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
167     0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
168     0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
169     0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
170     0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
171     0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
172     0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
173     0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
174     0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
175     0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
176     0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
177     0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
178     0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
179     0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
180     0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
181     0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
182     0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
183     0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
184     0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
185     0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
186     0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
187     0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
188     0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
189     0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
190     0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
191 };
/*
 * Te2: 256-entry table of 32-bit words, indexed by a byte value x.
 * Each word holds the S-box output S[x] multiplied in GF(2^8) by the
 * constants 01, 03, 02, 01, stored from most to least significant byte
 * (e.g. Te2[0] = 0x63a5c663 with S[0] = 0x63), i.e. the byte pattern of
 * Te0 rotated right by two bytes.
 * Pure constant data: do not alter or reorder entries.
 */
192 static const u32 Te2[256] = {
193     0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
194     0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
195     0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
196     0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
197     0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
198     0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
199     0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
200     0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
201     0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
202     0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
203     0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
204     0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
205     0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
206     0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
207     0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
208     0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
209     0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
210     0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
211     0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
212     0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
213     0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
214     0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
215     0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
216     0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
217     0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
218     0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
219     0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
220     0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
221     0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
222     0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
223     0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
224     0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
225     0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
226     0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
227     0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
228     0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
229     0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
230     0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
231     0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
232     0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
233     0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
234     0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
235     0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
236     0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
237     0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
238     0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
239     0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
240     0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
241     0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
242     0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
243     0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
244     0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
245     0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
246     0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
247     0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
248     0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
249     0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
250     0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
251     0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
252     0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
253     0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
254     0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
255     0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
256     0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
257 };
/*
 * Te3: 256-entry table of 32-bit words, indexed by a byte value x.
 * Each word holds the S-box output S[x] multiplied in GF(2^8) by the
 * constants 01, 01, 03, 02, stored from most to least significant byte
 * (e.g. Te3[0] = 0x6363a5c6 with S[0] = 0x63), i.e. the byte pattern of
 * Te0 rotated right by three bytes.
 * Pure constant data: do not alter or reorder entries.
 */
258 static const u32 Te3[256] = {
259     0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
260     0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
261     0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
262     0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
263     0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
264     0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
265     0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
266     0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
267     0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
268     0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
269     0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
270     0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
271     0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
272     0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
273     0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
274     0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
275     0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
276     0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
277     0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
278     0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
279     0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
280     0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
281     0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
282     0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
283     0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
284     0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
285     0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
286     0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
287     0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
288     0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
289     0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
290     0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
291     0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
292     0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
293     0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
294     0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
295     0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
296     0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
297     0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
298     0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
299     0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
300     0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
301     0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
302     0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
303     0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
304     0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
305     0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
306     0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
307     0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
308     0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
309     0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
310     0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
311     0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
312     0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
313     0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
314     0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
315     0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
316     0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
317     0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
318     0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
319     0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
320     0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
321     0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
322     0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
323 };
324
/*
 * Td0: 256-entry table of 32-bit words, indexed by a byte value x.
 * Each word holds the inverse S-box output Si[x] multiplied in GF(2^8)
 * by the constants 0e, 09, 0d, 0b, stored from most to least significant
 * byte (e.g. Td0[0] = 0x51f4a750 with Si[0] = 0x52).
 * Presumably consumed by the decryption rounds later in this file (not
 * visible here). Pure constant data: do not alter or reorder entries.
 */
325 static const u32 Td0[256] = {
326     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
327     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
328     0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
329     0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
330     0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
331     0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
332     0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
333     0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
334     0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
335     0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
336     0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
337     0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
338     0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
339     0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
340     0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
341     0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
342     0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
343     0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
344     0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
345     0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
346     0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
347     0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
348     0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
349     0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
350     0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
351     0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
352     0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
353     0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
354     0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
355     0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
356     0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
357     0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
358     0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
359     0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
360     0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
361     0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
362     0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
363     0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
364     0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
365     0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
366     0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
367     0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
368     0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
369     0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
370     0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
371     0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
372     0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
373     0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
374     0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
375     0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
376     0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
377     0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
378     0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
379     0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
380     0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
381     0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
382     0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
383     0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
384     0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
385     0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
386     0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
387     0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
388     0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
389     0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
390 };
/*
 * Td1: 256-entry table of 32-bit words, indexed by a byte value x.
 * Each word holds the inverse S-box output Si[x] multiplied in GF(2^8)
 * by the constants 0b, 0e, 09, 0d, stored from most to least significant
 * byte (e.g. Td1[0] = 0x5051f4a7 with Si[0] = 0x52), i.e. the byte
 * pattern of Td0 rotated right by one byte.
 * Pure constant data: do not alter or reorder entries.
 */
391 static const u32 Td1[256] = {
392     0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
393     0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
394     0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
395     0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
396     0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
397     0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
398     0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
399     0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
400     0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
401     0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
402     0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
403     0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
404     0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
405     0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
406     0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
407     0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
408     0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
409     0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
410     0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
411     0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
412     0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
413     0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
414     0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
415     0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
416     0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
417     0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
418     0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
419     0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
420     0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
421     0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
422     0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
423     0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
424     0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
425     0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
426     0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
427     0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
428     0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
429     0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
430     0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
431     0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
432     0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
433     0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
434     0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
435     0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
436     0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
437     0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
438     0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
439     0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
440     0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
441     0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
442     0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
443     0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
444     0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
445     0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
446     0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
447     0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
448     0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
449     0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
450     0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
451     0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
452     0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
453     0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
454     0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
455     0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
456 };
/*
 * Td2: 256-entry table of 32-bit words, indexed by a byte value x.
 * Each word holds the inverse S-box output Si[x] multiplied in GF(2^8)
 * by the constants 0d, 0b, 0e, 09, stored from most to least significant
 * byte (e.g. Td2[0] = 0xa75051f4 with Si[0] = 0x52), i.e. the byte
 * pattern of Td0 rotated right by two bytes.
 * Pure constant data: do not alter or reorder entries.
 */
457 static const u32 Td2[256] = {
458     0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
459     0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
460     0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
461     0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
462     0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
463     0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
464     0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
465     0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
466     0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
467     0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
468     0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
469     0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
470     0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
471     0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
472     0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
473     0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
474     0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
475     0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
476     0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
477     0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
478     0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
479     0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
480     0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
481     0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
482     0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
483     0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
484     0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
485     0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
486     0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
487     0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
488     0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
489     0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
490     0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
491     0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
492     0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
493     0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
494     0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
495     0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
496     0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
497     0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
498     0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
499     0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
500     0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
501     0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
502     0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
503     0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
504     0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
505     0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
506     0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
507     0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
508     0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
509     0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
510     0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
511     0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
512     0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
513     0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
514     0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
515     0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
516     0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
517     0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
518     0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
519     0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
520     0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
521     0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
522 };
/*
 * Td3: 256-entry table of 32-bit words, indexed by a byte value x.
 * Each word holds the inverse S-box output Si[x] multiplied in GF(2^8)
 * by the constants 09, 0d, 0b, 0e, stored from most to least significant
 * byte (e.g. Td3[0] = 0xf4a75051 with Si[0] = 0x52), i.e. the byte
 * pattern of Td0 rotated right by three bytes.
 * Pure constant data: do not alter or reorder entries.
 */
523 static const u32 Td3[256] = {
524     0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
525     0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
526     0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
527     0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
528     0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
529     0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
530     0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
531     0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
532     0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
533     0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
534     0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
535     0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
536     0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
537     0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
538     0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
539     0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
540     0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
541     0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
542     0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
543     0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
544     0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
545     0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
546     0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
547     0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
548     0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
549     0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
550     0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
551     0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
552     0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
553     0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
554     0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
555     0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
556     0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
557     0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
558     0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
559     0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
560     0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
561     0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
562     0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
563     0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
564     0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
565     0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
566     0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
567     0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
568     0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
569     0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
570     0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
571     0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
572     0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
573     0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
574     0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
575     0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
576     0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
577     0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
578     0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
579     0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
580     0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
581     0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
582     0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
583     0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
584     0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
585     0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
586     0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
587     0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
588 };
589 static const u8 Td4[256] = {
590     0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
591     0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
592     0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
593     0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
594     0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
595     0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
596     0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
597     0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
598     0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
599     0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
600     0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
601     0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
602     0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
603     0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
604     0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
605     0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
606     0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
607     0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
608     0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
609     0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
610     0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
611     0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
612     0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
613     0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
614     0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
615     0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
616     0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
617     0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
618     0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
619     0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
620     0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
621     0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
622 };
623 static const u32 rcon[] = {
624     0x01000000, 0x02000000, 0x04000000, 0x08000000,
625     0x10000000, 0x20000000, 0x40000000, 0x80000000,
626     0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
627 };
628
629 /**
630  * Expand the cipher key into the encryption key schedule.
631  */
632 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
633                         AES_KEY *key)
634 {
635
636     u32 *rk;
637     int i = 0;
638     u32 temp;
639
640     if (!userKey || !key)
641         return -1;
642     if (bits != 128 && bits != 192 && bits != 256)
643         return -2;
644
645     rk = key->rd_key;
646
647     if (bits == 128)
648         key->rounds = 10;
649     else if (bits == 192)
650         key->rounds = 12;
651     else
652         key->rounds = 14;
653
654     rk[0] = GETU32(userKey     );
655     rk[1] = GETU32(userKey +  4);
656     rk[2] = GETU32(userKey +  8);
657     rk[3] = GETU32(userKey + 12);
658     if (bits == 128) {
659         while (1) {
660             temp  = rk[3];
661             rk[4] = rk[0] ^
662                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
663                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
664                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
665                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
666                 rcon[i];
667             rk[5] = rk[1] ^ rk[4];
668             rk[6] = rk[2] ^ rk[5];
669             rk[7] = rk[3] ^ rk[6];
670             if (++i == 10) {
671                 return 0;
672             }
673             rk += 4;
674         }
675     }
676     rk[4] = GETU32(userKey + 16);
677     rk[5] = GETU32(userKey + 20);
678     if (bits == 192) {
679         while (1) {
680             temp = rk[ 5];
681             rk[ 6] = rk[ 0] ^
682                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
683                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
684                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
685                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
686                 rcon[i];
687             rk[ 7] = rk[ 1] ^ rk[ 6];
688             rk[ 8] = rk[ 2] ^ rk[ 7];
689             rk[ 9] = rk[ 3] ^ rk[ 8];
690             if (++i == 8) {
691                 return 0;
692             }
693             rk[10] = rk[ 4] ^ rk[ 9];
694             rk[11] = rk[ 5] ^ rk[10];
695             rk += 6;
696         }
697     }
698     rk[6] = GETU32(userKey + 24);
699     rk[7] = GETU32(userKey + 28);
700     if (bits == 256) {
701         while (1) {
702             temp = rk[ 7];
703             rk[ 8] = rk[ 0] ^
704                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
705                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
706                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
707                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
708                 rcon[i];
709             rk[ 9] = rk[ 1] ^ rk[ 8];
710             rk[10] = rk[ 2] ^ rk[ 9];
711             rk[11] = rk[ 3] ^ rk[10];
712             if (++i == 7) {
713                 return 0;
714             }
715             temp = rk[11];
716             rk[12] = rk[ 4] ^
717                 (Te2[(temp >> 24)       ] & 0xff000000) ^
718                 (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^
719                 (Te0[(temp >>  8) & 0xff] & 0x0000ff00) ^
720                 (Te1[(temp      ) & 0xff] & 0x000000ff);
721             rk[13] = rk[ 5] ^ rk[12];
722             rk[14] = rk[ 6] ^ rk[13];
723             rk[15] = rk[ 7] ^ rk[14];
724
725             rk += 8;
726             }
727     }
728     return 0;
729 }
730
731 /**
732  * Expand the cipher key into the decryption key schedule.
733  */
734 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
735                         AES_KEY *key)
736 {
737
738     u32 *rk;
739     int i, j, status;
740     u32 temp;
741
742     /* first, start with an encryption schedule */
743     status = AES_set_encrypt_key(userKey, bits, key);
744     if (status < 0)
745         return status;
746
747     rk = key->rd_key;
748
749     /* invert the order of the round keys: */
750     for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
751         temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
752         temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
753         temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
754         temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
755     }
756     /* apply the inverse MixColumn transform to all round keys but the first and the last: */
757     for (i = 1; i < (key->rounds); i++) {
758         rk += 4;
759         rk[0] =
760             Td0[Te1[(rk[0] >> 24)       ] & 0xff] ^
761             Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
762             Td2[Te1[(rk[0] >>  8) & 0xff] & 0xff] ^
763             Td3[Te1[(rk[0]      ) & 0xff] & 0xff];
764         rk[1] =
765             Td0[Te1[(rk[1] >> 24)       ] & 0xff] ^
766             Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
767             Td2[Te1[(rk[1] >>  8) & 0xff] & 0xff] ^
768             Td3[Te1[(rk[1]      ) & 0xff] & 0xff];
769         rk[2] =
770             Td0[Te1[(rk[2] >> 24)       ] & 0xff] ^
771             Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
772             Td2[Te1[(rk[2] >>  8) & 0xff] & 0xff] ^
773             Td3[Te1[(rk[2]      ) & 0xff] & 0xff];
774         rk[3] =
775             Td0[Te1[(rk[3] >> 24)       ] & 0xff] ^
776             Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
777             Td2[Te1[(rk[3] >>  8) & 0xff] & 0xff] ^
778             Td3[Te1[(rk[3]      ) & 0xff] & 0xff];
779     }
780     return 0;
781 }
782
783 /*
784  * Encrypt a single block
785  * in and out can overlap
786  */
787 void AES_encrypt(const unsigned char *in, unsigned char *out,
788                  const AES_KEY *key) {
789
790     const u32 *rk;
791     u32 s0, s1, s2, s3, t0, t1, t2, t3;
792 #ifndef FULL_UNROLL
793     int r;
794 #endif /* ?FULL_UNROLL */
795
796     assert(in && out && key);
797     rk = key->rd_key;
798
799     /*
800      * map byte array block to cipher state
801      * and add initial round key:
802      */
803     s0 = GETU32(in     ) ^ rk[0];
804     s1 = GETU32(in +  4) ^ rk[1];
805     s2 = GETU32(in +  8) ^ rk[2];
806     s3 = GETU32(in + 12) ^ rk[3];
807 #ifdef FULL_UNROLL
808     /* round 1: */
809     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
810     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
811     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
812     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
813     /* round 2: */
814     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
815     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
816     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
817     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
818     /* round 3: */
819     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
820     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
821     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
822     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
823     /* round 4: */
824     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
825     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
826     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
827     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
828     /* round 5: */
829     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
830     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
831     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
832     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
833     /* round 6: */
834     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
835     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
836     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
837     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
838     /* round 7: */
839     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
840     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
841     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
842     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
843     /* round 8: */
844     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
845     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
846     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
847     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
848     /* round 9: */
849     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
850     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
851     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
852     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
853     if (key->rounds > 10) {
854         /* round 10: */
855         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
856         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
857         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
858         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
859         /* round 11: */
860         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
861         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
862         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
863         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
864         if (key->rounds > 12) {
865             /* round 12: */
866             s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
867             s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
868             s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
869             s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
870             /* round 13: */
871             t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
872             t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
873             t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
874             t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
875         }
876     }
877     rk += key->rounds << 2;
878 #else  /* !FULL_UNROLL */
879     /*
880      * Nr - 1 full rounds:
881      */
882     r = key->rounds >> 1;
883     for (;;) {
884         t0 =
885             Te0[(s0 >> 24)       ] ^
886             Te1[(s1 >> 16) & 0xff] ^
887             Te2[(s2 >>  8) & 0xff] ^
888             Te3[(s3      ) & 0xff] ^
889             rk[4];
890         t1 =
891             Te0[(s1 >> 24)       ] ^
892             Te1[(s2 >> 16) & 0xff] ^
893             Te2[(s3 >>  8) & 0xff] ^
894             Te3[(s0      ) & 0xff] ^
895             rk[5];
896         t2 =
897             Te0[(s2 >> 24)       ] ^
898             Te1[(s3 >> 16) & 0xff] ^
899             Te2[(s0 >>  8) & 0xff] ^
900             Te3[(s1      ) & 0xff] ^
901             rk[6];
902         t3 =
903             Te0[(s3 >> 24)       ] ^
904             Te1[(s0 >> 16) & 0xff] ^
905             Te2[(s1 >>  8) & 0xff] ^
906             Te3[(s2      ) & 0xff] ^
907             rk[7];
908
909         rk += 8;
910         if (--r == 0) {
911             break;
912         }
913
914         s0 =
915             Te0[(t0 >> 24)       ] ^
916             Te1[(t1 >> 16) & 0xff] ^
917             Te2[(t2 >>  8) & 0xff] ^
918             Te3[(t3      ) & 0xff] ^
919             rk[0];
920         s1 =
921             Te0[(t1 >> 24)       ] ^
922             Te1[(t2 >> 16) & 0xff] ^
923             Te2[(t3 >>  8) & 0xff] ^
924             Te3[(t0      ) & 0xff] ^
925             rk[1];
926         s2 =
927             Te0[(t2 >> 24)       ] ^
928             Te1[(t3 >> 16) & 0xff] ^
929             Te2[(t0 >>  8) & 0xff] ^
930             Te3[(t1      ) & 0xff] ^
931             rk[2];
932         s3 =
933             Te0[(t3 >> 24)       ] ^
934             Te1[(t0 >> 16) & 0xff] ^
935             Te2[(t1 >>  8) & 0xff] ^
936             Te3[(t2      ) & 0xff] ^
937             rk[3];
938     }
939 #endif /* ?FULL_UNROLL */
940     /*
941      * apply last round and
942      * map cipher state to byte array block:
943      */
944     s0 =
945         (Te2[(t0 >> 24)       ] & 0xff000000) ^
946         (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
947         (Te0[(t2 >>  8) & 0xff] & 0x0000ff00) ^
948         (Te1[(t3      ) & 0xff] & 0x000000ff) ^
949         rk[0];
950     PUTU32(out     , s0);
951     s1 =
952         (Te2[(t1 >> 24)       ] & 0xff000000) ^
953         (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
954         (Te0[(t3 >>  8) & 0xff] & 0x0000ff00) ^
955         (Te1[(t0      ) & 0xff] & 0x000000ff) ^
956         rk[1];
957     PUTU32(out +  4, s1);
958     s2 =
959         (Te2[(t2 >> 24)       ] & 0xff000000) ^
960         (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
961         (Te0[(t0 >>  8) & 0xff] & 0x0000ff00) ^
962         (Te1[(t1      ) & 0xff] & 0x000000ff) ^
963         rk[2];
964     PUTU32(out +  8, s2);
965     s3 =
966         (Te2[(t3 >> 24)       ] & 0xff000000) ^
967         (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
968         (Te0[(t1 >>  8) & 0xff] & 0x0000ff00) ^
969         (Te1[(t2      ) & 0xff] & 0x000000ff) ^
970         rk[3];
971     PUTU32(out + 12, s3);
972 }
973
974 /*
975  * Decrypt a single block
976  * in and out can overlap
977  */
978 void AES_decrypt(const unsigned char *in, unsigned char *out,
979                  const AES_KEY *key)
980 {
981
982     const u32 *rk;
983     u32 s0, s1, s2, s3, t0, t1, t2, t3;
984 #ifndef FULL_UNROLL
985     int r;
986 #endif /* ?FULL_UNROLL */
987
988     assert(in && out && key);
989     rk = key->rd_key;
990
991     /*
992      * map byte array block to cipher state
993      * and add initial round key:
994      */
995     s0 = GETU32(in     ) ^ rk[0];
996     s1 = GETU32(in +  4) ^ rk[1];
997     s2 = GETU32(in +  8) ^ rk[2];
998     s3 = GETU32(in + 12) ^ rk[3];
999 #ifdef FULL_UNROLL
1000     /* round 1: */
1001     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
1002     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
1003     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
1004     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
1005     /* round 2: */
1006     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
1007     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
1008     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
1009     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
1010     /* round 3: */
1011     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
1012     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
1013     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
1014     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
1015     /* round 4: */
1016     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
1017     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
1018     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
1019     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
1020     /* round 5: */
1021     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
1022     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
1023     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
1024     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
1025     /* round 6: */
1026     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
1027     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
1028     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
1029     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
1030     /* round 7: */
1031     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
1032     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
1033     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
1034     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
1035     /* round 8: */
1036     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
1037     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
1038     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
1039     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
1040     /* round 9: */
1041     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
1042     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
1043     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
1044     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
1045     if (key->rounds > 10) {
1046         /* round 10: */
1047         s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
1048         s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
1049         s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
1050         s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
1051         /* round 11: */
1052         t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
1053         t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
1054         t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
1055         t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
1056         if (key->rounds > 12) {
1057             /* round 12: */
1058             s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
1059             s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
1060             s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
1061             s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
1062             /* round 13: */
1063             t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
1064             t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
1065             t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
1066             t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
1067         }
1068     }
1069     rk += key->rounds << 2;
1070 #else  /* !FULL_UNROLL */
1071     /*
1072      * Nr - 1 full rounds:
1073      */
1074     r = key->rounds >> 1;
1075     for (;;) {
1076         t0 =
1077             Td0[(s0 >> 24)       ] ^
1078             Td1[(s3 >> 16) & 0xff] ^
1079             Td2[(s2 >>  8) & 0xff] ^
1080             Td3[(s1      ) & 0xff] ^
1081             rk[4];
1082         t1 =
1083             Td0[(s1 >> 24)       ] ^
1084             Td1[(s0 >> 16) & 0xff] ^
1085             Td2[(s3 >>  8) & 0xff] ^
1086             Td3[(s2      ) & 0xff] ^
1087             rk[5];
1088         t2 =
1089             Td0[(s2 >> 24)       ] ^
1090             Td1[(s1 >> 16) & 0xff] ^
1091             Td2[(s0 >>  8) & 0xff] ^
1092             Td3[(s3      ) & 0xff] ^
1093             rk[6];
1094         t3 =
1095             Td0[(s3 >> 24)       ] ^
1096             Td1[(s2 >> 16) & 0xff] ^
1097             Td2[(s1 >>  8) & 0xff] ^
1098             Td3[(s0      ) & 0xff] ^
1099             rk[7];
1100
1101         rk += 8;
1102         if (--r == 0) {
1103             break;
1104         }
1105
1106         s0 =
1107             Td0[(t0 >> 24)       ] ^
1108             Td1[(t3 >> 16) & 0xff] ^
1109             Td2[(t2 >>  8) & 0xff] ^
1110             Td3[(t1      ) & 0xff] ^
1111             rk[0];
1112         s1 =
1113             Td0[(t1 >> 24)       ] ^
1114             Td1[(t0 >> 16) & 0xff] ^
1115             Td2[(t3 >>  8) & 0xff] ^
1116             Td3[(t2      ) & 0xff] ^
1117             rk[1];
1118         s2 =
1119             Td0[(t2 >> 24)       ] ^
1120             Td1[(t1 >> 16) & 0xff] ^
1121             Td2[(t0 >>  8) & 0xff] ^
1122             Td3[(t3      ) & 0xff] ^
1123             rk[2];
1124         s3 =
1125             Td0[(t3 >> 24)       ] ^
1126             Td1[(t2 >> 16) & 0xff] ^
1127             Td2[(t1 >>  8) & 0xff] ^
1128             Td3[(t0      ) & 0xff] ^
1129             rk[3];
1130     }
1131 #endif /* ?FULL_UNROLL */
1132     /*
1133      * apply last round and
1134      * map cipher state to byte array block:
1135      */
1136     s0 =
1137         ((u32)Td4[(t0 >> 24)       ] << 24) ^
1138         ((u32)Td4[(t3 >> 16) & 0xff] << 16) ^
1139         ((u32)Td4[(t2 >>  8) & 0xff] <<  8) ^
1140         ((u32)Td4[(t1      ) & 0xff])       ^
1141         rk[0];
1142     PUTU32(out     , s0);
1143     s1 =
1144         ((u32)Td4[(t1 >> 24)       ] << 24) ^
1145         ((u32)Td4[(t0 >> 16) & 0xff] << 16) ^
1146         ((u32)Td4[(t3 >>  8) & 0xff] <<  8) ^
1147         ((u32)Td4[(t2      ) & 0xff])       ^
1148         rk[1];
1149     PUTU32(out +  4, s1);
1150     s2 =
1151         ((u32)Td4[(t2 >> 24)       ] << 24) ^
1152         ((u32)Td4[(t1 >> 16) & 0xff] << 16) ^
1153         ((u32)Td4[(t0 >>  8) & 0xff] <<  8) ^
1154         ((u32)Td4[(t3      ) & 0xff])       ^
1155         rk[2];
1156     PUTU32(out +  8, s2);
1157     s3 =
1158         ((u32)Td4[(t3 >> 24)       ] << 24) ^
1159         ((u32)Td4[(t2 >> 16) & 0xff] << 16) ^
1160         ((u32)Td4[(t1 >>  8) & 0xff] <<  8) ^
1161         ((u32)Td4[(t0      ) & 0xff])       ^
1162         rk[3];
1163     PUTU32(out + 12, s3);
1164 }
1165
1166 #else /* AES_ASM */
1167
1168 static const u8 Te4[256] = {
1169     0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
1170     0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
1171     0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
1172     0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
1173     0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
1174     0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
1175     0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
1176     0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
1177     0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
1178     0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
1179     0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
1180     0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
1181     0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
1182     0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
1183     0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
1184     0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
1185     0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
1186     0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
1187     0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
1188     0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
1189     0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
1190     0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
1191     0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
1192     0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
1193     0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
1194     0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
1195     0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
1196     0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
1197     0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
1198     0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
1199     0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
1200     0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
1201 };
1202 static const u32 rcon[] = {
1203     0x01000000, 0x02000000, 0x04000000, 0x08000000,
1204     0x10000000, 0x20000000, 0x40000000, 0x80000000,
1205     0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
1206 };
1207
1208 /**
1209  * Expand the cipher key into the encryption key schedule.
1210  */
1211 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
1212                         AES_KEY *key)
1213 {
1214     u32 *rk;
1215     int i = 0;
1216     u32 temp;
1217
1218     if (!userKey || !key)
1219         return -1;
1220     if (bits != 128 && bits != 192 && bits != 256)
1221         return -2;
1222
1223     rk = key->rd_key;
1224
1225     if (bits == 128)
1226         key->rounds = 10;
1227     else if (bits == 192)
1228         key->rounds = 12;
1229     else
1230         key->rounds = 14;
1231
1232     rk[0] = GETU32(userKey     );
1233     rk[1] = GETU32(userKey +  4);
1234     rk[2] = GETU32(userKey +  8);
1235     rk[3] = GETU32(userKey + 12);
1236     if (bits == 128) {
1237         while (1) {
1238             temp  = rk[3];
1239             rk[4] = rk[0] ^
1240                 ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
1241                 ((u32)Te4[(temp >>  8) & 0xff] << 16) ^
1242                 ((u32)Te4[(temp      ) & 0xff] << 8) ^
1243                 ((u32)Te4[(temp >> 24)       ]) ^
1244                 rcon[i];
1245             rk[5] = rk[1] ^ rk[4];
1246             rk[6] = rk[2] ^ rk[5];
1247             rk[7] = rk[3] ^ rk[6];
1248             if (++i == 10) {
1249                 return 0;
1250             }
1251             rk += 4;
1252         }
1253     }
1254     rk[4] = GETU32(userKey + 16);
1255     rk[5] = GETU32(userKey + 20);
1256     if (bits == 192) {
1257         while (1) {
1258             temp = rk[ 5];
1259             rk[ 6] = rk[ 0] ^
1260                 ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
1261                 ((u32)Te4[(temp >>  8) & 0xff] << 16) ^
1262                 ((u32)Te4[(temp      ) & 0xff] << 8) ^
1263                 ((u32)Te4[(temp >> 24)       ]) ^
1264                 rcon[i];
1265             rk[ 7] = rk[ 1] ^ rk[ 6];
1266             rk[ 8] = rk[ 2] ^ rk[ 7];
1267             rk[ 9] = rk[ 3] ^ rk[ 8];
1268             if (++i == 8) {
1269                 return 0;
1270             }
1271             rk[10] = rk[ 4] ^ rk[ 9];
1272             rk[11] = rk[ 5] ^ rk[10];
1273             rk += 6;
1274         }
1275     }
1276     rk[6] = GETU32(userKey + 24);
1277     rk[7] = GETU32(userKey + 28);
1278     if (bits == 256) {
1279         while (1) {
1280             temp = rk[ 7];
1281             rk[ 8] = rk[ 0] ^
1282                 ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
1283                 ((u32)Te4[(temp >>  8) & 0xff] << 16) ^
1284                 ((u32)Te4[(temp      ) & 0xff] << 8) ^
1285                 ((u32)Te4[(temp >> 24)       ]) ^
1286                 rcon[i];
1287             rk[ 9] = rk[ 1] ^ rk[ 8];
1288             rk[10] = rk[ 2] ^ rk[ 9];
1289             rk[11] = rk[ 3] ^ rk[10];
1290             if (++i == 7) {
1291                 return 0;
1292             }
1293             temp = rk[11];
1294             rk[12] = rk[ 4] ^
1295                 ((u32)Te4[(temp >> 24)       ] << 24) ^
1296                 ((u32)Te4[(temp >> 16) & 0xff] << 16) ^
1297                 ((u32)Te4[(temp >>  8) & 0xff] << 8) ^
1298                 ((u32)Te4[(temp      ) & 0xff]);
1299             rk[13] = rk[ 5] ^ rk[12];
1300             rk[14] = rk[ 6] ^ rk[13];
1301             rk[15] = rk[ 7] ^ rk[14];
1302
1303             rk += 8;
1304         }
1305     }
1306     return 0;
1307 }
1308
1309 /**
1310  * Expand the cipher key into the decryption key schedule.
1311  */
1312 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
1313                         AES_KEY *key)
1314 {
1315
1316     u32 *rk;
1317     int i, j, status;
1318     u32 temp;
1319
1320     /* first, start with an encryption schedule */
1321     status = AES_set_encrypt_key(userKey, bits, key);
1322     if (status < 0)
1323         return status;
1324
1325     rk = key->rd_key;
1326
1327     /* invert the order of the round keys: */
1328     for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
1329         temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
1330         temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
1331         temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
1332         temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
1333     }
1334     /* apply the inverse MixColumn transform to all round keys but the first and the last: */
1335     for (i = 1; i < (key->rounds); i++) {
1336         rk += 4;
1337         for (j = 0; j < 4; j++) {
1338             u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
1339
1340             tp1 = rk[j];
1341             m = tp1 & 0x80808080;
1342             tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
1343                 ((m - (m >> 7)) & 0x1b1b1b1b);
1344             m = tp2 & 0x80808080;
1345             tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
1346                 ((m - (m >> 7)) & 0x1b1b1b1b);
1347             m = tp4 & 0x80808080;
1348             tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
1349                 ((m - (m >> 7)) & 0x1b1b1b1b);
1350             tp9 = tp8 ^ tp1;
1351             tpb = tp9 ^ tp2;
1352             tpd = tp9 ^ tp4;
1353             tpe = tp8 ^ tp4 ^ tp2;
1354 #if defined(ROTATE)
1355             rk[j] = tpe ^ ROTATE(tpd,16) ^
1356                 ROTATE(tp9,24) ^ ROTATE(tpb,8);
1357 #else
1358             rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
1359                 (tp9 >> 8) ^ (tp9 << 24) ^
1360                 (tpb >> 24) ^ (tpb << 8);
1361 #endif
1362         }
1363     }
1364     return 0;
1365 }
1366
1367 #endif /* AES_ASM */